summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/.gitignore1
-rw-r--r--arch/arm64/kernel/Makefile69
-rw-r--r--arch/arm64/kernel/acpi.c281
-rw-r--r--arch/arm64/kernel/acpi_numa.c14
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c5
-rw-r--r--arch/arm64/kernel/alternative.c149
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c583
-rw-r--r--arch/arm64/kernel/asm-offsets.c103
-rw-r--r--arch/arm64/kernel/cacheinfo.c45
-rw-r--r--arch/arm64/kernel/compat_alignment.c383
-rw-r--r--arch/arm64/kernel/cpu-reset.S23
-rw-r--r--arch/arm64/kernel/cpu-reset.h32
-rw-r--r--arch/arm64/kernel/cpu_errata.c800
-rw-r--r--arch/arm64/kernel/cpu_ops.c11
-rw-r--r--arch/arm64/kernel/cpufeature.c2805
-rw-r--r--arch/arm64/kernel/cpuidle.c44
-rw-r--r--arch/arm64/kernel/cpuinfo.c310
-rw-r--r--arch/arm64/kernel/crash_core.c21
-rw-r--r--arch/arm64/kernel/crash_dump.c31
-rw-r--r--arch/arm64/kernel/debug-monitors.c56
-rw-r--r--arch/arm64/kernel/efi-entry.S120
-rw-r--r--arch/arm64/kernel/efi-header.S155
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S59
-rw-r--r--arch/arm64/kernel/efi.c137
-rw-r--r--arch/arm64/kernel/elfcore.c138
-rw-r--r--arch/arm64/kernel/entry-common.c776
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S109
-rw-r--r--arch/arm64/kernel/entry-ftrace.S323
-rw-r--r--arch/arm64/kernel/entry.S1108
-rw-r--r--arch/arm64/kernel/fpsimd.c1381
-rw-r--r--arch/arm64/kernel/ftrace.c395
-rw-r--r--arch/arm64/kernel/head.S972
-rw-r--r--arch/arm64/kernel/hibernate-asm.S85
-rw-r--r--arch/arm64/kernel/hibernate.c445
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c66
-rw-r--r--arch/arm64/kernel/hyp-stub.S168
-rw-r--r--arch/arm64/kernel/idle.c45
-rw-r--r--arch/arm64/kernel/idreg-override.c375
-rw-r--r--arch/arm64/kernel/image-vars.h108
-rw-r--r--arch/arm64/kernel/image.h1
-rw-r--r--arch/arm64/kernel/insn.c1690
-rw-r--r--arch/arm64/kernel/irq.c102
-rw-r--r--arch/arm64/kernel/jump_label.c12
-rw-r--r--arch/arm64/kernel/kaslr.c203
-rw-r--r--arch/arm64/kernel/kexec_image.c64
-rw-r--r--arch/arm64/kernel/kgdb.c11
-rw-r--r--arch/arm64/kernel/kuser32.S3
-rw-r--r--arch/arm64/kernel/machine_kexec.c256
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c219
-rw-r--r--arch/arm64/kernel/module-plts.c79
-rw-r--r--arch/arm64/kernel/module.c186
-rw-r--r--arch/arm64/kernel/module.lds5
-rw-r--r--arch/arm64/kernel/mte.c606
-rw-r--r--arch/arm64/kernel/paravirt.c68
-rw-r--r--arch/arm64/kernel/patch-scs.c262
-rw-r--r--arch/arm64/kernel/patching.c167
-rw-r--r--arch/arm64/kernel/pci.c35
-rw-r--r--arch/arm64/kernel/perf_callchain.c34
-rw-r--r--arch/arm64/kernel/perf_event.c1168
-rw-r--r--arch/arm64/kernel/perf_regs.c58
-rw-r--r--arch/arm64/kernel/pi/Makefile35
-rw-r--r--arch/arm64/kernel/pi/kaslr_early.c110
-rw-r--r--arch/arm64/kernel/pointer_auth.c74
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c13
-rw-r--r--arch/arm64/kernel/probes/kprobes.c365
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S14
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c1
-rw-r--r--arch/arm64/kernel/probes/uprobes.c8
-rw-r--r--arch/arm64/kernel/process.c478
-rw-r--r--arch/arm64/kernel/proton-pack.c1156
-rw-r--r--arch/arm64/kernel/psci.c8
-rw-r--r--arch/arm64/kernel/ptrace.c991
-rw-r--r--arch/arm64/kernel/reloc_test_core.c4
-rw-r--r--arch/arm64/kernel/reloc_test_syms.S44
-rw-r--r--arch/arm64/kernel/relocate_kernel.S127
-rw-r--r--arch/arm64/kernel/return_address.c16
-rw-r--r--arch/arm64/kernel/sdei.c147
-rw-r--r--arch/arm64/kernel/setup.c159
-rw-r--r--arch/arm64/kernel/signal.c592
-rw-r--r--arch/arm64/kernel/signal32.c77
-rw-r--r--arch/arm64/kernel/sigreturn32.S1
-rw-r--r--arch/arm64/kernel/sleep.S28
-rw-r--r--arch/arm64/kernel/smccc-call.S102
-rw-r--r--arch/arm64/kernel/smp.c563
-rw-r--r--arch/arm64/kernel/smp_spin_table.c16
-rw-r--r--arch/arm64/kernel/ssbd.c129
-rw-r--r--arch/arm64/kernel/stacktrace.c362
-rw-r--r--arch/arm64/kernel/suspend.c52
-rw-r--r--arch/arm64/kernel/sys_compat.c12
-rw-r--r--arch/arm64/kernel/syscall.c137
-rw-r--r--arch/arm64/kernel/time.c27
-rw-r--r--arch/arm64/kernel/topology.c327
-rw-r--r--arch/arm64/kernel/traps.c728
-rw-r--r--arch/arm64/kernel/vdso-wrap.S (renamed from arch/arm64/kernel/vdso/vdso.S)3
-rw-r--r--arch/arm64/kernel/vdso.c369
-rw-r--r--arch/arm64/kernel/vdso/.gitignore1
-rw-r--r--arch/arm64/kernel/vdso/Makefile59
-rwxr-xr-xarch/arm64/kernel/vdso/gen_vdso_offsets.sh4
-rw-r--r--arch/arm64/kernel/vdso/note.S3
-rw-r--r--arch/arm64/kernel/vdso/sigreturn.S74
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S37
-rw-r--r--arch/arm64/kernel/vdso/vgettimeofday.c6
-rw-r--r--arch/arm64/kernel/vdso32-wrap.S (renamed from arch/arm64/kernel/vdso32/vdso.S)0
-rw-r--r--arch/arm64/kernel/vdso32/.gitignore1
-rw-r--r--arch/arm64/kernel/vdso32/Makefile94
-rw-r--r--arch/arm64/kernel/vdso32/sigreturn.S62
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S44
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c16
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S240
-rw-r--r--arch/arm64/kernel/watchdog_hld.c36
110 files changed, 15505 insertions, 10407 deletions
diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore
index c5f676c3c224..bbb90f92d051 100644
--- a/arch/arm64/kernel/.gitignore
+++ b/arch/arm64/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..e5d03a7039b4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -3,56 +3,63 @@
# Makefile for the linux kernel.
#
-CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
-AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src)
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_insn.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
+# Remove stack protector to avoid triggering unneeded stack canary
+# checks due to randomize_kstack_offset.
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
+CFLAGS_syscall.o += -fno-stack-protector
+
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
+# It's not safe to invoke KCOV when portions of the kernel environment aren't
+# available or are out-of-sync with HW state. Since `noinstr` doesn't always
+# inhibit KCOV instrumentation, disable it for the entire compilation unit.
+KCOV_INSTRUMENT_entry-common.o := n
+KCOV_INSTRUMENT_idle.o := n
+
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-common.o entry-fpsimd.o process.o ptrace.o \
setup.o signal.o sys.o stacktrace.o time.o traps.o \
- io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \
+ io.o vdso.o hyp-stub.o psci.o cpu_ops.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o
-
-extra-$(CONFIG_EFI) := efi-entry.o
-
-OBJCOPYFLAGS := --prefix-symbols=__efistub_
-$(obj)/%.stub.o: $(obj)/%.o FORCE
- $(call if_changed,objcopy)
+ syscall.o proton-pack.o idreg-override.o idle.o \
+ patching.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
-ifneq ($(CONFIG_COMPAT_VDSO), y)
obj-$(CONFIG_COMPAT) += sigreturn32.o
-endif
+obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
+obj-$(CONFIG_MODULES) += module.o module-plts.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o \
- efi-rt-wrapper.o
+obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
+obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
@@ -61,14 +68,30 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_ARM64_MTE) += mte.o
+obj-y += vdso-wrap.o
+obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
+obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
+
+# We need to prevent the SCS patching code from patching itself. Using
+# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
+# generated triggers an issue with full LTO on Clang, which stops emitting PAC
+# instructions altogether. So instead, omit the unwind tables used by the
+# patching code, so it will not be able to locate its own PAC instructions.
+CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables
-obj-y += vdso/ probes/
-obj-$(CONFIG_COMPAT_VDSO) += vdso32/
-head-y := head.o
-extra-y += $(head-y) vmlinux.lds
+# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
+$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
+$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
+
+obj-y += probes/
+obj-y += head.o
+extra-y += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
+
+# for cleaning
+subdir- += vdso vdso32
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 3a58e9db5cfe..dba8fcec7f33 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -13,22 +13,25 @@
#define pr_fmt(fmt) "ACPI: " fmt
#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
+#include <linux/irq_work.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
#include <linux/smp.h>
#include <linux/serial_core.h>
+#include <linux/pgtable.h>
#include <acpi/ghes.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
-#include <asm/pgtable.h>
#include <asm/smp_plat.h>
int acpi_noirq = 1; /* skip ACPI IRQ initialization */
@@ -61,29 +64,22 @@ static int __init parse_acpi(char *arg)
}
early_param("acpi", parse_acpi);
-static int __init dt_scan_depth1_nodes(unsigned long node,
- const char *uname, int depth,
- void *data)
+static bool __init dt_is_stub(void)
{
- /*
- * Ignore anything not directly under the root node; we'll
- * catch its parent instead.
- */
- if (depth != 1)
- return 0;
+ int node;
- if (strcmp(uname, "chosen") == 0)
- return 0;
+ fdt_for_each_subnode(node, initial_boot_params, 0) {
+ const char *name = fdt_get_name(initial_boot_params, node, NULL);
+ if (strcmp(name, "chosen") == 0)
+ continue;
+ if (strcmp(name, "hypervisor") == 0 &&
+ of_flat_dt_is_compatible(node, "xen,xen"))
+ continue;
- if (strcmp(uname, "hypervisor") == 0 &&
- of_flat_dt_is_compatible(node, "xen,xen"))
- return 0;
+ return false;
+ }
- /*
- * This node at depth 1 is neither a chosen node nor a xen node,
- * which we do not expect.
- */
- return 1;
+ return true;
}
/*
@@ -204,8 +200,7 @@ void __init acpi_boot_table_init(void)
* and ACPI has not been [force] enabled (acpi=on|force)
*/
if (param_acpi_off ||
- (!param_acpi_on && !param_acpi_force &&
- of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+ (!param_acpi_on && !param_acpi_force && !dt_is_stub()))
goto done;
/*
@@ -238,6 +233,18 @@ done:
}
}
+static pgprot_t __acpi_get_writethrough_mem_attribute(void)
+{
+ /*
+ * Although UEFI specifies the use of Normal Write-through for
+ * EFI_MEMORY_WT, it is seldom used in practice and not implemented
+ * by most (all?) CPUs. Rather than allocate a MAIR just for this
+ * purpose, emit a warning and use Normal Non-cacheable instead.
+ */
+ pr_warn_once("No MAIR allocation for EFI_MEMORY_WT; treating as Normal Non-cacheable\n");
+ return __pgprot(PROT_NORMAL_NC);
+}
+
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
{
/*
@@ -245,7 +252,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
* types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is
* mapped to a corresponding MAIR attribute encoding.
* The EFI memory attribute advises all possible capabilities
- * of a memory region. We use the most efficient capability.
+ * of a memory region.
*/
u64 attr;
@@ -253,13 +260,101 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
attr = efi_mem_attributes(addr);
if (attr & EFI_MEMORY_WB)
return PAGE_KERNEL;
- if (attr & EFI_MEMORY_WT)
- return __pgprot(PROT_NORMAL_WT);
if (attr & EFI_MEMORY_WC)
return __pgprot(PROT_NORMAL_NC);
+ if (attr & EFI_MEMORY_WT)
+ return __acpi_get_writethrough_mem_attribute();
return __pgprot(PROT_DEVICE_nGnRnE);
}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+{
+ efi_memory_desc_t *md, *region = NULL;
+ pgprot_t prot;
+
+ if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+ return NULL;
+
+ for_each_efi_memory_desc(md) {
+ u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (phys < md->phys_addr || phys >= end)
+ continue;
+
+ if (phys + size > end) {
+ pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+ return NULL;
+ }
+ region = md;
+ break;
+ }
+
+ /*
+ * It is fine for AML to remap regions that are not represented in the
+ * EFI memory map at all, as it only describes normal memory, and MMIO
+ * regions that require a virtual mapping to make them accessible to
+ * the EFI runtime services.
+ */
+ prot = __pgprot(PROT_DEVICE_nGnRnE);
+ if (region) {
+ switch (region->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ case EFI_PERSISTENT_MEMORY:
+ if (memblock_is_map_memory(phys) ||
+ !memblock_is_region_memory(phys, size)) {
+ pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
+ return NULL;
+ }
+ /*
+ * Mapping kernel memory is permitted if the region in
+ * question is covered by a single memblock with the
+ * NOMAP attribute set: this enables the use of ACPI
+ * table overrides passed via initramfs, which are
+ * reserved in memory using arch_reserve_mem_area()
+ * below. As this particular use case only requires
+ * read access, fall through to the R/O mapping case.
+ */
+ fallthrough;
+
+ case EFI_RUNTIME_SERVICES_CODE:
+ /*
+ * This would be unusual, but not problematic per se,
+ * as long as we take care not to create a writable
+ * mapping for executable code.
+ */
+ prot = PAGE_KERNEL_RO;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ /*
+ * ACPI reclaim memory is used to pass firmware tables
+ * and other data that is intended for consumption by
+ * the OS only, which may decide it wants to reclaim
+ * that memory and use it for something else. We never
+ * do that, but we usually add it to the linear map
+ * anyway, in which case we should use the existing
+ * mapping.
+ */
+ if (memblock_is_map_memory(phys))
+ return (void __iomem *)__phys_to_virt(phys);
+ fallthrough;
+
+ default:
+ if (region->attribute & EFI_MEMORY_WB)
+ prot = PAGE_KERNEL;
+ else if (region->attribute & EFI_MEMORY_WC)
+ prot = __pgprot(PROT_NORMAL_NC);
+ else if (region->attribute & EFI_MEMORY_WT)
+ prot = __acpi_get_writethrough_mem_attribute();
+ }
+ }
+ return ioremap_prot(phys, size, pgprot_val(prot));
+}
+
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
@@ -269,12 +364,19 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
int apei_claim_sea(struct pt_regs *regs)
{
int err = -ENOENT;
+ bool return_to_irqs_enabled;
unsigned long current_flags;
if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
return err;
- current_flags = arch_local_save_flags();
+ current_flags = local_daif_save_flags();
+
+ /* current_flags isn't useful here as daif doesn't tell us about pNMI */
+ return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags());
+
+ if (regs)
+ return_to_irqs_enabled = interrupts_enabled(regs);
/*
* SEA can interrupt SError, mask it and describe this as an NMI so
@@ -284,7 +386,134 @@ int apei_claim_sea(struct pt_regs *regs)
nmi_enter();
err = ghes_notify_sea();
nmi_exit();
+
+ /*
+ * APEI NMI-like notifications are deferred to irq_work. Unless
+ * we interrupted irqs-masked code, we can do that now.
+ */
+ if (!err) {
+ if (return_to_irqs_enabled) {
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ __irq_enter();
+ irq_work_run();
+ __irq_exit();
+ } else {
+ pr_warn_ratelimited("APEI work queued but not completed");
+ err = -EINPROGRESS;
+ }
+ }
+
local_daif_restore(current_flags);
return err;
}
+
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+ memblock_mark_nomap(addr, size);
+}
+
+#ifdef CONFIG_ACPI_FFH
+/*
+ * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
+ * specified in https://developer.arm.com/docs/den0048/latest
+ */
+struct acpi_ffh_data {
+ struct acpi_ffh_info info;
+ void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
+ unsigned long a2, unsigned long a3,
+ unsigned long a4, unsigned long a5,
+ unsigned long a6, unsigned long a7,
+ struct arm_smccc_res *args,
+ struct arm_smccc_quirk *res);
+ void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
+ struct arm_smccc_1_2_regs *res);
+};
+
+int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+{
+ enum arm_smccc_conduit conduit;
+ struct acpi_ffh_data *ffh_ctxt;
+
+ if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
+ return -EOPNOTSUPP;
+
+ conduit = arm_smccc_1_1_get_conduit();
+ if (conduit == SMCCC_CONDUIT_NONE) {
+ pr_err("%s: invalid SMCCC conduit\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
+ if (!ffh_ctxt)
+ return -ENOMEM;
+
+ if (conduit == SMCCC_CONDUIT_SMC) {
+ ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
+ ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
+ } else {
+ ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
+ ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
+ }
+
+ memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
+
+ *region_ctxt = ffh_ctxt;
+ return AE_OK;
+}
+
+static bool acpi_ffh_smccc_owner_allowed(u32 fid)
+{
+ int owner = ARM_SMCCC_OWNER_NUM(fid);
+
+ if (owner == ARM_SMCCC_OWNER_STANDARD ||
+ owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
+ return true;
+
+ return false;
+}
+
+int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+{
+ int ret = 0;
+ struct acpi_ffh_data *ffh_ctxt = region_context;
+
+ if (ffh_ctxt->info.offset == 0) {
+ /* SMC/HVC 32bit call */
+ struct arm_smccc_res res;
+ u32 a[8] = { 0 }, *ptr = (u32 *)value;
+
+ if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
+ !acpi_ffh_smccc_owner_allowed(*ptr) ||
+ ffh_ctxt->info.length > 32) {
+ ret = AE_ERROR;
+ } else {
+ int idx, len = ffh_ctxt->info.length >> 2;
+
+ for (idx = 0; idx < len; idx++)
+ a[idx] = *(ptr + idx);
+
+ ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
+ a[5], a[6], a[7], &res, NULL);
+ memcpy(value, &res, sizeof(res));
+ }
+
+ } else if (ffh_ctxt->info.offset == 1) {
+ /* SMC/HVC 64bit call */
+ struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
+
+ if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
+ !acpi_ffh_smccc_owner_allowed(r->a0) ||
+ ffh_ctxt->info.length > sizeof(*r)) {
+ ret = AE_ERROR;
+ } else {
+ ffh_ctxt->invoke_ffh64_fn(r, r);
+ memcpy(value, r, ffh_ctxt->info.length);
+ }
+ } else {
+ ret = AE_ERROR;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_ACPI_FFH */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index 7ff800045434..e51535a5f939 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -109,7 +109,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
pxm = pa->proximity_domain;
node = acpi_map_pxm_to_node(pxm);
- if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+ if (node == NUMA_NO_NODE) {
pr_err("SRAT: Too many proximity domains %d\n", pxm);
bad_srat();
return;
@@ -118,15 +118,3 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
node_set(node, numa_nodes_parsed);
}
-int __init arm64_acpi_numa_init(void)
-{
- int ret;
-
- ret = acpi_numa_init();
- if (ret) {
- pr_info("Failed to initialise from firmware\n");
- return ret;
- }
-
- return srat_disabled() ? -EINVAL : 0;
-}
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index e7c941d8340d..e1be29e608b7 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -99,10 +99,11 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
* that read this address need to convert this address to the
* Boot-Loader's endianness before jumping.
*/
- writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point);
+ writeq_relaxed(__pa_symbol(secondary_entry),
+ &mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
- arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+ arch_send_wakeup_ipi(cpu);
return 0;
}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d1757ef1b1e7..8ff6610af496 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -10,18 +10,25 @@
#include <linux/init.h>
#include <linux/cpu.h>
+#include <linux/elf.h>
#include <asm/cacheflush.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/module.h>
#include <asm/sections.h>
+#include <asm/vdso.h>
#include <linux/stop_machine.h>
-#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-static int all_alternatives_applied;
+#define ALT_CAP(a) ((a)->cpucap & ~ARM64_CB_BIT)
+#define ALT_HAS_CB(a) ((a)->cpucap & ARM64_CB_BIT)
+
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
@@ -30,38 +37,26 @@ struct alt_region {
struct alt_instr *end;
};
-bool alternative_is_applied(u16 cpufeature)
+bool alternative_is_applied(u16 cpucap)
{
- if (WARN_ON(cpufeature >= ARM64_NCAPS))
+ if (WARN_ON(cpucap >= ARM64_NCAPS))
return false;
- return test_bit(cpufeature, applied_alternatives);
+ return test_bit(cpucap, applied_alternatives);
}
/*
* Check if the target PC is within an alternative block.
*/
-static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
+static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
- unsigned long replptr;
-
- if (kernel_text_address(pc))
- return true;
-
- replptr = (unsigned long)ALT_REPL_PTR(alt);
- if (pc >= replptr && pc <= (replptr + alt->alt_len))
- return false;
-
- /*
- * Branching into *another* alternate sequence is doomed, and
- * we're not even trying to fix it up.
- */
- BUG();
+ unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);
+ return !(pc >= replptr && pc <= (replptr + alt->alt_len));
}
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
-static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
+static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
@@ -106,7 +101,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
-static void patch_alternative(struct alt_instr *alt,
+static noinstr void patch_alternative(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
__le32 *replptr;
@@ -126,13 +121,13 @@ static void patch_alternative(struct alt_instr *alt,
* accidentally call into the cache.S code, which is patched by us at
* runtime.
*/
-static void clean_dcache_range_nopatch(u64 start, u64 end)
+static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
{
u64 cur, d_size, ctr_el0;
- ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ ctr_el0 = arm64_ftr_reg_ctrel0.sys_val;
d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
- CTR_DMINLINE_SHIFT);
+ CTR_EL0_DminLine_SHIFT);
cur = start & ~(d_size - 1);
do {
/*
@@ -144,40 +139,37 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
-static void __apply_alternatives(void *alt_region, bool is_module,
- unsigned long *feature_mask)
+static void __apply_alternatives(const struct alt_region *region,
+ bool is_module,
+ unsigned long *cpucap_mask)
{
struct alt_instr *alt;
- struct alt_region *region = alt_region;
__le32 *origptr, *updptr;
alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
int nr_inst;
+ int cap = ALT_CAP(alt);
- if (!test_bit(alt->cpufeature, feature_mask))
+ if (!test_bit(cap, cpucap_mask))
continue;
- /* Use ARM64_CB_PATCH as an unconditional patch */
- if (alt->cpufeature < ARM64_CB_PATCH &&
- !cpus_have_cap(alt->cpufeature))
+ if (!cpus_have_cap(cap))
continue;
- if (alt->cpufeature == ARM64_CB_PATCH)
+ if (ALT_HAS_CB(alt))
BUG_ON(alt->alt_len != 0);
else
BUG_ON(alt->alt_len != alt->orig_len);
- pr_info_once("patching kernel code\n");
-
origptr = ALT_ORIG_PTR(alt);
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- if (alt->cpufeature < ARM64_CB_PATCH)
- alt_cb = patch_alternative;
- else
+ if (ALT_HAS_CB(alt))
alt_cb = ALT_REPL_PTR(alt);
+ else
+ alt_cb = patch_alternative;
alt_cb(alt, origptr, updptr, nr_inst);
@@ -193,43 +185,67 @@ static void __apply_alternatives(void *alt_region, bool is_module,
*/
if (!is_module) {
dsb(ish);
- __flush_icache_all();
+ icache_inval_all_pou();
isb();
- /* Ignore ARM64_CB bit from feature mask */
bitmap_or(applied_alternatives, applied_alternatives,
- feature_mask, ARM64_NCAPS);
+ cpucap_mask, ARM64_NCAPS);
bitmap_and(applied_alternatives, applied_alternatives,
- cpu_hwcaps, ARM64_NCAPS);
+ system_cpucaps, ARM64_NCAPS);
}
}
+static void __init apply_alternatives_vdso(void)
+{
+ struct alt_region region;
+ const struct elf64_hdr *hdr;
+ const struct elf64_shdr *shdr;
+ const struct elf64_shdr *alt;
+ DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);
+
+ bitmap_fill(all_capabilities, ARM64_NCAPS);
+
+ hdr = (struct elf64_hdr *)vdso_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+
+ region = (struct alt_region){
+ .begin = (void *)hdr + alt->sh_offset,
+ .end = (void *)hdr + alt->sh_offset + alt->sh_size,
+ };
+
+ __apply_alternatives(&region, false, &all_capabilities[0]);
+}
+
+static const struct alt_region kernel_alternatives __initconst = {
+ .begin = (struct alt_instr *)__alt_instructions,
+ .end = (struct alt_instr *)__alt_instructions_end,
+};
+
/*
* We might be patching the stop_machine state machine, so implement a
* really simple polling protocol here.
*/
-static int __apply_alternatives_multi_stop(void *unused)
+static int __init __apply_alternatives_multi_stop(void *unused)
{
- struct alt_region region = {
- .begin = (struct alt_instr *)__alt_instructions,
- .end = (struct alt_instr *)__alt_instructions_end,
- };
-
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
- while (!READ_ONCE(all_alternatives_applied))
+ while (!all_alternatives_applied)
cpu_relax();
isb();
} else {
- DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
+ DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);
- bitmap_complement(remaining_capabilities, boot_capabilities,
- ARM64_NPATCHABLE);
+ bitmap_complement(remaining_capabilities, boot_cpucaps,
+ ARM64_NCAPS);
BUG_ON(all_alternatives_applied);
- __apply_alternatives(&region, false, remaining_capabilities);
+ __apply_alternatives(&kernel_alternatives, false,
+ remaining_capabilities);
/* Barriers provided by the cache flushing */
- WRITE_ONCE(all_alternatives_applied, 1);
+ all_alternatives_applied = 1;
}
return 0;
@@ -237,6 +253,9 @@ static int __apply_alternatives_multi_stop(void *unused)
void __init apply_alternatives_all(void)
{
+ pr_info("applying system-wide alternatives\n");
+
+ apply_alternatives_vdso();
/* better not try code patching on a live SMP system */
stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
}
@@ -248,15 +267,13 @@ void __init apply_alternatives_all(void)
*/
void __init apply_boot_alternatives(void)
{
- struct alt_region region = {
- .begin = (struct alt_instr *)__alt_instructions,
- .end = (struct alt_instr *)__alt_instructions_end,
- };
-
/* If called on non-boot cpu things could go wrong */
WARN_ON(smp_processor_id() != 0);
- __apply_alternatives(&region, false, &boot_capabilities[0]);
+ pr_info("applying boot alternatives\n");
+
+ __apply_alternatives(&kernel_alternatives, false,
+ &boot_cpucaps[0]);
}
#ifdef CONFIG_MODULES
@@ -266,10 +283,18 @@ void apply_alternatives_module(void *start, size_t length)
.begin = start,
.end = start + length,
};
- DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE);
+ DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);
- bitmap_fill(all_capabilities, ARM64_NPATCHABLE);
+ bitmap_fill(all_capabilities, ARM64_NCAPS);
__apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif
+
+noinstr void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ for (int i = 0; i < nr_inst; i++)
+ updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+}
+EXPORT_SYMBOL(alt_cb_patch_nops);
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index ca158be21f83..dd6ce86d4332 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -17,7 +17,6 @@
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
-#include <asm/kprobes.h>
#define CREATE_TRACE_POINTS
#include "trace-events-emulation.h"
@@ -39,225 +38,44 @@ enum insn_emulation_mode {
enum legacy_insn_status {
INSN_DEPRECATED,
INSN_OBSOLETE,
-};
-
-struct insn_emulation_ops {
- const char *name;
- enum legacy_insn_status status;
- struct undef_hook *hooks;
- int (*set_hw_mode)(bool enable);
+ INSN_UNAVAILABLE,
};
struct insn_emulation {
- struct list_head node;
- struct insn_emulation_ops *ops;
+ const char *name;
+ enum legacy_insn_status status;
+ bool (*try_emulate)(struct pt_regs *regs,
+ u32 insn);
+ int (*set_hw_mode)(bool enable);
+
int current_mode;
int min;
int max;
-};
-
-static LIST_HEAD(insn_emulation);
-static int nr_insn_emulated __initdata;
-static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
-
-static void register_emulation_hooks(struct insn_emulation_ops *ops)
-{
- struct undef_hook *hook;
-
- BUG_ON(!ops->hooks);
-
- for (hook = ops->hooks; hook->instr_mask; hook++)
- register_undef_hook(hook);
-
- pr_notice("Registered %s emulation handler\n", ops->name);
-}
-
-static void remove_emulation_hooks(struct insn_emulation_ops *ops)
-{
- struct undef_hook *hook;
-
- BUG_ON(!ops->hooks);
-
- for (hook = ops->hooks; hook->instr_mask; hook++)
- unregister_undef_hook(hook);
-
- pr_notice("Removed %s emulation handler\n", ops->name);
-}
-
-static void enable_insn_hw_mode(void *data)
-{
- struct insn_emulation *insn = (struct insn_emulation *)data;
- if (insn->ops->set_hw_mode)
- insn->ops->set_hw_mode(true);
-}
-
-static void disable_insn_hw_mode(void *data)
-{
- struct insn_emulation *insn = (struct insn_emulation *)data;
- if (insn->ops->set_hw_mode)
- insn->ops->set_hw_mode(false);
-}
-
-/* Run set_hw_mode(mode) on all active CPUs */
-static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable)
-{
- if (!insn->ops->set_hw_mode)
- return -EINVAL;
- if (enable)
- on_each_cpu(enable_insn_hw_mode, (void *)insn, true);
- else
- on_each_cpu(disable_insn_hw_mode, (void *)insn, true);
- return 0;
-}
-/*
- * Run set_hw_mode for all insns on a starting CPU.
- * Returns:
- * 0 - If all the hooks ran successfully.
- * -EINVAL - At least one hook is not supported by the CPU.
- */
-static int run_all_insn_set_hw_mode(unsigned int cpu)
-{
- int rc = 0;
- unsigned long flags;
- struct insn_emulation *insn;
-
- raw_spin_lock_irqsave(&insn_emulation_lock, flags);
- list_for_each_entry(insn, &insn_emulation, node) {
- bool enable = (insn->current_mode == INSN_HW);
- if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) {
- pr_warn("CPU[%u] cannot support the emulation of %s",
- cpu, insn->ops->name);
- rc = -EINVAL;
- }
- }
- raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
- return rc;
-}
-
-static int update_insn_emulation_mode(struct insn_emulation *insn,
- enum insn_emulation_mode prev)
-{
- int ret = 0;
-
- switch (prev) {
- case INSN_UNDEF: /* Nothing to be done */
- break;
- case INSN_EMULATE:
- remove_emulation_hooks(insn->ops);
- break;
- case INSN_HW:
- if (!run_all_cpu_set_hw_mode(insn, false))
- pr_notice("Disabled %s support\n", insn->ops->name);
- break;
- }
-
- switch (insn->current_mode) {
- case INSN_UNDEF:
- break;
- case INSN_EMULATE:
- register_emulation_hooks(insn->ops);
- break;
- case INSN_HW:
- ret = run_all_cpu_set_hw_mode(insn, true);
- if (!ret)
- pr_notice("Enabled %s support\n", insn->ops->name);
- break;
- }
-
- return ret;
-}
-
-static void __init register_insn_emulation(struct insn_emulation_ops *ops)
-{
- unsigned long flags;
- struct insn_emulation *insn;
-
- insn = kzalloc(sizeof(*insn), GFP_KERNEL);
- if (!insn)
- return;
-
- insn->ops = ops;
- insn->min = INSN_UNDEF;
-
- switch (ops->status) {
- case INSN_DEPRECATED:
- insn->current_mode = INSN_EMULATE;
- /* Disable the HW mode if it was turned on at early boot time */
- run_all_cpu_set_hw_mode(insn, false);
- insn->max = INSN_HW;
- break;
- case INSN_OBSOLETE:
- insn->current_mode = INSN_UNDEF;
- insn->max = INSN_EMULATE;
- break;
- }
-
- raw_spin_lock_irqsave(&insn_emulation_lock, flags);
- list_add(&insn->node, &insn_emulation);
- nr_insn_emulated++;
- raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
-
- /* Register any handlers if required */
- update_insn_emulation_mode(insn, INSN_UNDEF);
-}
-
-static int emulation_proc_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int ret = 0;
- struct insn_emulation *insn = (struct insn_emulation *) table->data;
- enum insn_emulation_mode prev_mode = insn->current_mode;
-
- table->data = &insn->current_mode;
- ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ /* sysctl for this emulation */
+ struct ctl_table sysctl;
+};
- if (ret || !write || prev_mode == insn->current_mode)
- goto ret;
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
- ret = update_insn_emulation_mode(insn, prev_mode);
- if (ret) {
- /* Mode change failed, revert to previous mode. */
- insn->current_mode = prev_mode;
- update_insn_emulation_mode(insn, INSN_UNDEF);
- }
-ret:
- table->data = insn;
- return ret;
-}
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
-static void __init register_insn_emulation_sysctl(void)
+static unsigned int __maybe_unused aarch32_check_condition(u32 opcode, u32 psr)
{
- unsigned long flags;
- int i = 0;
- struct insn_emulation *insn;
- struct ctl_table *insns_sysctl, *sysctl;
-
- insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
- GFP_KERNEL);
- if (!insns_sysctl)
- return;
-
- raw_spin_lock_irqsave(&insn_emulation_lock, flags);
- list_for_each_entry(insn, &insn_emulation, node) {
- sysctl = &insns_sysctl[i];
-
- sysctl->mode = 0644;
- sysctl->maxlen = sizeof(int);
+ u32 cc_bits = opcode >> 28;
- sysctl->procname = insn->ops->name;
- sysctl->data = insn;
- sysctl->extra1 = &insn->min;
- sysctl->extra2 = &insn->max;
- sysctl->proc_handler = emulation_proc_handler;
- i++;
+ if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+ if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
+ return ARM_OPCODE_CONDTEST_PASS;
+ else
+ return ARM_OPCODE_CONDTEST_FAIL;
}
- raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
-
- register_sysctl("abi", insns_sysctl);
+ return ARM_OPCODE_CONDTEST_UNCOND;
}
+#ifdef CONFIG_SWP_EMULATION
/*
* Implement emulation of the SWP/SWPB instructions using load-exclusive and
* store-exclusive.
@@ -277,9 +95,9 @@ static void __init register_insn_emulation_sysctl(void)
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
do { \
- uaccess_enable(); \
+ uaccess_enable_privileged(); \
__asm__ __volatile__( \
- " mov %w3, %w7\n" \
+ " mov %w3, %w6\n" \
"0: ldxr"B" %w2, [%4]\n" \
"1: stxr"B" %w0, %w1, [%4]\n" \
" cbz %w0, 2f\n" \
@@ -290,19 +108,13 @@ do { \
"2:\n" \
" mov %w1, %w2\n" \
"3:\n" \
- " .pushsection .fixup,\"ax\"\n" \
- " .align 2\n" \
- "4: mov %w0, %w6\n" \
- " b 3b\n" \
- " .popsection" \
- _ASM_EXTABLE(0b, 4b) \
- _ASM_EXTABLE(1b, 4b) \
+ _ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \
: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
: "r" ((unsigned long)addr), "i" (-EAGAIN), \
- "i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
: "memory"); \
- uaccess_disable(); \
+ uaccess_disable_privileged(); \
} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
@@ -344,25 +156,6 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
-#define ARM_OPCODE_CONDTEST_FAIL 0
-#define ARM_OPCODE_CONDTEST_PASS 1
-#define ARM_OPCODE_CONDTEST_UNCOND 2
-
-#define ARM_OPCODE_CONDITION_UNCOND 0xf
-
-static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
-{
- u32 cc_bits = opcode >> 28;
-
- if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
- if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
- return ARM_OPCODE_CONDTEST_PASS;
- else
- return ARM_OPCODE_CONDTEST_FAIL;
- }
- return ARM_OPCODE_CONDTEST_UNCOND;
-}
-
/*
* swp_handler logs the id of calling process, dissects the instruction, sanity
* checks the memory location, calls emulate_swpX for the actual operation and
@@ -435,28 +228,27 @@ fault:
return 0;
}
-/*
- * Only emulate SWP/SWPB executed in ARM state/User mode.
- * The kernel must be SWP free and SWP{B} does not exist in Thumb.
- */
-static struct undef_hook swp_hooks[] = {
- {
- .instr_mask = 0x0fb00ff0,
- .instr_val = 0x01000090,
- .pstate_mask = PSR_AA32_MODE_MASK,
- .pstate_val = PSR_AA32_MODE_USR,
- .fn = swp_handler
- },
- { }
-};
+static bool try_emulate_swp(struct pt_regs *regs, u32 insn)
+{
+ /* SWP{B} only exists in ARM state and does not exist in Thumb */
+ if (!compat_user_mode(regs) || compat_thumb_mode(regs))
+ return false;
+
+ if ((insn & 0x0fb00ff0) != 0x01000090)
+ return false;
-static struct insn_emulation_ops swp_ops = {
+ return swp_handler(regs, insn) == 0;
+}
+
+static struct insn_emulation insn_swp = {
.name = "swp",
.status = INSN_OBSOLETE,
- .hooks = swp_hooks,
+ .try_emulate = try_emulate_swp,
.set_hw_mode = NULL,
};
+#endif /* CONFIG_SWP_EMULATION */
+#ifdef CONFIG_CP15_BARRIER_EMULATION
static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
{
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
@@ -519,31 +311,29 @@ static int cp15_barrier_set_hw_mode(bool enable)
return 0;
}
-static struct undef_hook cp15_barrier_hooks[] = {
- {
- .instr_mask = 0x0fff0fdf,
- .instr_val = 0x0e070f9a,
- .pstate_mask = PSR_AA32_MODE_MASK,
- .pstate_val = PSR_AA32_MODE_USR,
- .fn = cp15barrier_handler,
- },
- {
- .instr_mask = 0x0fff0fff,
- .instr_val = 0x0e070f95,
- .pstate_mask = PSR_AA32_MODE_MASK,
- .pstate_val = PSR_AA32_MODE_USR,
- .fn = cp15barrier_handler,
- },
- { }
-};
+static bool try_emulate_cp15_barrier(struct pt_regs *regs, u32 insn)
+{
+ if (!compat_user_mode(regs) || compat_thumb_mode(regs))
+ return false;
+
+ if ((insn & 0x0fff0fdf) == 0x0e070f9a)
+ return cp15barrier_handler(regs, insn) == 0;
-static struct insn_emulation_ops cp15_barrier_ops = {
+ if ((insn & 0x0fff0fff) == 0x0e070f95)
+ return cp15barrier_handler(regs, insn) == 0;
+
+ return false;
+}
+
+static struct insn_emulation insn_cp15_barrier = {
.name = "cp15_barrier",
.status = INSN_DEPRECATED,
- .hooks = cp15_barrier_hooks,
+ .try_emulate = try_emulate_cp15_barrier,
.set_hw_mode = cp15_barrier_set_hw_mode,
};
+#endif /* CONFIG_CP15_BARRIER_EMULATION */
+#ifdef CONFIG_SETEND_EMULATION
static int setend_set_hw_mode(bool enable)
{
if (!cpu_supports_mixed_endian_el0())
@@ -591,55 +381,244 @@ static int t16_setend_handler(struct pt_regs *regs, u32 instr)
return rc;
}
-static struct undef_hook setend_hooks[] = {
- {
- .instr_mask = 0xfffffdff,
- .instr_val = 0xf1010000,
- .pstate_mask = PSR_AA32_MODE_MASK,
- .pstate_val = PSR_AA32_MODE_USR,
- .fn = a32_setend_handler,
- },
- {
- /* Thumb mode */
- .instr_mask = 0x0000fff7,
- .instr_val = 0x0000b650,
- .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK),
- .pstate_val = (PSR_AA32_T_BIT | PSR_AA32_MODE_USR),
- .fn = t16_setend_handler,
- },
- {}
-};
+static bool try_emulate_setend(struct pt_regs *regs, u32 insn)
+{
+ if (compat_thumb_mode(regs) &&
+ (insn & 0xfffffff7) == 0x0000b650)
+ return t16_setend_handler(regs, insn) == 0;
-static struct insn_emulation_ops setend_ops = {
+ if (compat_user_mode(regs) &&
+ (insn & 0xfffffdff) == 0xf1010000)
+ return a32_setend_handler(regs, insn) == 0;
+
+ return false;
+}
+
+static struct insn_emulation insn_setend = {
.name = "setend",
.status = INSN_DEPRECATED,
- .hooks = setend_hooks,
+ .try_emulate = try_emulate_setend,
.set_hw_mode = setend_set_hw_mode,
};
+#endif /* CONFIG_SETEND_EMULATION */
+
+static struct insn_emulation *insn_emulations[] = {
+#ifdef CONFIG_SWP_EMULATION
+ &insn_swp,
+#endif
+#ifdef CONFIG_CP15_BARRIER_EMULATION
+ &insn_cp15_barrier,
+#endif
+#ifdef CONFIG_SETEND_EMULATION
+ &insn_setend,
+#endif
+};
+
+static DEFINE_MUTEX(insn_emulation_mutex);
+
+static void enable_insn_hw_mode(void *data)
+{
+ struct insn_emulation *insn = data;
+ if (insn->set_hw_mode)
+ insn->set_hw_mode(true);
+}
+
+static void disable_insn_hw_mode(void *data)
+{
+ struct insn_emulation *insn = data;
+ if (insn->set_hw_mode)
+ insn->set_hw_mode(false);
+}
+
+/* Run set_hw_mode(mode) on all active CPUs */
+static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable)
+{
+ if (!insn->set_hw_mode)
+ return -EINVAL;
+ if (enable)
+ on_each_cpu(enable_insn_hw_mode, (void *)insn, true);
+ else
+ on_each_cpu(disable_insn_hw_mode, (void *)insn, true);
+ return 0;
+}
/*
- * Invoked as late_initcall, since not needed before init spawned.
+ * Run set_hw_mode for all insns on a starting CPU.
+ * Returns:
+ * 0 - If all the hooks ran successfully.
+ * -EINVAL - At least one hook is not supported by the CPU.
+ */
+static int run_all_insn_set_hw_mode(unsigned int cpu)
+{
+ int rc = 0;
+ unsigned long flags;
+
+ /*
+ * Disable IRQs to serialize against an IPI from
+ * run_all_cpu_set_hw_mode(), ensuring the HW is programmed to the most
+ * recent enablement state if the two race with one another.
+ */
+ local_irq_save(flags);
+ for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
+ struct insn_emulation *insn = insn_emulations[i];
+ bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
+ if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
+ pr_warn("CPU[%u] cannot support the emulation of %s",
+ cpu, insn->name);
+ rc = -EINVAL;
+ }
+ }
+ local_irq_restore(flags);
+
+ return rc;
+}
+
+static int update_insn_emulation_mode(struct insn_emulation *insn,
+ enum insn_emulation_mode prev)
+{
+ int ret = 0;
+
+ switch (prev) {
+ case INSN_UNDEF: /* Nothing to be done */
+ break;
+ case INSN_EMULATE:
+ break;
+ case INSN_HW:
+ if (!run_all_cpu_set_hw_mode(insn, false))
+ pr_notice("Disabled %s support\n", insn->name);
+ break;
+ }
+
+ switch (insn->current_mode) {
+ case INSN_UNDEF:
+ break;
+ case INSN_EMULATE:
+ break;
+ case INSN_HW:
+ ret = run_all_cpu_set_hw_mode(insn, true);
+ if (!ret)
+ pr_notice("Enabled %s support\n", insn->name);
+ break;
+ }
+
+ return ret;
+}
+
+static int emulation_proc_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = 0;
+ struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
+ enum insn_emulation_mode prev_mode = insn->current_mode;
+
+ mutex_lock(&insn_emulation_mutex);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (ret || !write || prev_mode == insn->current_mode)
+ goto ret;
+
+ ret = update_insn_emulation_mode(insn, prev_mode);
+ if (ret) {
+ /* Mode change failed, revert to previous mode. */
+ WRITE_ONCE(insn->current_mode, prev_mode);
+ update_insn_emulation_mode(insn, INSN_UNDEF);
+ }
+ret:
+ mutex_unlock(&insn_emulation_mutex);
+ return ret;
+}
+
+static void __init register_insn_emulation(struct insn_emulation *insn)
+{
+ struct ctl_table *sysctl;
+
+ insn->min = INSN_UNDEF;
+
+ switch (insn->status) {
+ case INSN_DEPRECATED:
+ insn->current_mode = INSN_EMULATE;
+ /* Disable the HW mode if it was turned on at early boot time */
+ run_all_cpu_set_hw_mode(insn, false);
+ insn->max = INSN_HW;
+ break;
+ case INSN_OBSOLETE:
+ insn->current_mode = INSN_UNDEF;
+ insn->max = INSN_EMULATE;
+ break;
+ case INSN_UNAVAILABLE:
+ insn->current_mode = INSN_UNDEF;
+ insn->max = INSN_UNDEF;
+ break;
+ }
+
+ /* Program the HW if required */
+ update_insn_emulation_mode(insn, INSN_UNDEF);
+
+ if (insn->status != INSN_UNAVAILABLE) {
+ sysctl = &insn->sysctl;
+
+ sysctl->mode = 0644;
+ sysctl->maxlen = sizeof(int);
+
+ sysctl->procname = insn->name;
+ sysctl->data = &insn->current_mode;
+ sysctl->extra1 = &insn->min;
+ sysctl->extra2 = &insn->max;
+ sysctl->proc_handler = emulation_proc_handler;
+
+ register_sysctl_sz("abi", sysctl, 1);
+ }
+}
+
+bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
+{
+ for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
+ struct insn_emulation *ie = insn_emulations[i];
+
+ if (ie->status == INSN_UNAVAILABLE)
+ continue;
+
+ /*
+ * A trap may race with the mode being changed
+ * INSN_EMULATE<->INSN_HW. Try to emulate the instruction to
+ * avoid a spurious UNDEF.
+ */
+ if (READ_ONCE(ie->current_mode) == INSN_UNDEF)
+ continue;
+
+ if (ie->try_emulate(regs, insn))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Invoked as core_initcall, which guarantees that the instruction
+ * emulation is ready for userspace.
*/
static int __init armv8_deprecated_init(void)
{
- if (IS_ENABLED(CONFIG_SWP_EMULATION))
- register_insn_emulation(&swp_ops);
+#ifdef CONFIG_SETEND_EMULATION
+ if (!system_supports_mixed_endian_el0()) {
+ insn_setend.status = INSN_UNAVAILABLE;
+ pr_info("setend instruction emulation is not supported on this system\n");
+ }
- if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION))
- register_insn_emulation(&cp15_barrier_ops);
+#endif
+ for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
+ struct insn_emulation *ie = insn_emulations[i];
- if (IS_ENABLED(CONFIG_SETEND_EMULATION)) {
- if(system_supports_mixed_endian_el0())
- register_insn_emulation(&setend_ops);
- else
- pr_info("setend instruction emulation is not supported on this system\n");
+ if (ie->status == INSN_UNAVAILABLE)
+ continue;
+
+ register_insn_emulation(ie);
}
cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
"arm64/isndep:starting",
run_all_insn_set_hw_mode, NULL);
- register_insn_emulation_sysctl();
-
return 0;
}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a5bdce8af65b..5a7dbbe0ce63 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -9,6 +9,8 @@
#include <linux/arm_sdei.h>
#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
@@ -28,18 +30,32 @@ int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK();
+ DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
- DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
+ DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp));
+#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
#endif
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
+ DEFINE(THREAD_SCTLR_USER, offsetof(struct task_struct, thread.sctlr_user));
+#ifdef CONFIG_ARM64_PTR_AUTH
+ DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user));
+#endif
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+ DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel));
+#endif
+#ifdef CONFIG_ARM64_MTE
+ DEFINE(THREAD_MTE_CTRL, offsetof(struct task_struct, thread.mte_ctrl));
+#endif
BLANK();
DEFINE(S_X0, offsetof(struct pt_regs, regs[0]));
DEFINE(S_X2, offsetof(struct pt_regs, regs[2]));
@@ -62,11 +78,27 @@ int main(void)
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
- DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
+ DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
- DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
+ DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+ DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0]));
+ DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2]));
+ DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4]));
+ DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6]));
+ DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8]));
+ DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp));
+ DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr));
+ DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp));
+ DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc));
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp));
+#endif
+ DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
+ BLANK();
+#endif
#ifdef CONFIG_COMPAT
DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
@@ -86,23 +118,33 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
-#ifdef CONFIG_KVM_ARM_HOST
+ DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
+ DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask));
+ BLANK();
+#ifdef CONFIG_KVM
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
- DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
- DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+ DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
- DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
+ DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2));
+ DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2));
+ DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
+ DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
+ DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
+ DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
+ DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
+ DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
@@ -115,6 +157,15 @@ int main(void)
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
+ DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS, offsetof(struct arm_smccc_1_2_regs, a0));
+ DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS, offsetof(struct arm_smccc_1_2_regs, a2));
+ DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS, offsetof(struct arm_smccc_1_2_regs, a4));
+ DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS, offsetof(struct arm_smccc_1_2_regs, a6));
+ DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS, offsetof(struct arm_smccc_1_2_regs, a8));
+ DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS, offsetof(struct arm_smccc_1_2_regs, a10));
+ DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS, offsetof(struct arm_smccc_1_2_regs, a12));
+ DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS, offsetof(struct arm_smccc_1_2_regs, a14));
+ DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS, offsetof(struct arm_smccc_1_2_regs, a16));
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
@@ -128,5 +179,41 @@ int main(void)
DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
DEFINE(SDEI_EVENT_PRIORITY, offsetof(struct sdei_registered_event, priority));
#endif
+#ifdef CONFIG_ARM64_PTR_AUTH
+ DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia));
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+ DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
+#endif
+ BLANK();
+#endif
+#ifdef CONFIG_KEXEC_CORE
+ DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem));
+ DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors));
+ DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page));
+ DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset));
+ DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1));
+ DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head));
+ DEFINE(KIMAGE_START, offsetof(struct kimage, start));
+ BLANK();
+#endif
+#ifdef CONFIG_FUNCTION_TRACER
+ DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
+#endif
+ BLANK();
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0]));
+ DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1]));
+ DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2]));
+ DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3]));
+ DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4]));
+ DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5]));
+ DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6]));
+ DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7]));
+ DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp));
+ DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
+#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
+#endif
return 0;
}
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 7fa6828bb488..d9c9218fa1fd 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -11,11 +11,6 @@
#include <linux/of.h>
#define MAX_CACHE_LEVEL 7 /* Max 7 level supported */
-/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
-#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
-#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
-#define CLIDR_CTYPE(clidr, level) \
- (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
int cache_line_size(void)
{
@@ -43,10 +38,9 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
this_leaf->type = type;
}
-static int __init_cache_level(unsigned int cpu)
+static void detect_cache_level(unsigned int *level_p, unsigned int *leaves_p)
{
- unsigned int ctype, level, leaves, fw_level;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ unsigned int ctype, level, leaves;
for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) {
ctype = get_cache_type(level);
@@ -58,10 +52,34 @@ static int __init_cache_level(unsigned int cpu)
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
}
- if (acpi_disabled)
+ *level_p = level;
+ *leaves_p = leaves;
+}
+
+int early_cache_level(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+ detect_cache_level(&this_cpu_ci->num_levels, &this_cpu_ci->num_leaves);
+
+ return 0;
+}
+
+int init_cache_level(unsigned int cpu)
+{
+ unsigned int level, leaves;
+ int fw_level, ret;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+ detect_cache_level(&level, &leaves);
+
+ if (acpi_disabled) {
fw_level = of_find_last_cache_level(cpu);
- else
- fw_level = acpi_find_last_cache_level(cpu);
+ } else {
+ ret = acpi_get_cache_info(cpu, &fw_level, NULL);
+ if (ret < 0)
+ fw_level = 0;
+ }
if (level < fw_level) {
/*
@@ -78,7 +96,7 @@ static int __init_cache_level(unsigned int cpu)
return 0;
}
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
{
unsigned int level, idx;
enum cache_type type;
@@ -97,6 +115,3 @@ static int __populate_cache_leaves(unsigned int cpu)
}
return 0;
}
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
new file mode 100644
index 000000000000..deff21bfa680
--- /dev/null
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// based on arch/arm/mm/alignment.c
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/exception.h>
+#include <asm/ptrace.h>
+#include <asm/traps.h>
+
+/*
+ * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
+ *
+ * Speed optimisations and better fault handling by Russell King.
+ */
+#define CODING_BITS(i) (i & 0x0e000000)
+
+#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */
+#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */
+#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */
+#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */
+
+#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
+
+#define LDSTHD_I_BIT(i) (i & (1 << 22)) /* double/half-word immed */
+
+#define RN_BITS(i) ((i >> 16) & 15) /* Rn */
+#define RD_BITS(i) ((i >> 12) & 15) /* Rd */
+#define RM_BITS(i) (i & 15) /* Rm */
+
+#define REGMASK_BITS(i) (i & 0xffff)
+
+#define BAD_INSTR 0xdeadc0de
+
+/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */
+#define IS_T32(hi16) \
+ (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
+
+union offset_union {
+ unsigned long un;
+ signed long sn;
+};
+
+#define TYPE_ERROR 0
+#define TYPE_FAULT 1
+#define TYPE_LDST 2
+#define TYPE_DONE 3
+
+static void
+do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs,
+ union offset_union offset)
+{
+ if (!LDST_U_BIT(instr))
+ offset.un = -offset.un;
+
+ if (!LDST_P_BIT(instr))
+ addr += offset.un;
+
+ if (!LDST_P_BIT(instr) || LDST_W_BIT(instr))
+ regs->regs[RN_BITS(instr)] = addr;
+}
+
+static int
+do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+ unsigned int rd = RD_BITS(instr);
+ unsigned int rd2;
+ int load;
+
+ if ((instr & 0xfe000000) == 0xe8000000) {
+ /* ARMv7 Thumb-2 32-bit LDRD/STRD */
+ rd2 = (instr >> 8) & 0xf;
+ load = !!(LDST_L_BIT(instr));
+ } else if (((rd & 1) == 1) || (rd == 14)) {
+ return TYPE_ERROR;
+ } else {
+ load = ((instr & 0xf0) == 0xd0);
+ rd2 = rd + 1;
+ }
+
+ if (load) {
+ unsigned int val, val2;
+
+ if (get_user(val, (u32 __user *)addr) ||
+ get_user(val2, (u32 __user *)(addr + 4)))
+ return TYPE_FAULT;
+ regs->regs[rd] = val;
+ regs->regs[rd2] = val2;
+ } else {
+ if (put_user(regs->regs[rd], (u32 __user *)addr) ||
+ put_user(regs->regs[rd2], (u32 __user *)(addr + 4)))
+ return TYPE_FAULT;
+ }
+ return TYPE_LDST;
+}
+
+/*
+ * LDM/STM alignment handler.
+ *
+ * There are 4 variants of this instruction:
+ *
+ * B = rn pointer before instruction, A = rn pointer after instruction
+ * ------ increasing address ----->
+ * | | r0 | r1 | ... | rx | |
+ * PU = 01 B A
+ * PU = 11 B A
+ * PU = 00 A B
+ * PU = 10 A B
+ */
+static int
+do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+ unsigned int rd, rn, nr_regs, regbits;
+ unsigned long eaddr, newaddr;
+ unsigned int val;
+
+ /* count the number of registers in the mask to be transferred */
+ nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
+
+ rn = RN_BITS(instr);
+ newaddr = eaddr = regs->regs[rn];
+
+ if (!LDST_U_BIT(instr))
+ nr_regs = -nr_regs;
+ newaddr += nr_regs;
+ if (!LDST_U_BIT(instr))
+ eaddr = newaddr;
+
+ if (LDST_P_EQ_U(instr)) /* U = P */
+ eaddr += 4;
+
+ for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
+ regbits >>= 1, rd += 1)
+ if (regbits & 1) {
+ if (LDST_L_BIT(instr)) {
+ if (get_user(val, (u32 __user *)eaddr))
+ return TYPE_FAULT;
+ if (rd < 15)
+ regs->regs[rd] = val;
+ else
+ regs->pc = val;
+ } else {
+ /*
+ * The PC register has a bias of +8 in ARM mode
+ * and +4 in Thumb mode. This means that a read
+ * of the value of PC should account for this.
+ * Since Thumb does not permit STM instructions
+ * to refer to PC, just add 8 here.
+ */
+ val = (rd < 15) ? regs->regs[rd] : regs->pc + 8;
+ if (put_user(val, (u32 __user *)eaddr))
+ return TYPE_FAULT;
+ }
+ eaddr += 4;
+ }
+
+ if (LDST_W_BIT(instr))
+ regs->regs[rn] = newaddr;
+
+ return TYPE_DONE;
+}
+
+/*
+ * Convert Thumb multi-word load/store instruction forms to equivalent ARM
+ * instructions so we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * This implementation was initially based on the algorithm found in
+ * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same
+ * to convert only Thumb ld/st instruction forms to equivalent ARM forms.
+ *
+ * NOTES:
+ * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
+ * 2. If for some reason we're passed an non-ld/st Thumb instruction to
+ * decode, we return 0xdeadc0de. This should never happen under normal
+ * circumstances but if it does, we've got other problems to deal with
+ * elsewhere and we obviously can't fix those problems here.
+ */
+
+static unsigned long thumb2arm(u16 tinstr)
+{
+ u32 L = (tinstr & (1<<11)) >> 11;
+
+ switch ((tinstr & 0xf800) >> 11) {
+ /* 6.6.1 Format 1: */
+ case 0xc000 >> 11: /* 7.1.51 STMIA */
+ case 0xc800 >> 11: /* 7.1.25 LDMIA */
+ {
+ u32 Rn = (tinstr & (7<<8)) >> 8;
+ u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21;
+
+ return 0xe8800000 | W | (L<<20) | (Rn<<16) |
+ (tinstr&255);
+ }
+
+ /* 6.6.1 Format 2: */
+ case 0xb000 >> 11: /* 7.1.48 PUSH */
+ case 0xb800 >> 11: /* 7.1.47 POP */
+ if ((tinstr & (3 << 9)) == 0x0400) {
+ static const u32 subset[4] = {
+ 0xe92d0000, /* STMDB sp!,{registers} */
+ 0xe92d4000, /* STMDB sp!,{registers,lr} */
+ 0xe8bd0000, /* LDMIA sp!,{registers} */
+ 0xe8bd8000 /* LDMIA sp!,{registers,pc} */
+ };
+ return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
+ (tinstr & 255); /* register_list */
+ }
+ fallthrough; /* for illegal instruction case */
+
+ default:
+ return BAD_INSTR;
+ }
+}
+
+/*
+ * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction
+ * handlable by ARM alignment handler, also find the corresponding handler,
+ * so that we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * @pinstr: original Thumb-2 instruction; returns new handlable instruction
+ * @regs: register context.
+ * @poffset: return offset from faulted addr for later writeback
+ *
+ * NOTES:
+ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
+ * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
+ */
+static void *
+do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
+ union offset_union *poffset)
+{
+ u32 instr = *pinstr;
+ u16 tinst1 = (instr >> 16) & 0xffff;
+ u16 tinst2 = instr & 0xffff;
+
+ switch (tinst1 & 0xffe0) {
+ /* A6.3.5 Load/Store multiple */
+ case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
+ case 0xe8a0: /* ...above writeback version */
+ case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */
+ case 0xe920: /* ...above writeback version */
+ /* no need offset decision since handler calculates it */
+ return do_alignment_ldmstm;
+
+ case 0xf840: /* POP/PUSH T3 (single register) */
+ if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
+ u32 L = !!(LDST_L_BIT(instr));
+ const u32 subset[2] = {
+ 0xe92d0000, /* STMDB sp!,{registers} */
+ 0xe8bd0000, /* LDMIA sp!,{registers} */
+ };
+ *pinstr = subset[L] | (1<<RD_BITS(instr));
+ return do_alignment_ldmstm;
+ }
+ /* Else fall through for illegal instruction case */
+ break;
+
+ /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
+ case 0xe860:
+ case 0xe960:
+ case 0xe8e0:
+ case 0xe9e0:
+ poffset->un = (tinst2 & 0xff) << 2;
+ fallthrough;
+
+ case 0xe940:
+ case 0xe9c0:
+ return do_alignment_ldrdstrd;
+
+ /*
+ * No need to handle load/store instructions up to word size
+ * since ARMv6 and later CPUs can perform unaligned accesses.
+ */
+ default:
+ break;
+ }
+ return NULL;
+}
+
+static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst)
+{
+ __le32 instr = 0;
+ int fault;
+
+ fault = get_user(instr, ip);
+ if (fault)
+ return fault;
+
+ *inst = __le32_to_cpu(instr);
+ return 0;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst)
+{
+ __le16 instr = 0;
+ int fault;
+
+ fault = get_user(instr, ip);
+ if (fault)
+ return fault;
+
+ *inst = __le16_to_cpu(instr);
+ return 0;
+}
+
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
+{
+ union offset_union offset;
+ unsigned long instrptr;
+ int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
+ unsigned int type;
+ u32 instr = 0;
+ int isize = 4;
+ int thumb2_32b = 0;
+
+ instrptr = instruction_pointer(regs);
+
+ if (compat_thumb_mode(regs)) {
+ __le16 __user *ptr = (__le16 __user *)(instrptr & ~1);
+ u16 tinstr, tinst2;
+
+ if (alignment_get_thumb(regs, ptr, &tinstr))
+ return 1;
+
+ if (IS_T32(tinstr)) { /* Thumb-2 32-bit */
+ if (alignment_get_thumb(regs, ptr + 1, &tinst2))
+ return 1;
+ instr = ((u32)tinstr << 16) | tinst2;
+ thumb2_32b = 1;
+ } else {
+ isize = 2;
+ instr = thumb2arm(tinstr);
+ }
+ } else {
+ if (alignment_get_arm(regs, (__le32 __user *)instrptr, &instr))
+ return 1;
+ }
+
+ switch (CODING_BITS(instr)) {
+ case 0x00000000: /* 3.13.4 load/store instruction extensions */
+ if (LDSTHD_I_BIT(instr))
+ offset.un = (instr & 0xf00) >> 4 | (instr & 15);
+ else
+ offset.un = regs->regs[RM_BITS(instr)];
+
+ if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */
+ (instr & 0x001000f0) == 0x000000f0) /* STRD */
+ handler = do_alignment_ldrdstrd;
+ else
+ return 1;
+ break;
+
+ case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */
+ if (thumb2_32b) {
+ offset.un = 0;
+ handler = do_alignment_t32_to_handler(&instr, regs, &offset);
+ } else {
+ offset.un = 0;
+ handler = do_alignment_ldmstm;
+ }
+ break;
+
+ default:
+ return 1;
+ }
+
+ type = handler(addr, instr, regs);
+
+ if (type == TYPE_ERROR || type == TYPE_FAULT)
+ return 1;
+
+ if (type == TYPE_LDST)
+ do_alignment_finish_ldst(addr, instr, regs, offset);
+
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc);
+ arm64_skip_faulting_instruction(regs, isize);
+
+ return 0;
+}
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..c87445dde674 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -8,16 +8,16 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
.text
-.pushsection .idmap.text, "awx"
+.pushsection .idmap.text, "a"
/*
- * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
- * cpu_soft_restart.
+ * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2)
*
* @el2_switch: Flag to indicate a switch to EL2 is needed.
* @entry: Location to jump to for soft reset.
@@ -29,12 +29,13 @@
* branch to what would be the reset vector. It must be executed with the
* flat identity mapping.
*/
-ENTRY(__cpu_soft_restart)
- /* Clear sctlr_el1 flags. */
- mrs x12, sctlr_el1
- ldr x13, =SCTLR_ELx_FLAGS
- bic x12, x12, x13
+SYM_TYPED_FUNC_START(cpu_soft_restart)
+ mov_q x12, INIT_SCTLR_EL1_MMU_OFF
pre_disable_mmu_workaround
+ /*
+ * either disable EL1&0 translation regime or disable EL2&0 translation
+ * regime if HCR_EL2.E2H == 1
+ */
msr sctlr_el1, x12
isb
@@ -42,11 +43,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return
-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
-ENDPROC(__cpu_soft_restart)
+ br x8
+SYM_FUNC_END(cpu_soft_restart)
.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
deleted file mode 100644
index ed50e9587ad8..000000000000
--- a/arch/arm64/kernel/cpu-reset.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * CPU reset routines
- *
- * Copyright (C) 2015 Huawei Futurewei Technologies.
- */
-
-#ifndef _ARM64_CPU_RESET_H
-#define _ARM64_CPU_RESET_H
-
-#include <asm/virt.h>
-
-void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
- unsigned long arg0, unsigned long arg1, unsigned long arg2);
-
-static inline void __noreturn cpu_soft_restart(unsigned long entry,
- unsigned long arg0,
- unsigned long arg1,
- unsigned long arg2)
-{
- typeof(__cpu_soft_restart) *restart;
-
- unsigned long el2_switch = !is_kernel_in_hyp_mode() &&
- is_hyp_mode_available();
- restart = (void *)__pa_symbol(__cpu_soft_restart);
-
- cpu_install_idmap();
- restart(el2_switch, entry, arg0, arg1, arg2);
- unreachable();
-}
-
-#endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6a09ca7644ea..967c7c7a4e7d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -11,6 +11,7 @@
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_asm.h>
#include <asm/smp_plat.h>
static bool __maybe_unused
@@ -105,379 +106,12 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
-atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
-extern char __smccc_workaround_1_smc_start[];
-extern char __smccc_workaround_1_smc_end[];
-
-static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
- int i;
-
- for (i = 0; i < SZ_2K; i += 0x80)
- memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
-
- __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
-}
-
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- static DEFINE_RAW_SPINLOCK(bp_lock);
- int cpu, slot = -1;
-
- /*
- * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
- * we're a guest. Skip the hyp-vectors work.
- */
- if (!hyp_vecs_start) {
- __this_cpu_write(bp_hardening_data.fn, fn);
- return;
- }
-
- raw_spin_lock(&bp_lock);
- for_each_possible_cpu(cpu) {
- if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
- slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
- break;
- }
- }
-
- if (slot == -1) {
- slot = atomic_inc_return(&arm64_el2_vector_last_slot);
- BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
- __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
- }
-
- __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
- __this_cpu_write(bp_hardening_data.fn, fn);
- raw_spin_unlock(&bp_lock);
-}
-#else
-#define __smccc_workaround_1_smc_start NULL
-#define __smccc_workaround_1_smc_end NULL
-
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- __this_cpu_write(bp_hardening_data.fn, fn);
-}
-#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-
-#include <linux/arm-smccc.h>
-
-static void call_smc_arch_workaround_1(void)
-{
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void call_hvc_arch_workaround_1(void)
-{
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void qcom_link_stack_sanitization(void)
-{
- u64 tmp;
-
- asm volatile("mov %0, x30 \n"
- ".rept 16 \n"
- "bl . + 4 \n"
- ".endr \n"
- "mov x30, %0 \n"
- : "=&r" (tmp));
-}
-
-static bool __nospectre_v2;
-static int __init parse_nospectre_v2(char *str)
-{
- __nospectre_v2 = true;
- return 0;
-}
-early_param("nospectre_v2", parse_nospectre_v2);
-
-/*
- * -1: No workaround
- * 0: No workaround required
- * 1: Workaround installed
- */
-static int detect_harden_bp_fw(void)
-{
- bp_hardening_cb_t cb;
- void *smccc_start, *smccc_end;
- struct arm_smccc_res res;
- u32 midr = read_cpuid_id();
-
- arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- break;
- default:
- return -1;
- }
-
- switch (arm_smccc_1_1_get_conduit()) {
- case SMCCC_CONDUIT_HVC:
- cb = call_hvc_arch_workaround_1;
- /* This is a guest, no need to patch KVM vectors */
- smccc_start = NULL;
- smccc_end = NULL;
- break;
-
- case SMCCC_CONDUIT_SMC:
- cb = call_smc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_smc_start;
- smccc_end = __smccc_workaround_1_smc_end;
- break;
-
- default:
- return -1;
- }
-
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
- ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
- cb = qcom_link_stack_sanitization;
-
- if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR))
- install_bp_hardening_cb(cb, smccc_start, smccc_end);
-
- return 1;
-}
-
-DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
-
-int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
-static bool __ssb_safe = true;
-
-static const struct ssbd_options {
- const char *str;
- int state;
-} ssbd_options[] = {
- { "force-on", ARM64_SSBD_FORCE_ENABLE, },
- { "force-off", ARM64_SSBD_FORCE_DISABLE, },
- { "kernel", ARM64_SSBD_KERNEL, },
-};
-
-static int __init ssbd_cfg(char *buf)
-{
- int i;
-
- if (!buf || !buf[0])
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
- int len = strlen(ssbd_options[i].str);
-
- if (strncmp(buf, ssbd_options[i].str, len))
- continue;
-
- ssbd_state = ssbd_options[i].state;
- return 0;
- }
-
- return -EINVAL;
-}
-early_param("ssbd", ssbd_cfg);
-
-void __init arm64_update_smccc_conduit(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr,
- int nr_inst)
-{
- u32 insn;
-
- BUG_ON(nr_inst != 1);
-
- switch (arm_smccc_1_1_get_conduit()) {
- case SMCCC_CONDUIT_HVC:
- insn = aarch64_insn_get_hvc_value();
- break;
- case SMCCC_CONDUIT_SMC:
- insn = aarch64_insn_get_smc_value();
- break;
- default:
- return;
- }
-
- *updptr = cpu_to_le32(insn);
-}
-
-void __init arm64_enable_wa2_handling(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr,
- int nr_inst)
-{
- BUG_ON(nr_inst != 1);
- /*
- * Only allow mitigation on EL1 entry/exit and guest
- * ARCH_WORKAROUND_2 handling if the SSBD state allows it to
- * be flipped.
- */
- if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
- *updptr = cpu_to_le32(aarch64_insn_gen_nop());
-}
-
-void arm64_set_ssbd_mitigation(bool state)
-{
- int conduit;
-
- if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
- pr_info_once("SSBD disabled by kernel configuration\n");
- return;
- }
-
- if (this_cpu_has_cap(ARM64_SSBS)) {
- if (state)
- asm volatile(SET_PSTATE_SSBS(0));
- else
- asm volatile(SET_PSTATE_SSBS(1));
- return;
- }
-
- conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
- NULL);
-
- WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
-}
-
-static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
- int scope)
-{
- struct arm_smccc_res res;
- bool required = true;
- s32 val;
- bool this_cpu_safe = false;
- int conduit;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
- if (cpu_mitigations_off())
- ssbd_state = ARM64_SSBD_FORCE_DISABLE;
-
- /* delay setting __ssb_safe until we get a firmware response */
- if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list))
- this_cpu_safe = true;
-
- if (this_cpu_has_cap(ARM64_SSBS)) {
- if (!this_cpu_safe)
- __ssb_safe = false;
- required = false;
- goto out_printmsg;
- }
-
- conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-
- if (conduit == SMCCC_CONDUIT_NONE) {
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- val = (s32)res.a0;
-
- switch (val) {
- case SMCCC_RET_NOT_SUPPORTED:
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
-
- /* machines with mixed mitigation requirements must not return this */
- case SMCCC_RET_NOT_REQUIRED:
- pr_info_once("%s mitigation not required\n", entry->desc);
- ssbd_state = ARM64_SSBD_MITIGATED;
- return false;
-
- case SMCCC_RET_SUCCESS:
- __ssb_safe = false;
- required = true;
- break;
-
- case 1: /* Mitigation not required on this CPU */
- required = false;
- break;
-
- default:
- WARN_ON(1);
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (ssbd_state) {
- case ARM64_SSBD_FORCE_DISABLE:
- arm64_set_ssbd_mitigation(false);
- required = false;
- break;
-
- case ARM64_SSBD_KERNEL:
- if (required) {
- __this_cpu_write(arm64_ssbd_callback_required, 1);
- arm64_set_ssbd_mitigation(true);
- }
- break;
-
- case ARM64_SSBD_FORCE_ENABLE:
- arm64_set_ssbd_mitigation(true);
- required = true;
- break;
-
- default:
- WARN_ON(1);
- break;
- }
-
-out_printmsg:
- switch (ssbd_state) {
- case ARM64_SSBD_FORCE_DISABLE:
- pr_info_once("%s disabled from command-line\n", entry->desc);
- break;
-
- case ARM64_SSBD_FORCE_ENABLE:
- pr_info_once("%s forced from command-line\n", entry->desc);
- break;
- }
-
- return required;
-}
-
-/* known invulnerable cores */
-static const struct midr_range arm64_ssb_cpus[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
- MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
- {},
-};
-
#ifdef CONFIG_ARM64_ERRATUM_1463225
-DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
static bool
has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u32 midr = read_cpuid_id();
- /* Cortex-A76 r0p0 - r3p1 */
- struct midr_range range = MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1);
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range(midr, &range) && is_kernel_in_hyp_mode();
+ return is_affected_midr_range_list(entry, scope) && is_kernel_in_hyp_mode();
}
#endif
@@ -524,80 +158,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \
CAP_MIDR_RANGE_LIST(midr_list)
-/* Track overall mitigation state. We are only mitigated if all cores are ok */
-static bool __hardenbp_enab = true;
-static bool __spectrev2_safe = true;
-
-int get_spectre_v2_workaround_state(void)
-{
- if (__spectrev2_safe)
- return ARM64_BP_HARDEN_NOT_REQUIRED;
-
- if (!__hardenbp_enab)
- return ARM64_BP_HARDEN_UNKNOWN;
-
- return ARM64_BP_HARDEN_WA_NEEDED;
-}
-
-/*
- * List of CPUs that do not need any Spectre-v2 mitigation at all.
- */
-static const struct midr_range spectre_v2_safe_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
- MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
- { /* sentinel */ }
-};
-
-/*
- * Track overall bp hardening for all heterogeneous cores in the machine.
- * We are only considered "safe" if all booted cores are known safe.
- */
-static bool __maybe_unused
-check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
-{
- int need_wa;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
- /* If the CPU has CSV2 set, we're safe */
- if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1),
- ID_AA64PFR0_CSV2_SHIFT))
- return false;
-
- /* Alternatively, we have a list of unaffected CPUs */
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
- return false;
-
- /* Fallback to firmware detection */
- need_wa = detect_harden_bp_fw();
- if (!need_wa)
- return false;
-
- __spectrev2_safe = false;
-
- if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) {
- pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n");
- __hardenbp_enab = false;
- return false;
- }
-
- /* forced off */
- if (__nospectre_v2 || cpu_mitigations_off()) {
- pr_info_once("spectrev2 mitigation disabled by command line option\n");
- __hardenbp_enab = false;
- return false;
- }
-
- if (need_wa < 0) {
- pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n");
- __hardenbp_enab = false;
- }
-
- return (need_wa > 0);
-}
-
static const __maybe_unused struct midr_range tx2_family_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -627,23 +187,13 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
int scope)
{
u32 midr = read_cpuid_id();
- bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+ bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
return is_midr_in_range(midr, &range) && has_dic;
}
-#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367)
-
-static const struct midr_range ca57_a72[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- {},
-};
-
-#endif
-
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
@@ -659,11 +209,41 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
{
ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
},
+ {
+ /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
+ ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2441007
+ {
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2441009
+ {
+ /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+ },
#endif
{},
};
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+static const struct midr_range cavium_erratum_23154_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX),
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX),
+ MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
+ MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+ {},
+};
+#endif
+
#ifdef CONFIG_CAVIUM_ERRATUM_27456
const struct midr_range cavium_erratum_27456_cpus[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
@@ -725,6 +305,8 @@ static const struct midr_range erratum_1418040_list[] = {
MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1),
/* Neoverse-N1 r0p0 to r3p1 */
MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 3, 1),
+ /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */
+ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf),
{},
};
#endif
@@ -735,6 +317,8 @@ static const struct midr_range erratum_845719_list[] = {
MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
/* Brahma-B53 r0p[0] */
MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+ /* Kryo2XX Silver rAp4 */
+ MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4),
{},
};
#endif
@@ -756,10 +340,99 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = {
};
#endif
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
+static const struct midr_range erratum_speculative_at_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_1165522
+ /* Cortex A76 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1530923
+ /* Cortex A55 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0),
+ /* Kryo4xx Silver (rdpe => r1p0) */
+ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
+#endif
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+static const struct midr_range erratum_1463225[] = {
+ /* Cortex-A76 r0p0 - r3p1 */
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1),
+ /* Kryo4xx Gold (rcpe to rfpf) => (r0p0 to r3p1) */
+ MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xf),
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
+#ifdef CONFIG_ARM64_ERRATUM_2139208
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2119858
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
+#endif
+ {},
+};
+#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */
+
+#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
+static const struct midr_range tsb_flush_fail_cpus[] = {
+#ifdef CONFIG_ARM64_ERRATUM_2067961
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2054223
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+#endif
+ {},
+};
+#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */
+
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+static struct midr_range trbe_write_out_of_range_cpus[] = {
+#ifdef CONFIG_ARM64_ERRATUM_2253138
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2224489
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
+#endif
+ {},
+};
+#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+static struct midr_range broken_aarch32_aes[] = {
+ MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ {},
+};
+#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+static const struct midr_range erratum_spec_unpriv_load_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_3117295
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ /* Cortex-A520 r0p0 to r0p1 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+#endif
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
- .desc = "ARM errata 826319, 827319, 824069, 819472",
+ .desc = "ARM errata 826319, 827319, 824069, or 819472",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
ERRATA_MIDR_RANGE_LIST(workaround_clean_cache),
.cpu_enable = cpu_enable_cache_maint_trap,
@@ -803,10 +476,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_23154
{
- /* Cavium ThunderX, pass 1.x */
- .desc = "Cavium erratum 23154",
+ .desc = "Cavium errata 23154 and 38545",
.capability = ARM64_WORKAROUND_CAVIUM_23154,
- ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus),
},
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -841,7 +514,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
{
- .desc = "Qualcomm erratum 1009, ARM erratum 1286807",
+ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = cpucap_multi_entry_cap_matches,
@@ -857,37 +530,54 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
- .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ .desc = "Spectre-v2",
+ .capability = ARM64_SPECTRE_V2,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = check_branch_predictor,
+ .matches = has_spectre_v2,
+ .cpu_enable = spectre_v2_enable_mitigation,
},
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
{
- .desc = "EL2 vector hardening",
- .capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_RANGE_LIST(ca57_a72),
+ /* Must come after the Spectre-v2 entry */
+ .desc = "Spectre-v3a",
+ .capability = ARM64_SPECTRE_V3A,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_spectre_v3a,
+ .cpu_enable = spectre_v3a_enable_mitigation,
},
#endif
{
- .desc = "Speculative Store Bypass Disable",
- .capability = ARM64_SSBD,
+ .desc = "Spectre-v4",
+ .capability = ARM64_SPECTRE_V4,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = has_ssbd_mitigation,
- .midr_range_list = arm64_ssb_cpus,
+ .matches = has_spectre_v4,
+ .cpu_enable = spectre_v4_enable_mitigation,
+ },
+ {
+ .desc = "Spectre-BHB",
+ .capability = ARM64_SPECTRE_BHB,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = is_spectre_bhb_affected,
+ .cpu_enable = spectre_bhb_enable_mitigation,
},
#ifdef CONFIG_ARM64_ERRATUM_1418040
{
.desc = "ARM erratum 1418040",
.capability = ARM64_WORKAROUND_1418040,
ERRATA_MIDR_RANGE_LIST(erratum_1418040_list),
+ /*
+ * We need to allow affected CPUs to come in late, but
+ * also need the non-affected CPUs to be able to come
+ * in at any point in time. Wonderful.
+ */
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
},
#endif
-#ifdef CONFIG_ARM64_ERRATUM_1165522
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT
{
- /* Cortex-A76 r0p0 to r2p0 */
- .desc = "ARM erratum 1165522",
- .capability = ARM64_WORKAROUND_1165522,
- ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+ .desc = "ARM errata 1165522, 1319367, or 1530923",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_AT,
+ ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_list),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -896,6 +586,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_WORKAROUND_1463225,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_cortex_a76_erratum_1463225,
+ .midr_range_list = erratum_1463225,
},
#endif
#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219
@@ -921,50 +612,135 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.cpu_enable = cpu_enable_trap_ctr_access,
},
#endif
-#ifdef CONFIG_ARM64_ERRATUM_1319367
+#ifdef CONFIG_ARM64_ERRATUM_1508412
{
- .desc = "ARM erratum 1319367",
- .capability = ARM64_WORKAROUND_1319367,
- ERRATA_MIDR_RANGE_LIST(ca57_a72),
+ /* we depend on the firmware portion for correctness */
+ .desc = "ARM erratum 1508412 (kernel portion)",
+ .capability = ARM64_WORKAROUND_1508412,
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A77,
+ 0, 0,
+ 1, 0),
},
#endif
+#ifdef CONFIG_NVIDIA_CARMEL_CNP_ERRATUM
{
- }
-};
+ /* NVIDIA Carmel */
+ .desc = "NVIDIA Carmel CNP erratum",
+ .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ },
+#endif
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+ {
+ /*
+ * The erratum work around is handled within the TRBE
+ * driver and can be applied per-cpu. So, we can allow
+ * a late CPU to come online with this erratum.
+ */
+ .desc = "ARM erratum 2119858 or 2139208",
+ .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus),
+ },
+#endif
+#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE
+ {
+ .desc = "ARM erratum 2067961 or 2054223",
+ .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE,
+ ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus),
+ },
+#endif
+#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+ {
+ .desc = "ARM erratum 2253138 or 2224489",
+ .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2645198
+ {
+ .desc = "ARM erratum 2645198",
+ .capability = ARM64_WORKAROUND_2645198,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+ {
+ .desc = "ARM erratum 2077057",
+ .capability = ARM64_WORKAROUND_2077057,
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2064142
+ {
+ .desc = "ARM erratum 2064142",
+ .capability = ARM64_WORKAROUND_2064142,
-ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2457168
+ {
+ .desc = "ARM erratum 2457168",
+ .capability = ARM64_WORKAROUND_2457168,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
-ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- switch (get_spectre_v2_workaround_state()) {
- case ARM64_BP_HARDEN_NOT_REQUIRED:
- return sprintf(buf, "Not affected\n");
- case ARM64_BP_HARDEN_WA_NEEDED:
- return sprintf(buf, "Mitigation: Branch predictor hardening\n");
- case ARM64_BP_HARDEN_UNKNOWN:
- default:
- return sprintf(buf, "Vulnerable\n");
- }
-}
+ /* Cortex-A510 r0p0-r1p1 */
+ CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2038923
+ {
+ .desc = "ARM erratum 2038923",
+ .capability = ARM64_WORKAROUND_2038923,
-ssize_t cpu_show_spec_store_bypass(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- if (__ssb_safe)
- return sprintf(buf, "Not affected\n");
-
- switch (ssbd_state) {
- case ARM64_SSBD_KERNEL:
- case ARM64_SSBD_FORCE_ENABLE:
- if (IS_ENABLED(CONFIG_ARM64_SSBD))
- return sprintf(buf,
- "Mitigation: Speculative Store Bypass disabled via prctl\n");
- }
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1902691
+ {
+ .desc = "ARM erratum 1902691",
+ .capability = ARM64_WORKAROUND_1902691,
- return sprintf(buf, "Vulnerable\n");
-}
+ /* Cortex-A510 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+ {
+ .desc = "ARM erratum 1742098",
+ .capability = ARM64_WORKAROUND_1742098,
+ CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2658417
+ {
+ .desc = "ARM erratum 2658417",
+ .capability = ARM64_WORKAROUND_2658417,
+ /* Cortex-A510 r0p0 - r1p1 */
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+ MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
+ },
+#endif
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ {
+ .desc = "ARM errata 2966298, 3117295",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
+ /* Cortex-A520 r0p0 - r0p1 */
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
+ },
+#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+ {
+ .desc = "AmpereOne erratum AC03_CPU_38",
+ .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ },
+#endif
+ {
+ }
+};
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index 7e07072757af..e133011f64b5 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -15,10 +15,12 @@
#include <asm/smp_plat.h>
extern const struct cpu_operations smp_spin_table_ops;
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
extern const struct cpu_operations acpi_parking_protocol_ops;
+#endif
extern const struct cpu_operations cpu_psci_ops;
-const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
+static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
@@ -94,7 +96,7 @@ static const char *__init cpu_read_enable_method(int cpu)
/*
* Read a cpu's enable method and record it in cpu_ops.
*/
-int __init cpu_read_ops(int cpu)
+int __init init_cpu_ops(int cpu)
{
const char *enable_method = cpu_read_enable_method(cpu);
@@ -109,3 +111,8 @@ int __init cpu_read_ops(int cpu)
return 0;
}
+
+const struct cpu_operations *get_cpu_ops(int cpu)
+{
+ return cpu_ops[cpu];
+}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 04cf64e9f0c9..8d1a634a403e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3,6 +3,61 @@
* Contains CPU feature definitions
*
* Copyright (C) 2015 ARM Ltd.
+ *
+ * A note for the weary kernel hacker: the code here is confusing and hard to
+ * follow! That's partly because it's solving a nasty problem, but also because
+ * there's a little bit of over-abstraction that tends to obscure what's going
+ * on behind a maze of helper functions and macros.
+ *
+ * The basic problem is that hardware folks have started gluing together CPUs
+ * with distinct architectural features; in some cases even creating SoCs where
+ * user-visible instructions are available only on a subset of the available
+ * cores. We try to address this by snapshotting the feature registers of the
+ * boot CPU and comparing these with the feature registers of each secondary
+ * CPU when bringing them up. If there is a mismatch, then we update the
+ * snapshot state to indicate the lowest-common denominator of the feature,
+ * known as the "safe" value. This snapshot state can be queried to view the
+ * "sanitised" value of a feature register.
+ *
+ * The sanitised register values are used to decide which capabilities we
+ * have in the system. These may be in the form of traditional "hwcaps"
+ * advertised to userspace or internal "cpucaps" which are used to configure
+ * things like alternative patching and static keys. While a feature mismatch
+ * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch
+ * may prevent a CPU from being onlined at all.
+ *
+ * Some implementation details worth remembering:
+ *
+ * - Mismatched features are *always* sanitised to a "safe" value, which
+ * usually indicates that the feature is not supported.
+ *
+ * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK"
+ * warning when onlining an offending CPU and the kernel will be tainted
+ * with TAINT_CPU_OUT_OF_SPEC.
+ *
+ * - Features marked as FTR_VISIBLE have their sanitised value visible to
+ * userspace. FTR_VISIBLE features in registers that are only visible
+ * to EL0 by trapping *must* have a corresponding HWCAP so that late
+ * onlining of CPUs cannot lead to features disappearing at runtime.
+ *
+ * - A "feature" is typically a 4-bit register field. A "capability" is the
+ * high-level description derived from the sanitised field value.
+ *
+ * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID
+ * scheme for fields in ID registers") to understand when feature fields
+ * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly).
+ *
+ * - KVM exposes its own view of the feature registers to guest operating
+ * systems regardless of FTR_VISIBLE. This is typically driven from the
+ * sanitised register values to allow virtual CPUs to be migrated between
+ * arbitrary physical CPUs, but some features not present on the host are
+ * also advertised and emulated. Look at sys_reg_descs[] for the gory
+ * details.
+ *
+ * - If the arm64_ftr_bits[] for a register has a missing field, then this
+ * field is treated as STRICT RES0, including for read_sanitised_ftr_reg().
+ * This is stronger than FTR_HIDDEN and can be used to hide features from
+ * KVM guests.
*/
#define pr_fmt(fmt) "CPU features: " fmt
@@ -10,79 +65,87 @@
#include <linux/bsearch.h>
#include <linux/cpumask.h>
#include <linux/crash_dump.h>
+#include <linux/kstrtox.h>
#include <linux/sort.h>
#include <linux/stop_machine.h>
+#include <linux/sysfs.h>
#include <linux/types.h>
+#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/cpu.h>
+#include <linux/kasan.h>
+#include <linux/percpu.h>
+
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/fpsimd.h>
+#include <asm/hwcap.h>
+#include <asm/insn.h>
+#include <asm/kvm_host.h>
#include <asm/mmu_context.h>
+#include <asm/mte.h>
#include <asm/processor.h>
+#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
+#include <asm/vectors.h>
#include <asm/virt.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
-static unsigned long elf_hwcap __read_mostly;
+static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
- COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
- COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
- COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+ COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
#endif
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-EXPORT_SYMBOL(cpu_hwcaps);
-static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS];
+DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
+EXPORT_SYMBOL(system_cpucaps);
+static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS];
+
+DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
-/* Need also bit for ARM64_CB_PATCH */
-DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+bool arm64_use_ng_mappings = false;
+EXPORT_SYMBOL(arm64_use_ng_mappings);
+
+DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
/*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier.
+ * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
+ * support it?
*/
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
-{
- sys_caps_initialised = true;
-}
+static bool __read_mostly allow_mismatched_32bit_el0;
-static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
-{
- /* file-wide pr_fmt adds "CPU features: " prefix */
- pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
- return 0;
-}
+/*
+ * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have
+ * seen at least one CPU capable of 32-bit EL0.
+ */
+DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
-static struct notifier_block cpu_hwcaps_notifier = {
- .notifier_call = dump_cpu_hwcaps
-};
+/*
+ * Mask of CPUs supporting 32-bit EL0.
+ * Only valid if arm64_mismatched_32bit_el0 is enabled.
+ */
+static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
-static int __init register_cpu_hwcaps_dumper(void)
+void dump_cpu_features(void)
{
- atomic_notifier_chain_register(&panic_notifier_list,
- &cpu_hwcaps_notifier);
- return 0;
+ /* file-wide pr_fmt adds "CPU features: " prefix */
+ pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
}
-__initcall(register_cpu_hwcaps_dumper);
-DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
-EXPORT_SYMBOL(cpu_hwcap_keys);
+#define ARM64_CPUID_FIELDS(reg, field, min_value) \
+ .sys_reg = SYS_##reg, \
+ .field_pos = reg##_##field##_SHIFT, \
+ .field_width = reg##_##field##_WIDTH, \
+ .sign = reg##_##field##_SIGNED, \
+ .min_field_value = reg##_##field##_##min_value,
#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
@@ -108,88 +171,176 @@ EXPORT_SYMBOL(cpu_hwcap_keys);
.width = 0, \
}
-/* meta feature for alternatives */
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
-
static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap);
+static bool __system_matches_cap(unsigned int n);
+
/*
* NOTE: Any changes to the visibility of features should be kept in
* sync with the documentation of the CPU feature register ABI.
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TLB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+ FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AMU_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_MPAM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SEL2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
- S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
- S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
- /* Linux doesn't care about the EL3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, ID_AA64PFR0_EL1_AdvSIMD_NI),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RAS_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_B16B16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_EXS_SHIFT, 4, 0),
+ /*
+ * Page size not being supported at Stage-2 is not fatal. You
+ * just give up KVM if PAGE_SIZE isn't supported there. Go fix
+ * your favourite nesting hypervisor.
+ *
+ * There is a small corner case where the hypervisor explicitly
+ * advertises a given granule size at Stage-2 (value 2) on some
+ * vCPUs, and uses the fallback to Stage-1 (value 0) for other
+ * vCPUs. Although this is not forbidden by the architecture, it
+ * indicates that the hypervisor is being silly (or buggy).
+ *
+ * We make no effort to cope with this and pretend that if these
+ * fields are inconsistent across vCPUs, then it isn't worth
+ * trying to bring KVM up.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT, 4, 1),
/*
* We already refuse to boot CPUs that don't support our configured
* page size, so we can only detect mismatches for a page size other
@@ -197,145 +348,268 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* exist in the wild so, even though we don't like it, we'll have to go
* along with it and treat them as non-strict.
*/
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN4_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN64_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN64_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN16_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN16_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT, 4, 0),
/*
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1_SpecSEI_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_PAN_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_LO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HPDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VMIDBits_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_BBM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_TTL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_FWB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_AT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_VARange_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CnP_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_CWG_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_ERG_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1),
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine.
* If we have differing I-cache policies, report it as the weakest - VIPT.
*/
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static struct arm64_ftr_override __ro_after_init no_override = { };
+
struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
.name = "SYS_CTR_EL0",
- .ftr_bits = ftr_ctr
+ .ftr_bits = ftr_ctr,
+ .override = &no_override,
};
static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_InnerShr_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_FCSE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_AuxReg_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_TCM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_ShareLvl_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_OuterShr_SHIFT, 4, 0xf),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_PMSA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_VMSA_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_DoubleLock_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_PMSVer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_CTX_CMPs_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_WRPs_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_BRPs_SHIFT, 4, 0),
/*
* We can instantiate multiple PMU instances with different levels
* of support.
*/
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_mvfr0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPRound_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPShVec_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSqrt_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDivide_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPTrap_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_SIMDReg_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_mvfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDFMAC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDSP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDInt_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDLS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPDNaN_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPFtZ_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_mvfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_FPMisc_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_SIMDMisc_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_dczid[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_gmid[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_isar0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Divide_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Debug_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Coproc_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_CmpBranch_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitField_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitCount_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Swap_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
static const struct arm64_ftr_bits ftr_id_isar5[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_RDM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_CRC32_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SEVL_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CCIDX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_HPDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CnP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_AC2_SHIFT, 4, 0),
+
+ /*
+ * SpecSEI = 1 indicates that the PE might generate an SError on an
+ * external abort on speculative read. It is safe to assume that an
+ * SError might be generated than it will not be. Hence it has been
+ * classified as FTR_HIGHER_SAFE.
+ */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_EL1_SpecSEI_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_isar4[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SWP_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_PSR_M_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SynchPrim_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Barrier_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SMC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Writeback_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_WithShifts_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Unpriv_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_mmfr5[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_EL1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_isar6[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SPECRES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_JSCVT_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_CSV2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State0_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GIC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virt_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Sec_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GenTimer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virtualization_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_MProgMod_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Security_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_ProgMod_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_pfr2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_CSV3_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_dfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+ /* [31:28] TraceFilt */
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MProfDbg_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapTrc_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopTrc_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapDbg_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopSDbg_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopDbg_SHIFT, 4, 0),
ARM64_FTR_END,
};
-static const struct arm64_ftr_bits ftr_zcr[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
- ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */
+static const struct arm64_ftr_bits ftr_id_dfr1[] = {
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_EL1_MTPMU_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -343,7 +617,7 @@ static const struct arm64_ftr_bits ftr_zcr[] = {
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
* 0. Covers the following 32bit registers:
- * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ * id_isar[1-3], id_mmfr[1-3]
*/
static const struct arm64_ftr_bits ftr_generic_32bits[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
@@ -367,13 +641,30 @@ static const struct arm64_ftr_bits ftr_raz[] = {
ARM64_FTR_END,
};
-#define ARM64_FTR_REG(id, table) { \
- .sys_id = id, \
- .reg = &(struct arm64_ftr_reg){ \
- .name = #id, \
- .ftr_bits = &((table)[0]), \
+#define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) { \
+ .sys_id = id, \
+ .reg = &(struct arm64_ftr_reg){ \
+ .name = id_str, \
+ .override = (ovr), \
+ .ftr_bits = &((table)[0]), \
}}
+#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) \
+ __ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr)
+
+#define ARM64_FTR_REG(id, table) \
+ __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
+
+struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
+
+struct arm64_ftr_override arm64_sw_feature_override;
+
static const struct __ftr_reg_entry {
u32 sys_id;
struct arm64_ftr_reg *reg;
@@ -381,7 +672,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
- ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1),
ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
@@ -389,23 +680,32 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
/* Op1 = 0, CRn = 0, CRm = 2 */
- ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0),
ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4),
ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+ ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6),
/* Op1 = 0, CRn = 0, CRm = 3 */
- ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
- ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
+ ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_mvfr0),
+ ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_mvfr1),
ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+ ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2),
+ ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1),
+ ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
/* Op1 = 0, CRn = 0, CRm = 4 */
- ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
- ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
- ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
+ &id_aa64pfr0_override),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
+ &id_aa64pfr1_override),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
+ &id_aa64zfr0_override),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
+ &id_aa64smfr0_override),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -413,15 +713,20 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
- ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
+ &id_aa64isar1_override),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
+ &id_aa64isar2_override),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
- ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
+ &id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
- /* Op1 = 0, CRn = 1, CRm = 2 */
- ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
+ /* Op1 = 1, CRn = 0, CRm = 0 */
+ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
/* Op1 = 3, CRn = 0, CRm = 0 */
{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
@@ -437,16 +742,16 @@ static int search_cmp_ftr_reg(const void *id, const void *regp)
}
/*
- * get_arm64_ftr_reg - Lookup a feature register entry using its
- * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
- * ascending order of sys_id , we use binary search to find a matching
+ * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using
+ * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id, we use binary search to find a matching
* entry.
*
* returns - Upon success, matching ftr_reg entry for id.
* - NULL on failure. It is upto the caller to decide
* the impact of a failure.
*/
-static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id)
{
const struct __ftr_reg_entry *ret;
@@ -460,6 +765,27 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
return NULL;
}
+/*
+ * get_arm64_ftr_reg - Looks up a feature register entry using
+ * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn().
+ *
+ * returns - Upon success, matching ftr_reg entry for id.
+ * - NULL on failure but with an WARN_ON().
+ */
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+ struct arm64_ftr_reg *reg;
+
+ reg = get_arm64_ftr_reg_nowarn(sys_id);
+
+ /*
+ * Requesting a non-existent register search is an error. Warn
+ * and let the caller handle it.
+ */
+ WARN_ON(!reg);
+ return reg;
+}
+
static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
s64 ftr_val)
{
@@ -470,7 +796,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
return reg;
}
-static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
s64 cur)
{
s64 ret = 0;
@@ -480,14 +806,14 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
ret = ftrp->safe_val;
break;
case FTR_LOWER_SAFE:
- ret = new < cur ? new : cur;
+ ret = min(new, cur);
break;
case FTR_HIGHER_OR_ZERO_SAFE:
if (!cur || !new)
break;
- /* Fallthrough */
+ fallthrough;
case FTR_HIGHER_SAFE:
- ret = new > cur ? new : cur;
+ ret = max(new, cur);
break;
default:
BUG();
@@ -498,11 +824,52 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
static void __init sort_ftr_regs(void)
{
- int i;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) {
+ const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg;
+ const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits;
+ unsigned int j = 0;
+
+ /*
+ * Features here must be sorted in descending order with respect
+ * to their shift values and should not overlap with each other.
+ */
+ for (; ftr_bits->width != 0; ftr_bits++, j++) {
+ unsigned int width = ftr_reg->ftr_bits[j].width;
+ unsigned int shift = ftr_reg->ftr_bits[j].shift;
+ unsigned int prev_shift;
+
+ WARN((shift + width) > 64,
+ "%s has invalid feature at shift %d\n",
+ ftr_reg->name, shift);
+
+ /*
+ * Skip the first feature. There is nothing to
+ * compare against for now.
+ */
+ if (j == 0)
+ continue;
- /* Check that the array is sorted so that we can do the binary search */
- for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
- BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
+ prev_shift = ftr_reg->ftr_bits[j - 1].shift;
+ WARN((shift + width) > prev_shift,
+ "%s has feature overlap at shift %d\n",
+ ftr_reg->name, shift);
+ }
+
+ /*
+ * Skip the first register. There is nothing to
+ * compare against for now.
+ */
+ if (i == 0)
+ continue;
+ /*
+ * Registers here must be sorted in ascending order with respect
+ * to sys_id for subsequent binary search in get_arm64_ftr_reg()
+ * to work correctly.
+ */
+ BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id);
+ }
}
/*
@@ -511,7 +878,7 @@ static void __init sort_ftr_regs(void)
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
-static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
{
u64 val = 0;
u64 strict_mask = ~0x0ULL;
@@ -521,11 +888,45 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
const struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
- BUG_ON(!reg);
+ if (!reg)
+ return;
- for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+ for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
u64 ftr_mask = arm64_ftr_mask(ftrp);
s64 ftr_new = arm64_ftr_value(ftrp, new);
+ s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val);
+
+ if ((ftr_mask & reg->override->mask) == ftr_mask) {
+ s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new);
+ char *str = NULL;
+
+ if (ftr_ovr != tmp) {
+ /* Unsafe, remove the override */
+ reg->override->mask &= ~ftr_mask;
+ reg->override->val &= ~ftr_mask;
+ tmp = ftr_ovr;
+ str = "ignoring override";
+ } else if (ftr_new != tmp) {
+ /* Override was valid */
+ ftr_new = tmp;
+ str = "forced";
+ } else if (ftr_ovr == tmp) {
+ /* Override was the safe value */
+ str = "already set";
+ }
+
+ if (str)
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str, tmp);
+ } else if ((ftr_mask & reg->override->val) == ftr_mask) {
+ reg->override->val &= ~ftr_mask;
+ pr_warn("%s[%d:%d]: impossible override, ignored\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift);
+ }
val = arm64_ftr_set_value(ftrp, val, ftr_new);
@@ -551,28 +952,84 @@ extern const struct arm64_cpu_capabilities arm64_errata[];
static const struct arm64_cpu_capabilities arm64_features[];
static void __init
-init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
+init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++) {
if (WARN(caps->capability >= ARM64_NCAPS,
"Invalid capability %d\n", caps->capability))
continue;
- if (WARN(cpu_hwcaps_ptrs[caps->capability],
+ if (WARN(cpucap_ptrs[caps->capability],
"Duplicate entry for capability %d\n",
caps->capability))
continue;
- cpu_hwcaps_ptrs[caps->capability] = caps;
+ cpucap_ptrs[caps->capability] = caps;
}
}
-static void __init init_cpu_hwcaps_indirect_list(void)
+static void __init init_cpucap_indirect_list(void)
{
- init_cpu_hwcaps_indirect_list_from_array(arm64_features);
- init_cpu_hwcaps_indirect_list_from_array(arm64_errata);
+ init_cpucap_indirect_list_from_array(arm64_features);
+ init_cpucap_indirect_list_from_array(arm64_errata);
}
static void __init setup_boot_cpu_capabilities(void);
+static void init_32bit_cpu_features(struct cpuinfo_32bit *info)
+{
+ init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
+ init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+ init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+ init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+ init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+ init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+ init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
+ init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+ init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+ init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+ init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+ init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
+ init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+ init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
+ init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+ init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+ init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+static bool enable_pseudo_nmi;
+
+static int __init early_enable_pseudo_nmi(char *p)
+{
+ return kstrtobool(p, &enable_pseudo_nmi);
+}
+early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi);
+
+static __init void detect_system_supports_pseudo_nmi(void)
+{
+ struct device_node *np;
+
+ if (!enable_pseudo_nmi)
+ return;
+
+ /*
+ * Detect broken MediaTek firmware that doesn't properly save and
+ * restore GIC priorities.
+ */
+ np = of_find_compatible_node(NULL, NULL, "arm,gic-v3");
+ if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) {
+ pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n");
+ enable_pseudo_nmi = false;
+ }
+ of_node_put(np);
+}
+#else /* CONFIG_ARM64_PSEUDO_NMI */
+static inline void detect_system_supports_pseudo_nmi(void) { }
+#endif
+
void __init init_cpu_features(struct cpuinfo_arm64 *info)
{
/* Before we start using the tables, make sure it is sorted */
@@ -585,48 +1042,45 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
- if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
- init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
- init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
- init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
- init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
- init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
- init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
- init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
- init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
- init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
- init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
- init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
- init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
- init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
- init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
- init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
- init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
- }
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+ init_32bit_cpu_features(&info->aarch32);
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
- if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
- init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
- sve_init_vq_map();
+ vec_init_vq_map(ARM64_VEC_SVE);
+
+ cpacr_restore(cpacr);
}
- /*
- * Initialize the indirect array of CPU hwcaps capabilities pointers
- * before we handle the boot CPU below.
- */
- init_cpu_hwcaps_indirect_list();
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
- /*
- * Detect and enable early CPU capabilities based on the boot CPU,
- * after we have initialised the CPU feature infrastructure.
- */
- setup_boot_cpu_capabilities();
+ /*
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+ vec_init_vq_map(ARM64_VEC_SME);
+
+ cpacr_restore(cpacr);
+ }
+
+ if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+ init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -650,7 +1104,9 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
+
update_cpu_ftr_reg(regp, val);
if ((boot & regp->strict_mask) == (val & regp->strict_mask))
return 0;
@@ -659,6 +1115,112 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
return 1;
}
+static void relax_cpu_ftr_reg(u32 sys_id, int field)
+{
+ const struct arm64_ftr_bits *ftrp;
+ struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+ if (!regp)
+ return;
+
+ for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) {
+ if (ftrp->shift == field) {
+ regp->strict_mask &= ~arm64_ftr_mask(ftrp);
+ break;
+ }
+ }
+
+ /* Bogus field? */
+ WARN_ON(!ftrp->width);
+}
+
+static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot)
+{
+ static bool boot_cpu_32bit_regs_overridden = false;
+
+ if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden)
+ return;
+
+ if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0))
+ return;
+
+ boot->aarch32 = info->aarch32;
+ init_32bit_cpu_features(&boot->aarch32);
+ boot_cpu_32bit_regs_overridden = true;
+}
+
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
+ struct cpuinfo_32bit *boot)
+{
+ int taint = 0;
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ /*
+ * If we don't have AArch32 at EL1, then relax the strictness of
+ * EL1-dependent register fields to avoid spurious sanity check fails.
+ */
+ if (!id_aa64pfr0_32bit_el1(pfr0)) {
+ relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_EL1_SMC_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virt_frac_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Sec_frac_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virtualization_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Security_SHIFT);
+ relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_ProgMod_SHIFT);
+ }
+
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ info->reg_id_dfr0, boot->reg_id_dfr0);
+ taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu,
+ info->reg_id_dfr1, boot->reg_id_dfr1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ info->reg_id_isar0, boot->reg_id_isar0);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ info->reg_id_isar1, boot->reg_id_isar1);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ info->reg_id_isar2, boot->reg_id_isar2);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ info->reg_id_isar3, boot->reg_id_isar3);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ info->reg_id_isar4, boot->reg_id_isar4);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ info->reg_id_isar5, boot->reg_id_isar5);
+ taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu,
+ info->reg_id_isar6, boot->reg_id_isar6);
+
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ info->reg_id_mmfr0, boot->reg_id_mmfr0);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ info->reg_id_mmfr1, boot->reg_id_mmfr1);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ info->reg_id_mmfr2, boot->reg_id_mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ info->reg_id_mmfr3, boot->reg_id_mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu,
+ info->reg_id_mmfr4, boot->reg_id_mmfr4);
+ taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu,
+ info->reg_id_mmfr5, boot->reg_id_mmfr5);
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ info->reg_id_pfr0, boot->reg_id_pfr0);
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ info->reg_id_pfr1, boot->reg_id_pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu,
+ info->reg_id_pfr2, boot->reg_id_pfr2);
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ info->reg_mvfr0, boot->reg_mvfr0);
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ info->reg_mvfr1, boot->reg_mvfr1);
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ info->reg_mvfr2, boot->reg_mvfr2);
+
+ return taint;
+}
+
/*
* Update system wide CPU feature registers with the values from a
* non-boot CPU. Also performs SANITY checks to make sure that there
@@ -708,6 +1270,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
+ info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
/*
* Differing PARange support is fine as long as all peripherals and
@@ -720,10 +1284,9 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
+ info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
- /*
- * EL3 is not our concern.
- */
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
@@ -732,61 +1295,62 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
- /*
- * If we have AArch32, we care about 32-bit features for compat.
- * If the system doesn't support AArch32, don't update them.
- */
- if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-
- taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
- info->reg_id_dfr0, boot->reg_id_dfr0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
- info->reg_id_isar0, boot->reg_id_isar0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
- info->reg_id_isar1, boot->reg_id_isar1);
- taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
- info->reg_id_isar2, boot->reg_id_isar2);
- taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
- info->reg_id_isar3, boot->reg_id_isar3);
- taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
- info->reg_id_isar4, boot->reg_id_isar4);
- taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
- info->reg_id_isar5, boot->reg_id_isar5);
+ taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
+ info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+
+ /* Probe vector lengths */
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ if (!system_capabilities_finalized()) {
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
+ vec_update_vq_map(ARM64_VEC_SVE);
+
+ cpacr_restore(cpacr);
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
/*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
*/
- taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
- info->reg_id_mmfr0, boot->reg_id_mmfr0);
- taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
- info->reg_id_mmfr1, boot->reg_id_mmfr1);
- taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
- info->reg_id_mmfr2, boot->reg_id_mmfr2);
- taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
- info->reg_id_mmfr3, boot->reg_id_mmfr3);
- taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
- info->reg_id_pfr0, boot->reg_id_pfr0);
- taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
- info->reg_id_pfr1, boot->reg_id_pfr1);
- taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
- info->reg_mvfr0, boot->reg_mvfr0);
- taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
- info->reg_mvfr1, boot->reg_mvfr1);
- taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
- info->reg_mvfr2, boot->reg_mvfr2);
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+
+ /* Probe vector lengths */
+ if (!system_capabilities_finalized())
+ vec_update_vq_map(ARM64_VEC_SME);
+
+ cpacr_restore(cpacr);
}
- if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
- taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
- info->reg_zcr, boot->reg_zcr);
+ /*
+ * The kernel uses the LDGM/STGM instructions and the number of tags
+ * they read/write depends on the GMID_EL1.BS field. Check that the
+ * value is the same on all CPUs.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_MTE) &&
+ id_aa64pfr1_mte(info->reg_id_aa64pfr1)) {
+ taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
+ info->reg_gmid, boot->reg_gmid);
+ }
- /* Probe vector lengths, unless we already gave up on SVE */
- if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) &&
- !sys_caps_initialised)
- sve_update_vq_map();
+ /*
+ * If we don't have AArch32 at all then skip the checks entirely
+ * as the register values may be UNKNOWN and we're not going to be
+ * using them for anything.
+ *
+ * This relies on a sanitised view of the AArch64 ID registers
+ * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
+ */
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ lazy_init_32bit_cpu_features(info, boot);
+ taint |= update_32bit_cpu_features(cpu, &info->aarch32,
+ &boot->aarch32);
}
/*
@@ -803,34 +1367,43 @@ u64 read_sanitised_ftr_reg(u32 id)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
- /* We shouldn't get a request for an unsupported register */
- BUG_ON(!regp);
+ if (!regp)
+ return 0;
return regp->sys_val;
}
+EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
#define read_sysreg_case(r) \
- case r: return read_sysreg_s(r)
+ case r: val = read_sysreg_s(r); break;
/*
* __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated.
* Read the system register on the current CPU
*/
-static u64 __read_sysreg_by_encoding(u32 sys_id)
+u64 __read_sysreg_by_encoding(u32 sys_id)
{
+ struct arm64_ftr_reg *regp;
+ u64 val;
+
switch (sys_id) {
read_sysreg_case(SYS_ID_PFR0_EL1);
read_sysreg_case(SYS_ID_PFR1_EL1);
+ read_sysreg_case(SYS_ID_PFR2_EL1);
read_sysreg_case(SYS_ID_DFR0_EL1);
+ read_sysreg_case(SYS_ID_DFR1_EL1);
read_sysreg_case(SYS_ID_MMFR0_EL1);
read_sysreg_case(SYS_ID_MMFR1_EL1);
read_sysreg_case(SYS_ID_MMFR2_EL1);
read_sysreg_case(SYS_ID_MMFR3_EL1);
+ read_sysreg_case(SYS_ID_MMFR4_EL1);
+ read_sysreg_case(SYS_ID_MMFR5_EL1);
read_sysreg_case(SYS_ID_ISAR0_EL1);
read_sysreg_case(SYS_ID_ISAR1_EL1);
read_sysreg_case(SYS_ID_ISAR2_EL1);
read_sysreg_case(SYS_ID_ISAR3_EL1);
read_sysreg_case(SYS_ID_ISAR4_EL1);
read_sysreg_case(SYS_ID_ISAR5_EL1);
+ read_sysreg_case(SYS_ID_ISAR6_EL1);
read_sysreg_case(SYS_MVFR0_EL1);
read_sysreg_case(SYS_MVFR1_EL1);
read_sysreg_case(SYS_MVFR2_EL1);
@@ -838,13 +1411,16 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1);
read_sysreg_case(SYS_ID_AA64PFR1_EL1);
read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0);
read_sysreg_case(SYS_CTR_EL0);
@@ -854,32 +1430,126 @@ static u64 __read_sysreg_by_encoding(u32 sys_id)
BUG();
return 0;
}
+
+ regp = get_arm64_ftr_reg(sys_id);
+ if (regp) {
+ val &= ~regp->override->mask;
+ val |= (regp->override->val & regp->override->mask);
+ }
+
+ return val;
}
#include <linux/irqchip/arm-gic-v3.h>
static bool
+has_always(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return true;
+}
+
+static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+ int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+ entry->field_width,
+ entry->sign);
return val >= entry->min_field_value;
}
-static bool
-has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+static u64
+read_scoped_sysreg(const struct arm64_cpu_capabilities *entry, int scope)
{
- u64 val;
-
WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
if (scope == SCOPE_SYSTEM)
- val = read_sanitised_ftr_reg(entry->sys_reg);
+ return read_sanitised_ftr_reg(entry->sys_reg);
else
- val = __read_sysreg_by_encoding(entry->sys_reg);
+ return __read_sysreg_by_encoding(entry->sys_reg);
+}
+
+static bool
+has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ int mask;
+ struct arm64_ftr_reg *regp;
+ u64 val = read_scoped_sysreg(entry, scope);
+
+ regp = get_arm64_ftr_reg(entry->sys_reg);
+ if (!regp)
+ return false;
+
+ mask = cpuid_feature_extract_unsigned_field_width(regp->user_mask,
+ entry->field_pos,
+ entry->field_width);
+ if (!mask)
+ return false;
return feature_matches(val, entry);
}
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 val = read_scoped_sysreg(entry, scope);
+ return feature_matches(val, entry);
+}
+
+const struct cpumask *system_32bit_el0_cpumask(void)
+{
+ if (!system_supports_32bit_el0())
+ return cpu_none_mask;
+
+ if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ return cpu_32bit_el0_mask;
+
+ return cpu_possible_mask;
+}
+
+static int __init parse_32bit_el0_param(char *str)
+{
+ allow_mismatched_32bit_el0 = true;
+ return 0;
+}
+early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param);
+
+static ssize_t aarch32_el0_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct cpumask *mask = system_32bit_el0_cpumask();
+
+ return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask));
+}
+static const DEVICE_ATTR_RO(aarch32_el0);
+
+static int __init aarch32_el0_sysfs_init(void)
+{
+ struct device *dev_root;
+ int ret = 0;
+
+ if (!allow_mismatched_32bit_el0)
+ return 0;
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ ret = device_create_file(dev_root, &dev_attr_aarch32_el0);
+ put_device(dev_root);
+ }
+ return ret;
+}
+device_initcall(aarch32_el0_sysfs_init);
+
+static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!has_cpuid_feature(entry, scope))
+ return allow_mismatched_32bit_el0;
+
+ if (scope == SCOPE_SYSTEM)
+ pr_info("detected: 32-bit EL0 Support\n");
+
+ return true;
+}
+
static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
{
bool has_sre;
@@ -895,24 +1565,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
return has_sre;
}
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u32 midr = read_cpuid_id();
-
- /* Cavium ThunderX pass 1.x and 2.x */
- return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
- MIDR_CPU_VAR_REV(0, 0),
- MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
-}
-
-static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
- return cpuid_feature_extract_signed_field(pfr0,
- ID_AA64PFR0_FP_SHIFT) < 0;
-}
-
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -923,7 +1575,7 @@ static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
else
ctr = read_cpuid_effective_cachetype();
- return ctr & BIT(CTR_IDC_SHIFT);
+ return ctr & BIT(CTR_EL0_IDC_SHIFT);
}
static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused)
@@ -934,7 +1586,7 @@ static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unu
* to the CTR_EL0 on this CPU and emulate it with the real/safe
* value.
*/
- if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT)))
+ if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT)))
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
@@ -948,7 +1600,7 @@ static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
else
ctr = read_cpuid_cachetype();
- return ctr & BIT(CTR_DIC_SHIFT);
+ return ctr & BIT(CTR_EL0_DIC_SHIFT);
}
static bool __maybe_unused
@@ -959,12 +1611,55 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
* may share TLB entries with a CPU stuck in the crashed
* kernel.
*/
- if (is_kdump_kernel())
+ if (is_kdump_kernel())
+ return false;
+
+ if (cpus_have_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
return false;
return has_cpuid_feature(entry, scope);
}
+/*
+ * This check is triggered during the early boot before the cpufeature
+ * is initialised. Checking the status on the local CPU allows the boot
+ * CPU to detect the need for non-global mappings and thus avoiding a
+ * pagetable re-write after all the CPUs are booted. This check will be
+ * anyway run on individual CPUs, allowing us to get the consistent
+ * state once the SMP CPUs are up and thus make the switch to non-global
+ * mappings if required.
+ */
+bool kaslr_requires_kpti(void)
+{
+ if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return false;
+
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
+ return false;
+ }
+
+ /*
+ * Systems affected by Cavium erratum 24756 are incompatible
+ * with KPTI.
+ */
+ if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
+ extern const struct midr_range cavium_erratum_27456_cpus[];
+
+ if (is_midr_in_range_list(read_cpuid_id(),
+ cavium_erratum_27456_cpus))
+ return false;
+ }
+
+ return kaslr_enabled();
+}
+
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -975,6 +1670,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
static const struct midr_range kpti_safe_list[] = {
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
@@ -983,6 +1679,10 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
{ /* sentinel */ }
};
char const *str = "kpti command line option";
@@ -1000,15 +1700,19 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
/*
* For reasons that aren't entirely clear, enabling KPTI on Cavium
* ThunderX leads to apparent I-cache corruption of kernel text, which
- * ends as well as you might imagine. Don't even try.
+ * ends as well as you might imagine. Don't even try. We cannot rely
+ * on the cpus_have_*cap() helpers here to detect the CPU erratum
+ * because cpucap detection order may change. However, since we know
+ * affected CPUs are always in a homogeneous configuration, it is
+ * safe to rely on this_cpu_has_cap() here.
*/
- if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
+ if (this_cpu_has_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
str = "ARM64_WORKAROUND_CAVIUM_27456";
__kpti_forced = -1;
}
/* Useful for KASLR robustness */
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) {
+ if (kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@@ -1035,47 +1739,145 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
+#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
+static bool has_lpa2_at_stage1(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
+}
+
+static bool has_lpa2_at_stage2(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
+}
+
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 mmfr0;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
+}
+#else
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-static void
-kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
+#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
+
+extern
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(int), int flags);
+
+static phys_addr_t __initdata kpti_ng_temp_alloc;
+
+static phys_addr_t __init kpti_ng_pgd_alloc(int shift)
+{
+ kpti_ng_temp_alloc -= PAGE_SIZE;
+ return kpti_ng_temp_alloc;
+}
+
+static int __init __kpti_install_ng_mappings(void *__unused)
{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t);
+ typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
extern kpti_remap_fn idmap_kpti_install_ng_mappings;
kpti_remap_fn *remap_fn;
- static bool kpti_applied = false;
int cpu = smp_processor_id();
+ int levels = CONFIG_PGTABLE_LEVELS;
+ int order = order_base_2(levels);
+ u64 kpti_ng_temp_pgd_pa = 0;
+ pgd_t *kpti_ng_temp_pgd;
+ u64 alloc = 0;
+
+ remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
+
+ if (!cpu) {
+ alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
+ kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
+ kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
+
+ //
+ // Create a minimal page table hierarchy that permits us to map
+ // the swapper page tables temporarily as we traverse them.
+ //
+ // The physical pages are laid out as follows:
+ //
+ // +--------+-/-------+-/------ +-\\--------+
+ // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
+ // +--------+-\-------+-\------ +-//--------+
+ // ^
+ // The first page is mapped into this hierarchy at a PMD_SHIFT
+ // aligned virtual address, so that we can manipulate the PTE
+ // level entries while the mapping is active. The first entry
+ // covers the PTE[] page itself, the remaining entries are free
+ // to be used as a ad-hoc fixmap.
+ //
+ create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
+ KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
+ kpti_ng_pgd_alloc, 0);
+ }
+
+ cpu_install_idmap();
+ remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
+ cpu_uninstall_idmap();
+
+ if (!cpu) {
+ free_pages(alloc, order);
+ arm64_use_ng_mappings = true;
+ }
+
+ return 0;
+}
+
+static void __init kpti_install_ng_mappings(void)
+{
+ /* Check whether KPTI is going to be used */
+ if (!arm64_kernel_unmapped_at_el0())
+ return;
/*
* We don't need to rewrite the page-tables if either we've done
* it already or we have KASLR enabled and therefore have not
* created any global mappings at all.
*/
- if (kpti_applied || kaslr_offset() > 0)
+ if (arm64_use_ng_mappings)
return;
- remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
-
- cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
- cpu_uninstall_idmap();
-
- if (!cpu)
- kpti_applied = true;
-
- return;
+ stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
}
+
#else
-static void
-kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
+static inline void kpti_install_ng_mappings(void)
{
}
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
+{
+ if (__this_cpu_read(this_cpu_vector) == vectors) {
+ const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
+
+ __this_cpu_write(this_cpu_vector, v);
+ }
+
+}
+
static int __init parse_kpti(char *str)
{
bool enabled;
- int ret = strtobool(str, &enabled);
+ int ret = kstrtobool(str, &enabled);
if (ret)
return ret;
@@ -1086,12 +1888,15 @@ static int __init parse_kpti(char *str)
early_param("kpti", parse_kpti);
#ifdef CONFIG_ARM64_HW_AFDBM
+static struct cpumask dbm_cpus __read_mostly;
+
static inline void __cpu_enable_hw_dbm(void)
{
u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
write_sysreg(tcr, tcr_el1);
isb();
+ local_flush_tlb_all();
}
static bool cpu_has_broken_dbm(void)
@@ -1099,7 +1904,12 @@ static bool cpu_has_broken_dbm(void)
/* List of CPUs which have broken DBM support. */
static const struct midr_range cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_1024718
- MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ /* Kryo4xx Silver (rdpe => r1p0) */
+ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2051678
+ MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
#endif
{},
};
@@ -1115,42 +1925,87 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
{
- if (cpu_can_use_dbm(cap))
+ if (cpu_can_use_dbm(cap)) {
__cpu_enable_hw_dbm();
+ cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
+ }
}
static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
int __unused)
{
- static bool detected = false;
/*
* DBM is a non-conflicting feature. i.e, the kernel can safely
* run a mix of CPUs with and without the feature. So, we
* unconditionally enable the capability to allow any late CPU
* to use the feature. We only enable the control bits on the
- * CPU, if it actually supports.
- *
- * We have to make sure we print the "feature" detection only
- * when at least one CPU actually uses it. So check if this CPU
- * can actually use it and print the message exactly once.
- *
- * This is safe as all CPUs (including secondary CPUs - due to the
- * LOCAL_CPU scope - and the hotplugged CPUs - via verification)
- * goes through the "matches" check exactly once. Also if a CPU
- * matches the criteria, it is guaranteed that the CPU will turn
- * the DBM on, as the capability is unconditionally enabled.
+ * CPU, if it is supported.
*/
- if (!detected && cpu_can_use_dbm(cap)) {
- detected = true;
- pr_info("detected: Hardware dirty bit management\n");
- }
return true;
}
#endif
-#ifdef CONFIG_ARM64_VHE
+#ifdef CONFIG_ARM64_AMU_EXTN
+
+/*
+ * The "amu_cpus" cpumask only signals that the CPU implementation for the
+ * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide
+ * information regarding all the events that it supports. When a CPU bit is
+ * set in the cpumask, the user of this feature can only rely on the presence
+ * of the 4 fixed counters for that CPU. But this does not guarantee that the
+ * counters are enabled or access to these counters is enabled by code
+ * executed at higher exception levels (firmware).
+ */
+static struct cpumask amu_cpus __read_mostly;
+
+bool cpu_has_amu_feat(int cpu)
+{
+ return cpumask_test_cpu(cpu, &amu_cpus);
+}
+
+int get_cpu_with_amu_feat(void)
+{
+ return cpumask_any(&amu_cpus);
+}
+
+static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
+{
+ if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
+ cpumask_set_cpu(smp_processor_id(), &amu_cpus);
+
+ /* 0 reference values signal broken/disabled counters */
+ if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168))
+ update_freq_counters_refs();
+ }
+}
+
+static bool has_amu(const struct arm64_cpu_capabilities *cap,
+ int __unused)
+{
+ /*
+ * The AMU extension is a non-conflicting feature: the kernel can
+ * safely run a mix of CPUs with and without support for the
+ * activity monitors extension. Therefore, unconditionally enable
+ * the capability to allow any late CPU to use the feature.
+ *
+ * With this feature unconditionally enabled, the cpu_enable
+ * function will be called for all CPUs that match the criteria,
+ * including secondary and hotplugged, marking this feature as
+ * present on that respective CPU. The enable function will also
+ * print a detection message.
+ */
+
+ return true;
+}
+#else
+int get_cpu_with_amu_feat(void)
+{
+ return nr_cpu_ids;
+}
+#endif
+
static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
{
return is_kernel_in_hyp_mode();
@@ -1169,57 +2024,33 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN))
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
}
-#endif
-
-static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
-{
- u64 val = read_sysreg_s(SYS_CLIDR_EL1);
-
- /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
- WARN_ON(val & (7 << 27 | 7 << 21));
-}
-#ifdef CONFIG_ARM64_SSBD
-static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
+static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
+ int scope)
{
- if (user_mode(regs))
- return 1;
+ if (kvm_get_mode() != KVM_MODE_NV)
+ return false;
- if (instr & BIT(PSTATE_Imm_shift))
- regs->pstate |= PSR_SSBS_BIT;
- else
- regs->pstate &= ~PSR_SSBS_BIT;
+ if (!has_cpuid_feature(cap, scope)) {
+ pr_warn("unavailable: %s\n", cap->desc);
+ return false;
+ }
- arm64_skip_faulting_instruction(regs, 4);
- return 0;
+ return true;
}
-static struct undef_hook ssbs_emulation_hook = {
- .instr_mask = ~(1U << PSTATE_Imm_shift),
- .instr_val = 0xd500401f | PSTATE_SSBS,
- .fn = ssbs_emulation_handler,
-};
-
-static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
+static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
+ int __unused)
{
- static bool undef_hook_registered = false;
- static DEFINE_RAW_SPINLOCK(hook_lock);
+ u64 val;
- raw_spin_lock(&hook_lock);
- if (!undef_hook_registered) {
- register_undef_hook(&ssbs_emulation_hook);
- undef_hook_registered = true;
- }
- raw_spin_unlock(&hook_lock);
+ val = read_sysreg(id_aa64mmfr1_el1);
+ if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
+ return false;
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
- arm64_set_ssbd_mitigation(false);
- } else {
- arm64_set_ssbd_mitigation(true);
- }
+ val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
+ return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
-#endif /* CONFIG_ARM64_SSBD */
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
@@ -1231,7 +2062,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
WARN_ON_ONCE(in_interrupt());
sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
- asm(SET_PSTATE_PAN(1));
+ set_pstate_pan(1);
}
#endif /* CONFIG_ARM64_PAN */
@@ -1244,39 +2075,231 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
-static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap)
+static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
{
- sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
- SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);
+ int boot_val, sec_val;
+
+ /* We don't expect to be called with SCOPE_SYSTEM */
+ WARN_ON(scope == SCOPE_SYSTEM);
+ /*
+ * The ptr-auth feature levels are not intercompatible with lower
+ * levels. Hence we must match ptr-auth feature level of the secondary
+ * CPUs with that of the boot CPU. The level of boot cpu is fetched
+ * from the sanitised register whereas direct register read is done for
+ * the secondary CPUs.
+ * The sanitised feature state is guaranteed to match that of the
+ * boot CPU as a mismatched secondary CPU is parked before it gets
+ * a chance to update the state, with the capability.
+ */
+ boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ if (scope & SCOPE_BOOT_CPU)
+ return boot_val >= entry->min_field_value;
+ /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
+ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+ entry->field_pos, entry->sign);
+ return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
}
-#endif /* CONFIG_ARM64_PTR_AUTH */
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-static bool enable_pseudo_nmi;
+static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+ bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+ bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
-static int __init early_enable_pseudo_nmi(char *p)
+ return apa || apa3 || api;
+}
+
+static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
+ int __unused)
{
- return strtobool(p, &enable_pseudo_nmi);
+ bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF);
+ bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5);
+ bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3);
+
+ return gpa || gpa3 || gpi;
}
-early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi);
+#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_E0PD
+static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
+{
+ if (this_cpu_has_cap(ARM64_HAS_E0PD))
+ sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+}
+#endif /* CONFIG_ARM64_E0PD */
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
- return enable_pseudo_nmi && has_useable_gicv3_cpuif(entry, scope);
+ /*
+ * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * feature, so will be detected earlier.
+ */
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
+ if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ return false;
+
+ return enable_pseudo_nmi;
+}
+
+static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ /*
+ * If we're not using priority masking then we won't be poking PMR_EL1,
+ * and there's no need to relax synchronization of writes to it, and
+ * ICC_CTLR_EL1 might not be accessible and we must avoid reads from
+ * that.
+ *
+ * ARM64_HAS_GIC_PRIO_MASKING has a lower index, and is a boot CPU
+ * feature, so will be detected earlier.
+ */
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_RELAXED_SYNC <= ARM64_HAS_GIC_PRIO_MASKING);
+ if (!cpus_have_cap(ARM64_HAS_GIC_PRIO_MASKING))
+ return false;
+
+ /*
+ * When Priority Mask Hint Enable (PMHE) == 0b0, PMR is not used as a
+ * hint for interrupt distribution, a DSB is not necessary when
+ * unmasking IRQs via PMR, and we can relax the barrier to a NOP.
+ *
+ * Linux itself doesn't use 1:N distribution, so has no need to
+ * set PMHE. The only reason to have it set is if EL3 requires it
+ * (and we can't change it).
+ */
+ return (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) == 0;
}
#endif
+#ifdef CONFIG_ARM64_BTI
+static void bti_enable(const struct arm64_cpu_capabilities *__unused)
+{
+ /*
+ * Use of X16/X17 for tail-calls and trampolines that jump to
+ * function entry points using BR is a requirement for
+ * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI.
+ * So, be strict and forbid other BRs using other registers to
+ * jump onto a PACIxSP instruction:
+ */
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1);
+ isb();
+}
+#endif /* CONFIG_ARM64_BTI */
+
+#ifdef CONFIG_ARM64_MTE
+static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
+
+ mte_cpu_setup();
+
+ /*
+ * Clear the tags in the zero page. This needs to be done via the
+ * linear map which has the Tagged attribute.
+ */
+ if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ mte_clear_page_tags(lm_alias(empty_zero_page));
+ set_page_mte_tagged(ZERO_PAGE(0));
+ }
+
+ kasan_init_hw_tags_cpu();
+}
+#endif /* CONFIG_ARM64_MTE */
+
+static void user_feature_fixup(void)
+{
+ if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
+ struct arm64_ftr_reg *regp;
+
+ regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ if (regp)
+ regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
+ }
+}
+
+static void elf_hwcap_fixup(void)
+{
+#ifdef CONFIG_COMPAT
+ if (cpus_have_cap(ARM64_WORKAROUND_1742098))
+ compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
+#endif /* CONFIG_COMPAT */
+}
+
+#ifdef CONFIG_KVM
+static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ return kvm_get_mode() == KVM_MODE_PROTECTED;
+}
+#endif /* CONFIG_KVM */
+
+static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP);
+}
+
+static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused)
+{
+ set_pstate_dit(1);
+}
+
+static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
+}
+
+/* Internal helper functions to match cpu capability type */
+static bool
+cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
+}
+
+static bool
+cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
+}
+
+static bool
+cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
+{
+ return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
+ .capability = ARM64_ALWAYS_BOOT,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_always,
+ },
+ {
+ .capability = ARM64_ALWAYS_SYSTEM,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_always,
+ },
+ {
.desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
+ .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_GIC_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
+ },
+ {
+ .desc = "Enhanced Counter Virtualization",
+ .capability = ARM64_HAS_ECV,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, IMP)
+ },
+ {
+ .desc = "Enhanced Counter Virtualization (CNTPOFF)",
+ .capability = ARM64_HAS_ECV_CNTPOFF,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF)
},
#ifdef CONFIG_ARM64_PAN
{
@@ -1284,93 +2307,89 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_PAN,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR1_EL1,
- .field_pos = ID_AA64MMFR1_PAN_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
.cpu_enable = cpu_enable_pan,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP)
},
#endif /* CONFIG_ARM64_PAN */
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+#ifdef CONFIG_ARM64_EPAN
+ {
+ .desc = "Enhanced Privileged Access Never",
+ .capability = ARM64_HAS_EPAN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3)
+ },
+#endif /* CONFIG_ARM64_EPAN */
+#ifdef CONFIG_ARM64_LSE_ATOMICS
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 2,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
},
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
- .desc = "Software prefetching using PRFM",
- .capability = ARM64_HAS_NO_HW_PREFETCH,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
- .matches = has_no_hw_prefetch,
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = runs_at_el2,
+ .cpu_enable = cpu_copy_el2regs,
},
-#ifdef CONFIG_ARM64_UAO
{
- .desc = "User Access Override",
- .capability = ARM64_HAS_UAO,
+ .desc = "Nested Virtualization Support",
+ .capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .field_pos = ID_AA64MMFR2_UAO_SHIFT,
- .min_field_value = 1,
- /*
- * We rely on stop_machine() calling uao_thread_switch() to set
- * UAO immediately after patching.
- */
+ .matches = has_nested_virt_support,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
},
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
{
- .capability = ARM64_ALT_PAN_NOT_UAO,
+ .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .matches = cpufeature_pan_not_uao,
+ .matches = has_32bit_el0,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL0, AARCH32)
},
-#endif /* CONFIG_ARM64_PAN */
-#ifdef CONFIG_ARM64_VHE
+#ifdef CONFIG_KVM
{
- .desc = "Virtualization Host Extensions",
- .capability = ARM64_HAS_VIRT_HOST_EXTN,
- .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
- .matches = runs_at_el2,
- .cpu_enable = cpu_copy_el2regs,
+ .desc = "32-bit EL1 Support",
+ .capability = ARM64_HAS_32BIT_EL1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL1, AARCH32)
},
-#endif /* CONFIG_ARM64_VHE */
{
- .desc = "32-bit EL0 Support",
- .capability = ARM64_HAS_32BIT_EL0,
+ .desc = "Protected KVM",
+ .capability = ARM64_KVM_PROTECTED_MODE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = is_kvm_protected_mode,
+ },
+ {
+ .desc = "HCRX_EL2 register",
+ .capability = ARM64_HAS_HCX,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_EL0_SHIFT,
- .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP)
},
+#endif
{
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
+ .cpu_enable = cpu_enable_kpti,
+ .matches = unmap_kernel_at_el0,
/*
* The ID feature fields below are used to indicate that
* the CPU doesn't need KPTI. See unmap_kernel_at_el0 for
* more details.
*/
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_CSV3_SHIFT,
- .min_field_value = 1,
- .matches = unmap_kernel_at_el0,
- .cpu_enable = kpti_install_ng_mappings,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP)
},
{
- /* FP/SIMD is not implemented */
- .capability = ARM64_HAS_NO_FPSIMD,
+ .capability = ARM64_HAS_FPSIMD,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .min_field_value = 0,
- .matches = has_no_fpsimd,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fpsimd,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP)
},
#ifdef CONFIG_ARM64_PMEM
{
@@ -1378,19 +2397,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_DCPOP,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .field_pos = ID_AA64ISAR1_DPB_SHIFT,
- .min_field_value = 1,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, IMP)
},
{
.desc = "Data cache clean to Point of Deep Persistence",
.capability = ARM64_HAS_DCPODP,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_DPB_SHIFT,
- .min_field_value = 2,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, DPB2)
},
#endif
#ifdef CONFIG_ARM64_SVE
@@ -1398,12 +2412,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Scalable Vector Extension",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SVE,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_SVE_SHIFT,
- .min_field_value = ID_AA64PFR0_SVE,
+ .cpu_enable = cpu_enable_sve,
.matches = has_cpuid_feature,
- .cpu_enable = sve_kernel_enable,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, SVE, IMP)
},
#endif /* CONFIG_ARM64_SVE */
#ifdef CONFIG_ARM64_RAS_EXTN
@@ -1412,13 +2423,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_RAS_EXTN,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_RAS_SHIFT,
- .min_field_value = ID_AA64PFR0_RAS_V1,
.cpu_enable = cpu_clear_disr,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
},
#endif /* CONFIG_ARM64_RAS_EXTN */
+#ifdef CONFIG_ARM64_AMU_EXTN
+ {
+ .desc = "Activity Monitors Unit (AMU)",
+ .capability = ARM64_HAS_AMU_EXTN,
+ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ .matches = has_amu,
+ .cpu_enable = cpu_amu_enable,
+ .cpus = &amu_cpus,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
+ },
+#endif /* CONFIG_ARM64_AMU_EXTN */
{
.desc = "Data cache clean to the PoU not required for I/D coherence",
.capability = ARM64_HAS_CACHE_IDC,
@@ -1436,31 +2455,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Stage-2 Force Write-Back",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_HAS_STAGE2_FWB,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR2_FWB_SHIFT,
- .min_field_value = 1,
.matches = has_cpuid_feature,
- .cpu_enable = cpu_has_fwb,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, FWB, IMP)
+ },
+ {
+ .desc = "ARMv8.4 Translation Table Level",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_ARMv8_4_TTL,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, TTL, IMP)
+ },
+ {
+ .desc = "TLB range maintenance instructions",
+ .capability = ARM64_HAS_TLB_RANGE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, TLB, RANGE)
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
- /*
- * Since we turn this on always, we don't want the user to
- * think that the feature is available when it may not be.
- * So hide the description.
- *
- * .desc = "Hardware pagetable Dirty Bit Management",
- *
- */
+ .desc = "Hardware dirty bit management",
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.capability = ARM64_HW_DBM,
- .sys_reg = SYS_ID_AA64MMFR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR1_HADBS_SHIFT,
- .min_field_value = 2,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
+ .cpus = &dbm_cpus,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
{
@@ -1468,34 +2488,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_CRC32,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_CRC32_SHIFT,
- .min_field_value = 1,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, CRC32, IMP)
},
-#ifdef CONFIG_ARM64_SSBD
{
.desc = "Speculative Store Bypassing Safe (SSBS)",
.capability = ARM64_SSBS,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64PFR1_EL1,
- .field_pos = ID_AA64PFR1_SSBS_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
- .cpu_enable = cpu_enable_ssbs,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SSBS, IMP)
},
-#endif
#ifdef CONFIG_ARM64_CNP
{
.desc = "Common not Private translations",
.capability = ARM64_HAS_CNP,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_useable_cnp,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR2_CNP_SHIFT,
- .min_field_value = 1,
.cpu_enable = cpu_enable_cnp,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, CnP, IMP)
},
#endif
{
@@ -1503,53 +2512,60 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_SB,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .field_pos = ID_AA64ISAR1_SB_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, SB, IMP)
},
#ifdef CONFIG_ARM64_PTR_AUTH
{
- .desc = "Address authentication (architected algorithm)",
- .capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
- .type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_APA_SHIFT,
- .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
- .matches = has_cpuid_feature,
- .cpu_enable = cpu_enable_address_auth,
+ .desc = "Address authentication (architected QARMA5 algorithm)",
+ .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_address_auth_cpucap,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, APA, PAuth)
+ },
+ {
+ .desc = "Address authentication (architected QARMA3 algorithm)",
+ .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_address_auth_cpucap,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, APA3, PAuth)
},
{
.desc = "Address authentication (IMP DEF algorithm)",
.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_address_auth_cpucap,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, API, PAuth)
+ },
+ {
+ .capability = ARM64_HAS_ADDRESS_AUTH,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_address_auth_metacap,
+ },
+ {
+ .desc = "Generic authentication (architected QARMA5 algorithm)",
+ .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_API_SHIFT,
- .min_field_value = ID_AA64ISAR1_API_IMP_DEF,
.matches = has_cpuid_feature,
- .cpu_enable = cpu_enable_address_auth,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPA, IMP)
},
{
- .desc = "Generic authentication (architected algorithm)",
- .capability = ARM64_HAS_GENERIC_AUTH_ARCH,
+ .desc = "Generic authentication (architected QARMA3 algorithm)",
+ .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_GPA_SHIFT,
- .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
.matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, GPA3, IMP)
},
{
.desc = "Generic authentication (IMP DEF algorithm)",
.capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
- .sys_reg = SYS_ID_AA64ISAR1_EL1,
- .sign = FTR_UNSIGNED,
- .field_pos = ID_AA64ISAR1_GPI_SHIFT,
- .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
.matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPI, IMP)
+ },
+ {
+ .capability = ARM64_HAS_GENERIC_AUTH,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_generic_auth,
},
#endif /* CONFIG_ARM64_PTR_AUTH */
#ifdef CONFIG_ARM64_PSEUDO_NMI
@@ -1558,24 +2574,177 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
* Depends on having GICv3
*/
.desc = "IRQ priority masking",
- .capability = ARM64_HAS_IRQ_PRIO_MASKING,
+ .capability = ARM64_HAS_GIC_PRIO_MASKING,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = can_use_gic_priorities,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_GIC_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
},
+ {
+ /*
+ * Depends on ARM64_HAS_GIC_PRIO_MASKING
+ */
+ .capability = ARM64_HAS_GIC_PRIO_RELAXED_SYNC,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_gic_prio_relaxed_sync,
+ },
+#endif
+#ifdef CONFIG_ARM64_E0PD
+ {
+ .desc = "E0PD",
+ .capability = ARM64_HAS_E0PD,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .cpu_enable = cpu_enable_e0pd,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, E0PD, IMP)
+ },
+#endif
+ {
+ .desc = "Random Number Generator",
+ .capability = ARM64_HAS_RNG,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, RNDR, IMP)
+ },
+#ifdef CONFIG_ARM64_BTI
+ {
+ .desc = "Branch Target Identification",
+ .capability = ARM64_BTI,
+#ifdef CONFIG_ARM64_BTI_KERNEL
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+#else
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
#endif
+ .matches = has_cpuid_feature,
+ .cpu_enable = bti_enable,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, BT, IMP)
+ },
+#endif
+#ifdef CONFIG_ARM64_MTE
+ {
+ .desc = "Memory Tagging Extension",
+ .capability = ARM64_MTE,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_mte,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE2)
+ },
+ {
+ .desc = "Asymmetric MTE Tag Check Fault",
+ .capability = ARM64_MTE_ASYMM,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3)
+ },
+#endif /* CONFIG_ARM64_MTE */
+ {
+ .desc = "RCpc load-acquire (LDAPR)",
+ .capability = ARM64_HAS_LDAPR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
+ },
+ {
+ .desc = "Fine Grained Traps",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+ },
+#ifdef CONFIG_ARM64_SME
+ {
+ .desc = "Scalable Matrix Extension",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_sme,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, IMP)
+ },
+ /* FA64 should be sorted after the base SME capability */
+ {
+ .desc = "FA64",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME_FA64,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fa64,
+ ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP)
+ },
+ {
+ .desc = "SME2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_SME2,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_sme2,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2)
+ },
+#endif /* CONFIG_ARM64_SME */
+ {
+ .desc = "WFx with timeout",
+ .capability = ARM64_HAS_WFXT,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, WFxT, IMP)
+ },
+ {
+ .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality",
+ .capability = ARM64_HAS_TIDCP1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_trap_el0_impdef,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TIDCP1, IMP)
+ },
+ {
+ .desc = "Data independent timing control (DIT)",
+ .capability = ARM64_HAS_DIT,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_dit,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP)
+ },
+ {
+ .desc = "Memory Copy and Memory Set instructions",
+ .capability = ARM64_HAS_MOPS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_mops,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP)
+ },
+ {
+ .capability = ARM64_HAS_TCR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP)
+ },
+ {
+ .desc = "Stage-1 Permission Indirection Extension (S1PIE)",
+ .capability = ARM64_HAS_S1PIE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP)
+ },
+ {
+ .desc = "VHE for hypervisor only",
+ .capability = ARM64_KVM_HVHE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = hvhe_possible,
+ },
+ {
+ .desc = "Enhanced Virtualization Traps",
+ .capability = ARM64_HAS_EVT,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
+ },
+ {
+ .desc = "52-bit Virtual Addressing for KVM (LPA2)",
+ .capability = ARM64_HAS_LPA2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
{},
};
-#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \
- .matches = has_cpuid_feature, \
- .sys_reg = reg, \
- .field_pos = field, \
- .sign = s, \
- .min_field_value = min_value,
+#define HWCAP_CPUID_MATCH(reg, field, min_value) \
+ .matches = has_user_cpuid_feature, \
+ ARM64_CPUID_FIELDS(reg, field, min_value)
#define __HWCAP_CAP(name, cap_type, cap) \
.desc = name, \
@@ -1583,10 +2752,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.hwcap_type = cap_type, \
.hwcap = cap, \
-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \
+#define HWCAP_CAP(reg, field, min_value, cap_type, cap) \
{ \
__HWCAP_CAP(#cap, cap_type, cap) \
- HWCAP_CPUID_MATCH(reg, field, s, min_value) \
+ HWCAP_CPUID_MATCH(reg, field, min_value) \
}
#define HWCAP_MULTI_CAP(list, cap_type, cap) \
@@ -1596,91 +2765,179 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.match_list = list, \
}
+#define HWCAP_CAP_MATCH(match, cap_type, cap) \
+ { \
+ __HWCAP_CAP(#cap, cap_type, cap) \
+ .matches = match, \
+ }
+
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, APA, PAuth)
+ },
+ {
+ HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, APA3, PAuth)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, API, PAuth)
},
{},
};
static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPA, IMP)
+ },
+ {
+ HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, GPA3, IMP)
},
{
- HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
- FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+ HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPI, IMP)
},
{},
};
#endif
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
- HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA1, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA256, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA512, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, CRC32, IMP, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, IMP, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, FEAT_LSE128, CAP_HWCAP, KERNEL_HWCAP_LSE128),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, RDM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SM3, IMP, CAP_HWCAP, KERNEL_HWCAP_SM3),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SM4),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, DP, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, FHM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+ HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+ HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, FCMA, IMP, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, IMP, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC3, CAP_HWCAP, KERNEL_HWCAP_LRCPC3),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, FRINTTS, IMP, CAP_HWCAP, KERNEL_HWCAP_FRINT),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, SB, IMP, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_BF16),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+#endif
+ HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+#ifdef CONFIG_ARM64_BTI
+ HWCAP_CAP(ID_AA64PFR1_EL1, BT, IMP, CAP_HWCAP, KERNEL_HWCAP_BTI),
#endif
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
+#ifdef CONFIG_ARM64_MTE
+ HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+#endif /* CONFIG_ARM64_MTE */
+ HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
+ HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
+#ifdef CONFIG_ARM64_SME
+ HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+#endif /* CONFIG_ARM64_SME */
{},
};
+#ifdef CONFIG_COMPAT
+static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ /*
+ * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available,
+ * in line with that of arm32 as in vfp_init(). We make sure that the
+ * check is future proof, by making sure value is non-zero.
+ */
+ u32 mvfr1;
+
+ WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
+ if (scope == SCOPE_SYSTEM)
+ mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1);
+ else
+ mvfr1 = read_sysreg_s(SYS_MVFR1_EL1);
+
+ return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDSP_SHIFT) &&
+ cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDInt_SHIFT) &&
+ cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDLS_SHIFT);
+}
+#endif
+
static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
#ifdef CONFIG_COMPAT
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON),
+ HWCAP_CAP(MVFR1_EL1, SIMDFMAC, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4),
+ /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */
+ HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP),
+ HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3),
+ HWCAP_CAP(MVFR1_EL1, FPHP, FP16, CAP_COMPAT_HWCAP, COMPAT_HWCAP_FPHP),
+ HWCAP_CAP(MVFR1_EL1, SIMDHP, SIMDHP_FLOAT, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDHP),
+ HWCAP_CAP(ID_ISAR5_EL1, AES, VMULL, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(ID_ISAR5_EL1, AES, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(ID_ISAR5_EL1, SHA1, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(ID_ISAR5_EL1, SHA2, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(ID_ISAR5_EL1, CRC32, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+ HWCAP_CAP(ID_ISAR6_EL1, DP, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDDP),
+ HWCAP_CAP(ID_ISAR6_EL1, FHM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDFHM),
+ HWCAP_CAP(ID_ISAR6_EL1, SB, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SB),
+ HWCAP_CAP(ID_ISAR6_EL1, BF16, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDBF16),
+ HWCAP_CAP(ID_ISAR6_EL1, I8MM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_I8MM),
+ HWCAP_CAP(ID_PFR2_EL1, SSBS, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SSBS),
#endif
{},
};
-static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
{
switch (cap->hwcap_type) {
case CAP_HWCAP:
@@ -1725,7 +2982,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
return rc;
}
-static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
{
/* We support emulation of accesses to CPU ID feature registers */
cpu_set_named_feature(CPUID);
@@ -1741,18 +2998,19 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask) ||
cpus_have_cap(caps->capability) ||
!caps->matches(caps, cpucap_default_scope(caps)))
continue;
- if (caps->desc)
+ if (caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
- cpus_set_cap(caps->capability);
+
+ __set_bit(caps->capability, system_cpucaps);
if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
- set_bit(caps->capability, boot_capabilities);
+ set_bit(caps->capability, boot_cpucaps);
}
}
@@ -1766,7 +3024,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused)
u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU;
for_each_available_cap(i) {
- const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i];
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i];
if (WARN_ON(!cap))
continue;
@@ -1796,16 +3054,13 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
for (i = 0; i < ARM64_NCAPS; i++) {
unsigned int num;
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask))
continue;
num = caps->capability;
if (!cpus_have_cap(num))
continue;
- /* Ensure cpus_have_const_cap(num) works */
- static_branch_enable(&cpu_hwcap_keys[num]);
-
if (boot_scope && caps->cpu_enable)
/*
* Capabilities with SCOPE_BOOT_CPU scope are finalised
@@ -1834,10 +3089,8 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
* Run through the list of capabilities to check for conflicts.
* If the system has already detected a capability, take necessary
* action on this CPU.
- *
- * Returns "false" on conflicts.
*/
-static bool verify_local_cpu_caps(u16 scope_mask)
+static void verify_local_cpu_caps(u16 scope_mask)
{
int i;
bool cpu_has_cap, system_has_cap;
@@ -1846,7 +3099,7 @@ static bool verify_local_cpu_caps(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask))
continue;
@@ -1882,10 +3135,12 @@ static bool verify_local_cpu_caps(u16 scope_mask)
pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n",
smp_processor_id(), caps->capability,
caps->desc, system_has_cap, cpu_has_cap);
- return false;
- }
- return true;
+ if (cpucap_panic_on_conflict(caps))
+ cpu_panic_kernel();
+ else
+ cpu_die_early();
+ }
}
/*
@@ -1895,16 +3150,12 @@ static bool verify_local_cpu_caps(u16 scope_mask)
static void check_early_cpu_features(void)
{
verify_cpu_asid_bits();
- /*
- * Early features are used by the kernel already. If there
- * is a conflict, we cannot proceed further.
- */
- if (!verify_local_cpu_caps(SCOPE_BOOT_CPU))
- cpu_panic_kernel();
+
+ verify_local_cpu_caps(SCOPE_BOOT_CPU);
}
static void
-verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+__verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++)
@@ -1915,23 +3166,70 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
}
}
-static void verify_sve_features(void)
+static void verify_local_elf_hwcaps(void)
{
- u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
- u64 zcr = read_zcr_features();
+ __verify_local_elf_hwcaps(arm64_elf_hwcaps);
- unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
- unsigned int len = zcr & ZCR_ELx_LEN_MASK;
+ if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1)))
+ __verify_local_elf_hwcaps(compat_elf_hwcaps);
+}
- if (len < safe_len || sve_verify_vq_map()) {
+static void verify_sve_features(void)
+{
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
+ if (vec_verify_vq_map(ARM64_VEC_SVE)) {
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
- /* Add checks on other ZCR bits here if necessary */
+ cpacr_restore(cpacr);
+}
+
+static void verify_sme_features(void)
+{
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
+
+ if (vec_verify_vq_map(ARM64_VEC_SME)) {
+ pr_crit("CPU%d: SME: vector length support mismatch\n",
+ smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpacr_restore(cpacr);
}
+static void verify_hyp_capabilities(void)
+{
+ u64 safe_mmfr1, mmfr0, mmfr1;
+ int parange, ipa_max;
+ unsigned int safe_vmid_bits, vmid_bits;
+
+ if (!IS_ENABLED(CONFIG_KVM))
+ return;
+
+ safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+
+ /* Verify VMID bits */
+ safe_vmid_bits = get_vmid_bits(safe_mmfr1);
+ vmid_bits = get_vmid_bits(mmfr1);
+ if (vmid_bits < safe_vmid_bits) {
+ pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ /* Verify IPA range */
+ parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+ ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+ if (ipa_max < get_kvm_ipa_limit()) {
+ pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
/*
* Run through the enabled system capabilities and enable() it on this CPU.
@@ -1948,16 +3246,17 @@ static void verify_local_cpu_capabilities(void)
* check_early_cpu_features(), as they need to be verified
* on all secondary CPUs.
*/
- if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU))
- cpu_die_early();
-
- verify_local_elf_hwcaps(arm64_elf_hwcaps);
-
- if (system_supports_32bit_el0())
- verify_local_elf_hwcaps(compat_elf_hwcaps);
+ verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ verify_local_elf_hwcaps();
if (system_supports_sve())
verify_sve_features();
+
+ if (system_supports_sme())
+ verify_sme_features();
+
+ if (is_hyp_mode_available())
+ verify_hyp_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -1974,51 +3273,49 @@ void check_local_cpu_capabilities(void)
* Otherwise, this CPU should verify that it has all the system
* advertised capabilities.
*/
- if (!sys_caps_initialised)
+ if (!system_capabilities_finalized())
update_cpu_capabilities(SCOPE_LOCAL_CPU);
else
verify_local_cpu_capabilities();
}
-static void __init setup_boot_cpu_capabilities(void)
+bool this_cpu_has_cap(unsigned int n)
{
- /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
- update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
- /* Enable the SCOPE_BOOT_CPU capabilities alone right away */
- enable_cpu_capabilities(SCOPE_BOOT_CPU);
-}
+ if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
-DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
-EXPORT_SYMBOL(arm64_const_caps_ready);
+ if (cap)
+ return cap->matches(cap, SCOPE_LOCAL_CPU);
+ }
-static void __init mark_const_caps_ready(void)
-{
- static_branch_enable(&arm64_const_caps_ready);
+ return false;
}
+EXPORT_SYMBOL_GPL(this_cpu_has_cap);
-bool this_cpu_has_cap(unsigned int n)
+/*
+ * This helper function is used in a narrow window when,
+ * - The system wide safe registers are set with all the SMP CPUs and,
+ * - The SYSTEM_FEATURE system_cpucaps may not have been set.
+ */
+static bool __maybe_unused __system_matches_cap(unsigned int n)
{
- if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
- const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+ if (n < ARM64_NCAPS) {
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
if (cap)
- return cap->matches(cap, SCOPE_LOCAL_CPU);
+ return cap->matches(cap, SCOPE_SYSTEM);
}
-
return false;
}
void cpu_set_feature(unsigned int num)
{
- WARN_ON(num >= MAX_CPU_FEATURES);
- elf_hwcap |= BIT(num);
+ set_bit(num, elf_hwcap);
}
-EXPORT_SYMBOL_GPL(cpu_set_feature);
bool cpu_have_feature(unsigned int num)
{
- WARN_ON(num >= MAX_CPU_FEATURES);
- return elf_hwcap & BIT(num);
+ return test_bit(num, elf_hwcap);
}
EXPORT_SYMBOL_GPL(cpu_have_feature);
@@ -2029,69 +3326,166 @@ unsigned long cpu_get_elf_hwcap(void)
* note that for userspace compatibility we guarantee that bits 62
* and 63 will always be returned as 0.
*/
- return lower_32_bits(elf_hwcap);
+ return elf_hwcap[0];
}
unsigned long cpu_get_elf_hwcap2(void)
{
- return upper_32_bits(elf_hwcap);
+ return elf_hwcap[1];
}
-static void __init setup_system_capabilities(void)
+static void __init setup_boot_cpu_capabilities(void)
{
/*
- * We have finalised the system-wide safe feature
- * registers, finalise the capabilities that depend
- * on it. Also enable all the available capabilities,
- * that are not enabled already.
+ * The boot CPU's feature register values have been recorded. Detect
+ * boot cpucaps and local cpucaps for the boot CPU, then enable and
+ * patch alternatives for the available boot cpucaps.
*/
- update_cpu_capabilities(SCOPE_SYSTEM);
- enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
+ enable_cpu_capabilities(SCOPE_BOOT_CPU);
+ apply_boot_alternatives();
}
-void __init setup_cpu_features(void)
+void __init setup_boot_cpu_features(void)
{
- u32 cwg;
+ /*
+ * Initialize the indirect array of CPU capabilities pointers before we
+ * handle the boot CPU.
+ */
+ init_cpucap_indirect_list();
- setup_system_capabilities();
- mark_const_caps_ready();
- setup_elf_hwcaps(arm64_elf_hwcaps);
+ /*
+ * Detect broken pseudo-NMI. Must be called _before_ the call to
+ * setup_boot_cpu_capabilities() since it interacts with
+ * can_use_gic_priorities().
+ */
+ detect_system_supports_pseudo_nmi();
- if (system_supports_32bit_el0())
- setup_elf_hwcaps(compat_elf_hwcaps);
+ setup_boot_cpu_capabilities();
+}
+static void __init setup_system_capabilities(void)
+{
+ /*
+ * The system-wide safe feature register values have been finalized.
+ * Detect, enable, and patch alternatives for the available system
+ * cpucaps.
+ */
+ update_cpu_capabilities(SCOPE_SYSTEM);
+ enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ apply_alternatives_all();
+
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ for (int i = 0; i < ARM64_NCAPS; i++) {
+ const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
+
+ if (caps && caps->cpus && caps->desc &&
+ cpumask_any(caps->cpus) < nr_cpu_ids)
+ pr_info("detected: %s on CPU%*pbl\n",
+ caps->desc, cpumask_pr_args(caps->cpus));
+ }
+
+ /*
+ * TTBR0 PAN doesn't have its own cpucap, so log it manually.
+ */
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+}
- sve_setup();
- minsigstksz_setup();
+void __init setup_system_features(void)
+{
+ setup_system_capabilities();
- /* Advertise that we have computed the system capabilities */
- set_sys_caps_initialised();
+ kpti_install_ng_mappings();
+
+ sve_setup();
+ sme_setup();
/*
* Check for sane CTR_EL0.CWG value.
*/
- cwg = cache_type_cwg();
- if (!cwg)
+ if (!cache_type_cwg())
pr_warn("No Cache Writeback Granule information, assuming %d\n",
ARCH_DMA_MINALIGN);
}
-static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
+void __init setup_user_features(void)
{
- return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
+ user_feature_fixup();
+
+ setup_elf_hwcaps(arm64_elf_hwcaps);
+
+ if (system_supports_32bit_el0()) {
+ setup_elf_hwcaps(compat_elf_hwcaps);
+ elf_hwcap_fixup();
+ }
+
+ minsigstksz_setup();
}
+static int enable_mismatched_32bit_el0(unsigned int cpu)
+{
+ /*
+ * The first 32-bit-capable CPU we detected and so can no longer
+ * be offlined by userspace. -1 indicates we haven't yet onlined
+ * a 32-bit-capable CPU.
+ */
+ static int lucky_winner = -1;
+
+ struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+ bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+
+ if (cpu_32bit) {
+ cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
+ static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
+ }
+
+ if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit)
+ return 0;
+
+ if (lucky_winner >= 0)
+ return 0;
+
+ /*
+ * We've detected a mismatch. We need to keep one of our CPUs with
+ * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting
+ * every CPU in the system for a 32-bit task.
+ */
+ lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask,
+ cpu_active_mask);
+ get_cpu_device(lucky_winner)->offline_disabled = true;
+ setup_elf_hwcaps(compat_elf_hwcaps);
+ elf_hwcap_fixup();
+ pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
+ cpu, lucky_winner);
+ return 0;
+}
+
+static int __init init_32bit_el0_mask(void)
+{
+ if (!allow_mismatched_32bit_el0)
+ return 0;
+
+ if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "arm64/mismatched_32bit_el0:online",
+ enable_mismatched_32bit_el0, NULL);
+}
+subsys_initcall_sync(init_32bit_el0_mask);
+
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_enable_swapper_cnp();
}
/*
* We emulate only the following system register space.
- * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7]
+ * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 2 - 7]
* See Table C5-6 System instruction encodings for System register accesses,
* ARMv8 ARM(ARM DDI 0487A.f) for more details.
*/
@@ -2101,7 +3495,7 @@ static inline bool __attribute_const__ is_emulated(u32 id)
sys_reg_CRn(id) == 0x0 &&
sys_reg_Op1(id) == 0x0 &&
(sys_reg_CRm(id) == 0 ||
- ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7))));
+ ((sys_reg_CRm(id) >= 2) && (sys_reg_CRm(id) <= 7))));
}
/*
@@ -2138,7 +3532,7 @@ static int emulate_sys_reg(u32 id, u64 *valp)
if (sys_reg_CRm(id) == 0)
return emulate_id_reg(id, valp);
- regp = get_arm64_ftr_reg(id);
+ regp = get_arm64_ftr_reg_nowarn(id);
if (regp)
*valp = arm64_ftr_reg_user_value(regp);
else
@@ -2163,43 +3557,44 @@ int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt)
return rc;
}
-static int emulate_mrs(struct pt_regs *regs, u32 insn)
+bool try_emulate_mrs(struct pt_regs *regs, u32 insn)
{
u32 sys_reg, rt;
+ if (compat_user_mode(regs) || !aarch64_insn_is_mrs(insn))
+ return false;
+
/*
* sys_reg values are defined as used in mrs/msr instruction.
* shift the imm value to get the encoding.
*/
sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5;
rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn);
- return do_emulate_mrs(regs, sys_reg, rt);
+ return do_emulate_mrs(regs, sys_reg, rt) == 0;
}
-static struct undef_hook mrs_hook = {
- .instr_mask = 0xfff00000,
- .instr_val = 0xd5300000,
- .pstate_mask = PSR_AA32_MODE_MASK,
- .pstate_val = PSR_MODE_EL0t,
- .fn = emulate_mrs,
-};
-
-static int __init enable_mrs_emulation(void)
+enum mitigation_state arm64_get_meltdown_state(void)
{
- register_undef_hook(&mrs_hook);
- return 0;
-}
+ if (__meltdown_safe)
+ return SPECTRE_UNAFFECTED;
+
+ if (arm64_kernel_unmapped_at_el0())
+ return SPECTRE_MITIGATED;
-core_initcall(enable_mrs_emulation);
+ return SPECTRE_VULNERABLE;
+}
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
char *buf)
{
- if (__meltdown_safe)
+ switch (arm64_get_meltdown_state()) {
+ case SPECTRE_UNAFFECTED:
return sprintf(buf, "Not affected\n");
- if (arm64_kernel_unmapped_at_el0())
+ case SPECTRE_MITIGATED:
return sprintf(buf, "Mitigation: PTI\n");
- return sprintf(buf, "Vulnerable\n");
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index e4d6af2fdec7..f372295207fb 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -9,39 +9,9 @@
#include <linux/acpi.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/psci.h>
-#include <asm/cpuidle.h>
-#include <asm/cpu_ops.h>
-
-int arm_cpuidle_init(unsigned int cpu)
-{
- int ret = -EOPNOTSUPP;
-
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
- cpu_ops[cpu]->cpu_init_idle)
- ret = cpu_ops[cpu]->cpu_init_idle(cpu);
-
- return ret;
-}
-
-/**
- * arm_cpuidle_suspend() - function to enter a low-power idle state
- * @arg: argument to pass to CPU suspend operations
- *
- * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
- * operations back-end error code otherwise.
- */
-int arm_cpuidle_suspend(int index)
-{
- int cpu = smp_processor_id();
-
- return cpu_ops[cpu]->cpu_suspend(index);
-}
-
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_ACPI_PROCESSOR_IDLE
#include <acpi/processor.h>
@@ -53,6 +23,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
struct acpi_lpi_state *lpi;
struct acpi_processor *pr = per_cpu(processors, cpu);
+ if (unlikely(!pr || !pr->flags.has_lpi))
+ return -EINVAL;
+
/*
* If the PSCI cpu_suspend function hook has not been initialized
* idle states must not be enabled, so bail out
@@ -60,9 +33,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu)
if (!psci_ops.cpu_suspend)
return -EOPNOTSUPP;
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
count = pr->power.count - 1;
if (count <= 0)
return -ENODEV;
@@ -90,15 +60,15 @@ int acpi_processor_ffh_lpi_probe(unsigned int cpu)
return psci_acpi_cpu_init_idle(cpu);
}
-int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
+__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
{
u32 state = lpi->address;
if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
- return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter,
+ return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
lpi->index, state);
else
- return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter,
+ return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
lpi->index, state);
}
#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 56bba746da1c..47043c0d95ec 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -33,94 +33,145 @@
DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
static struct cpuinfo_arm64 boot_cpu_data;
-static const char *icache_policy_str[] = {
- [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN",
- [ICACHE_POLICY_VIPT] = "VIPT",
- [ICACHE_POLICY_PIPT] = "PIPT",
- [ICACHE_POLICY_VPIPT] = "VPIPT",
-};
+static inline const char *icache_policy_str(int l1ip)
+{
+ switch (l1ip) {
+ case CTR_EL0_L1Ip_VIPT:
+ return "VIPT";
+ case CTR_EL0_L1Ip_PIPT:
+ return "PIPT";
+ default:
+ return "RESERVED/UNKNOWN";
+ }
+}
unsigned long __icache_flags;
static const char *const hwcap_str[] = {
- "fp",
- "asimd",
- "evtstrm",
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- "atomics",
- "fphp",
- "asimdhp",
- "cpuid",
- "asimdrdm",
- "jscvt",
- "fcma",
- "lrcpc",
- "dcpop",
- "sha3",
- "sm3",
- "sm4",
- "asimddp",
- "sha512",
- "sve",
- "asimdfhm",
- "dit",
- "uscat",
- "ilrcpc",
- "flagm",
- "ssbs",
- "sb",
- "paca",
- "pacg",
- "dcpodp",
- "sve2",
- "sveaes",
- "svepmull",
- "svebitperm",
- "svesha3",
- "svesm4",
- "flagm2",
- "frint",
- NULL
+ [KERNEL_HWCAP_FP] = "fp",
+ [KERNEL_HWCAP_ASIMD] = "asimd",
+ [KERNEL_HWCAP_EVTSTRM] = "evtstrm",
+ [KERNEL_HWCAP_AES] = "aes",
+ [KERNEL_HWCAP_PMULL] = "pmull",
+ [KERNEL_HWCAP_SHA1] = "sha1",
+ [KERNEL_HWCAP_SHA2] = "sha2",
+ [KERNEL_HWCAP_CRC32] = "crc32",
+ [KERNEL_HWCAP_ATOMICS] = "atomics",
+ [KERNEL_HWCAP_FPHP] = "fphp",
+ [KERNEL_HWCAP_ASIMDHP] = "asimdhp",
+ [KERNEL_HWCAP_CPUID] = "cpuid",
+ [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm",
+ [KERNEL_HWCAP_JSCVT] = "jscvt",
+ [KERNEL_HWCAP_FCMA] = "fcma",
+ [KERNEL_HWCAP_LRCPC] = "lrcpc",
+ [KERNEL_HWCAP_DCPOP] = "dcpop",
+ [KERNEL_HWCAP_SHA3] = "sha3",
+ [KERNEL_HWCAP_SM3] = "sm3",
+ [KERNEL_HWCAP_SM4] = "sm4",
+ [KERNEL_HWCAP_ASIMDDP] = "asimddp",
+ [KERNEL_HWCAP_SHA512] = "sha512",
+ [KERNEL_HWCAP_SVE] = "sve",
+ [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm",
+ [KERNEL_HWCAP_DIT] = "dit",
+ [KERNEL_HWCAP_USCAT] = "uscat",
+ [KERNEL_HWCAP_ILRCPC] = "ilrcpc",
+ [KERNEL_HWCAP_FLAGM] = "flagm",
+ [KERNEL_HWCAP_SSBS] = "ssbs",
+ [KERNEL_HWCAP_SB] = "sb",
+ [KERNEL_HWCAP_PACA] = "paca",
+ [KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_DCPODP] = "dcpodp",
+ [KERNEL_HWCAP_SVE2] = "sve2",
+ [KERNEL_HWCAP_SVEAES] = "sveaes",
+ [KERNEL_HWCAP_SVEPMULL] = "svepmull",
+ [KERNEL_HWCAP_SVEBITPERM] = "svebitperm",
+ [KERNEL_HWCAP_SVESHA3] = "svesha3",
+ [KERNEL_HWCAP_SVESM4] = "svesm4",
+ [KERNEL_HWCAP_FLAGM2] = "flagm2",
+ [KERNEL_HWCAP_FRINT] = "frint",
+ [KERNEL_HWCAP_SVEI8MM] = "svei8mm",
+ [KERNEL_HWCAP_SVEF32MM] = "svef32mm",
+ [KERNEL_HWCAP_SVEF64MM] = "svef64mm",
+ [KERNEL_HWCAP_SVEBF16] = "svebf16",
+ [KERNEL_HWCAP_I8MM] = "i8mm",
+ [KERNEL_HWCAP_BF16] = "bf16",
+ [KERNEL_HWCAP_DGH] = "dgh",
+ [KERNEL_HWCAP_RNG] = "rng",
+ [KERNEL_HWCAP_BTI] = "bti",
+ [KERNEL_HWCAP_MTE] = "mte",
+ [KERNEL_HWCAP_ECV] = "ecv",
+ [KERNEL_HWCAP_AFP] = "afp",
+ [KERNEL_HWCAP_RPRES] = "rpres",
+ [KERNEL_HWCAP_MTE3] = "mte3",
+ [KERNEL_HWCAP_SME] = "sme",
+ [KERNEL_HWCAP_SME_I16I64] = "smei16i64",
+ [KERNEL_HWCAP_SME_F64F64] = "smef64f64",
+ [KERNEL_HWCAP_SME_I8I32] = "smei8i32",
+ [KERNEL_HWCAP_SME_F16F32] = "smef16f32",
+ [KERNEL_HWCAP_SME_B16F32] = "smeb16f32",
+ [KERNEL_HWCAP_SME_F32F32] = "smef32f32",
+ [KERNEL_HWCAP_SME_FA64] = "smefa64",
+ [KERNEL_HWCAP_WFXT] = "wfxt",
+ [KERNEL_HWCAP_EBF16] = "ebf16",
+ [KERNEL_HWCAP_SVE_EBF16] = "sveebf16",
+ [KERNEL_HWCAP_CSSC] = "cssc",
+ [KERNEL_HWCAP_RPRFM] = "rprfm",
+ [KERNEL_HWCAP_SVE2P1] = "sve2p1",
+ [KERNEL_HWCAP_SME2] = "sme2",
+ [KERNEL_HWCAP_SME2P1] = "sme2p1",
+ [KERNEL_HWCAP_SME_I16I32] = "smei16i32",
+ [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32",
+ [KERNEL_HWCAP_SME_B16B16] = "smeb16b16",
+ [KERNEL_HWCAP_SME_F16F16] = "smef16f16",
+ [KERNEL_HWCAP_MOPS] = "mops",
+ [KERNEL_HWCAP_HBC] = "hbc",
+ [KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
+ [KERNEL_HWCAP_LRCPC3] = "lrcpc3",
+ [KERNEL_HWCAP_LSE128] = "lse128",
};
#ifdef CONFIG_COMPAT
+#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x)
static const char *const compat_hwcap_str[] = {
- "swp",
- "half",
- "thumb",
- "26bit",
- "fastmult",
- "fpa",
- "vfp",
- "edsp",
- "java",
- "iwmmxt",
- "crunch",
- "thumbee",
- "neon",
- "vfpv3",
- "vfpv3d16",
- "tls",
- "vfpv4",
- "idiva",
- "idivt",
- "vfpd32",
- "lpae",
- "evtstrm",
- NULL
+ [COMPAT_KERNEL_HWCAP(SWP)] = "swp",
+ [COMPAT_KERNEL_HWCAP(HALF)] = "half",
+ [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb",
+ [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult",
+ [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(VFP)] = "vfp",
+ [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp",
+ [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(NEON)] = "neon",
+ [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3",
+ [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(TLS)] = "tls",
+ [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4",
+ [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva",
+ [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt",
+ [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */
+ [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae",
+ [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm",
+ [COMPAT_KERNEL_HWCAP(FPHP)] = "fphp",
+ [COMPAT_KERNEL_HWCAP(ASIMDHP)] = "asimdhp",
+ [COMPAT_KERNEL_HWCAP(ASIMDDP)] = "asimddp",
+ [COMPAT_KERNEL_HWCAP(ASIMDFHM)] = "asimdfhm",
+ [COMPAT_KERNEL_HWCAP(ASIMDBF16)] = "asimdbf16",
+ [COMPAT_KERNEL_HWCAP(I8MM)] = "i8mm",
};
+#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x)
static const char *const compat_hwcap2_str[] = {
- "aes",
- "pmull",
- "sha1",
- "sha2",
- "crc32",
- NULL
+ [COMPAT_KERNEL_HWCAP2(AES)] = "aes",
+ [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull",
+ [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1",
+ [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2",
+ [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32",
+ [COMPAT_KERNEL_HWCAP2(SB)] = "sb",
+ [COMPAT_KERNEL_HWCAP2(SSBS)] = "ssbs",
};
#endif /* CONFIG_COMPAT */
@@ -156,16 +207,25 @@ static int c_show(struct seq_file *m, void *v)
seq_puts(m, "Features\t:");
if (compat) {
#ifdef CONFIG_COMPAT
- for (j = 0; compat_hwcap_str[j]; j++)
- if (compat_elf_hwcap & (1 << j))
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+ if (compat_elf_hwcap & (1 << j)) {
+ /*
+ * Warn once if any feature should not
+ * have been present on arm64 platform.
+ */
+ if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+ continue;
+
seq_printf(m, " %s", compat_hwcap_str[j]);
+ }
+ }
- for (j = 0; compat_hwcap2_str[j]; j++)
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
if (compat_elf_hwcap2 & (1 << j))
seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
} else {
- for (j = 0; hwcap_str[j]; j++)
+ for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
if (cpu_have_feature(j))
seq_printf(m, " %s", hwcap_str[j]);
}
@@ -228,7 +288,7 @@ static struct kobj_type cpuregs_kobj_type = {
struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \
\
if (info->reg_midr) \
- return sprintf(buf, "0x%016x\n", info->reg_##_field); \
+ return sprintf(buf, "0x%016llx\n", info->reg_##_field); \
else \
return 0; \
} \
@@ -236,6 +296,7 @@ static struct kobj_type cpuregs_kobj_type = {
CPUREGS_ATTR_RO(midr_el1, midr);
CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(smidr_el1, smidr);
static struct attribute *cpuregs_id_attrs[] = {
&cpuregs_attr_midr_el1.attr,
@@ -248,6 +309,16 @@ static const struct attribute_group cpuregs_attr_group = {
.name = "identification"
};
+static struct attribute *sme_cpuregs_id_attrs[] = {
+ &cpuregs_attr_smidr_el1.attr,
+ NULL
+};
+
+static const struct attribute_group sme_cpuregs_attr_group = {
+ .attrs = sme_cpuregs_id_attrs,
+ .name = "identification"
+};
+
static int cpuid_cpu_online(unsigned int cpu)
{
int rc;
@@ -265,6 +336,8 @@ static int cpuid_cpu_online(unsigned int cpu)
rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group);
if (rc)
kobject_del(&info->kobj);
+ if (system_supports_sme())
+ rc = sysfs_merge_group(&info->kobj, &sme_cpuregs_attr_group);
out:
return rc;
}
@@ -303,25 +376,50 @@ static int __init cpuinfo_regs_init(void)
}
return 0;
}
+device_initcall(cpuinfo_regs_init);
+
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
u32 l1ip = CTR_L1IP(info->reg_ctr);
switch (l1ip) {
- case ICACHE_POLICY_PIPT:
- break;
- case ICACHE_POLICY_VPIPT:
- set_bit(ICACHEF_VPIPT, &__icache_flags);
+ case CTR_EL0_L1Ip_PIPT:
break;
+ case CTR_EL0_L1Ip_VIPT:
default:
- /* Fallthrough */
- case ICACHE_POLICY_VIPT:
/* Assume aliasing */
set_bit(ICACHEF_ALIASING, &__icache_flags);
+ break;
}
- pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+ pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str(l1ip), cpu);
+}
+
+static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
+{
+ info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
+ info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+ info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+ info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+ info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+ info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+ info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+ info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
+ info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+ info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+ info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+ info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+ info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
+ info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+ info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+ info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
+
+ info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+ info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+ info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
}
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -333,7 +431,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
* with the CLIDR_EL1 fields to avoid triggering false warnings
* when there is a mismatch across the CPUs. Keep track of the
* effective value of the CTR_EL0 in our internal records for
- * acurate sanity check and feature enablement.
+ * accurate sanity check and feature enablement.
*/
info->reg_ctr = read_cpuid_effective_cachetype();
info->reg_dczid = read_cpuid(DCZID_EL0);
@@ -344,37 +442,21 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
+ info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
+ info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
- /* Update the 32bit ID registers only if AArch32 is implemented */
- if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
- info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
- info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
- info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
- info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
- info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
- info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
- info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
- info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
- info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
- info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
- info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
- info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
- info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
-
- info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
- info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
- info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
- }
+ if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+ info->reg_gmid = read_cpuid(GMID_EL1);
- if (IS_ENABLED(CONFIG_ARM64_SVE) &&
- id_aa64pfr0_sve(info->reg_id_aa64pfr0))
- info->reg_zcr = read_zcr_features();
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+ __cpuinfo_store_cpu_32bit(&info->aarch32);
cpuinfo_detect_icache_policy(info);
}
@@ -394,5 +476,3 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
}
-
-device_initcall(cpuinfo_regs_init);
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c
index ca4c3e12d8c5..66cde752cd74 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/crash_core.c
@@ -5,15 +5,36 @@
*/
#include <linux/crash_core.h>
+#include <asm/cpufeature.h>
#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pointer_auth.h>
+
+static inline u64 get_tcr_el1_t1sz(void);
+
+static inline u64 get_tcr_el1_t1sz(void)
+{
+ return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+}
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(VA_BITS);
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
+ vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
+ vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
+ vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
+ vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+ vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
+ vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
kimage_voffset);
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
PHYS_OFFSET);
+ vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n",
+ get_tcr_el1_t1sz());
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n",
+ system_supports_address_auth() ?
+ ptrauth_kernel_pac_mask() : 0);
}
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
index e6e284265f19..670e4ce81822 100644
--- a/arch/arm64/kernel/crash_dump.c
+++ b/arch/arm64/kernel/crash_dump.c
@@ -9,25 +9,11 @@
#include <linux/crash_dump.h>
#include <linux/errno.h>
#include <linux/io.h>
-#include <linux/memblock.h>
-#include <linux/uaccess.h>
+#include <linux/uio.h>
#include <asm/memory.h>
-/**
- * copy_oldmem_page() - copy one page from old kernel memory
- * @pfn: page frame number to be copied
- * @buf: buffer where the copied page is placed
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page
- * @userbuf: if set, @buf is in a user address space
- *
- * This function copies one page from old kernel memory into buffer pointed by
- * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
- * copied or negative error in case of failure.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset,
- int userbuf)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
{
void *vaddr;
@@ -38,14 +24,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!vaddr)
return -ENOMEM;
- if (userbuf) {
- if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
- memunmap(vaddr);
- return -EFAULT;
- }
- } else {
- memcpy(buf, vaddr + offset, csize);
- }
+ csize = copy_to_iter(vaddr + offset, csize, iter);
memunmap(vaddr);
@@ -64,5 +43,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+ *ppos += count;
+
return count;
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 48222a4760c2..64f2ecbdfe5c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -28,7 +28,7 @@
u8 debug_monitors_arch(void)
{
return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1),
- ID_AA64DFR0_DEBUGVER_SHIFT);
+ ID_AA64DFR0_EL1_DebugVer_SHIFT);
}
/*
@@ -130,7 +130,7 @@ static int clear_os_lock(unsigned int cpu)
return 0;
}
-static int debug_monitors_init(void)
+static int __init debug_monitors_init(void)
{
return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
"arm64/debug_monitors:starting",
@@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init);
/*
* Single step API and exception handling.
*/
-static void set_regs_spsr_ss(struct pt_regs *regs)
+static void set_user_regs_spsr_ss(struct user_pt_regs *regs)
{
regs->pstate |= DBG_SPSR_SS;
}
-NOKPROBE_SYMBOL(set_regs_spsr_ss);
+NOKPROBE_SYMBOL(set_user_regs_spsr_ss);
-static void clear_regs_spsr_ss(struct pt_regs *regs)
+static void clear_user_regs_spsr_ss(struct user_pt_regs *regs)
{
regs->pstate &= ~DBG_SPSR_SS;
}
-NOKPROBE_SYMBOL(clear_regs_spsr_ss);
+NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
+
+#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
+#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
static DEFINE_SPINLOCK(debug_hook_lock);
static LIST_HEAD(user_step_hook);
@@ -199,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook)
* So we call all the registered handlers, until the right handler is
* found which returns zero.
*/
-static int call_step_hook(struct pt_regs *regs, unsigned int esr)
+static int call_step_hook(struct pt_regs *regs, unsigned long esr)
{
struct step_hook *hook;
struct list_head *list;
@@ -231,12 +234,11 @@ static void send_user_sigtrap(int si_code)
if (interrupts_enabled(regs))
local_irq_enable();
- arm64_force_sig_fault(SIGTRAP, si_code,
- (void __user *)instruction_pointer(regs),
- "User debug trap");
+ arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+ "User debug trap");
}
-static int single_step_handler(unsigned long unused, unsigned int esr,
+static int single_step_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
bool handler_found = false;
@@ -297,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook)
unregister_debug_hook(&hook->node);
}
-static int call_break_hook(struct pt_regs *regs, unsigned int esr)
+static int call_break_hook(struct pt_regs *regs, unsigned long esr)
{
struct break_hook *hook;
struct list_head *list;
- int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+ int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
@@ -310,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
* entirely not preemptible, and we can use rcu list safely here.
*/
list_for_each_entry_rcu(hook, list, node) {
- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
if ((comment & ~hook->mask) == hook->imm)
fn = hook->fn;
@@ -320,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
}
NOKPROBE_SYMBOL(call_break_hook);
-static int brk_handler(unsigned long unused, unsigned int esr,
+static int brk_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
@@ -376,15 +378,13 @@ int aarch32_break_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(aarch32_break_handler);
-static int __init debug_traps_init(void)
+void __init debug_traps_init(void)
{
hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
TRAP_TRACE, "single-step handler");
hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "ptrace BRK handler");
- return 0;
+ TRAP_BRKPT, "BRK handler");
}
-arch_initcall(debug_traps_init);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
@@ -393,17 +393,26 @@ void user_rewind_single_step(struct task_struct *task)
* If single step is active for this thread, then set SPSR.SS
* to 1 to avoid returning to the active-pending state.
*/
- if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP))
+ if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
set_regs_spsr_ss(task_pt_regs(task));
}
NOKPROBE_SYMBOL(user_rewind_single_step);
void user_fastforward_single_step(struct task_struct *task)
{
- if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP))
+ if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
clear_regs_spsr_ss(task_pt_regs(task));
}
+void user_regs_reset_single_step(struct user_pt_regs *regs,
+ struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
+ set_user_regs_spsr_ss(regs);
+ else
+ clear_user_regs_spsr_ss(regs);
+}
+
/* Kernel API */
void kernel_enable_single_step(struct pt_regs *regs)
{
@@ -429,6 +438,11 @@ int kernel_active_single_step(void)
}
NOKPROBE_SYMBOL(kernel_active_single_step);
+void kernel_rewind_single_step(struct pt_regs *regs)
+{
+ set_regs_spsr_ss(regs);
+}
+
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
deleted file mode 100644
index 304d5b02ca67..000000000000
--- a/arch/arm64/kernel/efi-entry.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * EFI entry point.
- *
- * Copyright (C) 2013, 2014 Red Hat, Inc.
- * Author: Mark Salter <msalter@redhat.com>
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-
-#include <asm/assembler.h>
-
-#define EFI_LOAD_ERROR 0x8000000000000001
-
- __INIT
-
- /*
- * We arrive here from the EFI boot manager with:
- *
- * * CPU in little-endian mode
- * * MMU on with identity-mapped RAM
- * * Icache and Dcache on
- *
- * We will most likely be running from some place other than where
- * we want to be. The kernel image wants to be placed at TEXT_OFFSET
- * from start of RAM.
- */
-ENTRY(entry)
- /*
- * Create a stack frame to save FP/LR with extra space
- * for image_addr variable passed to efi_entry().
- */
- stp x29, x30, [sp, #-32]!
- mov x29, sp
-
- /*
- * Call efi_entry to do the real work.
- * x0 and x1 are already set up by firmware. Current runtime
- * address of image is calculated and passed via *image_addr.
- *
- * unsigned long efi_entry(void *handle,
- * efi_system_table_t *sys_table,
- * unsigned long *image_addr) ;
- */
- adr_l x8, _text
- add x2, sp, 16
- str x8, [x2]
- bl efi_entry
- cmn x0, #1
- b.eq efi_load_fail
-
- /*
- * efi_entry() will have copied the kernel image if necessary and we
- * return here with device tree address in x0 and the kernel entry
- * point stored at *image_addr. Save those values in registers which
- * are callee preserved.
- */
- mov x20, x0 // DTB address
- ldr x0, [sp, #16] // relocated _text address
- ldr w21, =stext_offset
- add x21, x0, x21
-
- /*
- * Calculate size of the kernel Image (same for original and copy).
- */
- adr_l x1, _text
- adr_l x2, _edata
- sub x1, x2, x1
-
- /*
- * Flush the copied Image to the PoC, and ensure it is not shadowed by
- * stale icache entries from before relocation.
- */
- bl __flush_dcache_area
- ic ialluis
-
- /*
- * Ensure that the rest of this function (in the original Image) is
- * visible when the caches are disabled. The I-cache can't have stale
- * entries for the VA range of the current image, so no maintenance is
- * necessary.
- */
- adr x0, entry
- adr x1, entry_end
- sub x1, x1, x0
- bl __flush_dcache_area
-
- /* Turn off Dcache and MMU */
- mrs x0, CurrentEL
- cmp x0, #CurrentEL_EL2
- b.ne 1f
- mrs x0, sctlr_el2
- bic x0, x0, #1 << 0 // clear SCTLR.M
- bic x0, x0, #1 << 2 // clear SCTLR.C
- pre_disable_mmu_workaround
- msr sctlr_el2, x0
- isb
- b 2f
-1:
- mrs x0, sctlr_el1
- bic x0, x0, #1 << 0 // clear SCTLR.M
- bic x0, x0, #1 << 2 // clear SCTLR.C
- pre_disable_mmu_workaround
- msr sctlr_el1, x0
- isb
-2:
- /* Jump to kernel entry point */
- mov x0, x20
- mov x1, xzr
- mov x2, xzr
- mov x3, xzr
- br x21
-
-efi_load_fail:
- mov x0, #EFI_LOAD_ERROR
- ldp x29, x30, [sp], #32
- ret
-
-entry_end:
-ENDPROC(entry)
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index a7cfacce3e15..11d7f7de202d 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -7,54 +7,72 @@
#include <linux/pe.h>
#include <linux/sizes.h>
+ .macro efi_signature_nop
+#ifdef CONFIG_EFI
+.L_head:
+ /*
+ * This ccmp instruction has no meaningful effect except that
+ * its opcode forms the magic "MZ" signature required by UEFI.
+ */
+ ccmp x18, #0, #0xd, pl
+#else
+ /*
+ * Bootloaders may inspect the opcode at the start of the kernel
+ * image to decide if the kernel is capable of booting via UEFI.
+ * So put an ordinary NOP here, not the "MZ.." pseudo-nop above.
+ */
+ nop
+#endif
+ .endm
+
.macro __EFI_PE_HEADER
+#ifdef CONFIG_EFI
+ .set .Lpe_header_offset, . - .L_head
.long PE_MAGIC
-coff_header:
.short IMAGE_FILE_MACHINE_ARM64 // Machine
- .short section_count // NumberOfSections
+ .short .Lsection_count // NumberOfSections
.long 0 // TimeDateStamp
.long 0 // PointerToSymbolTable
.long 0 // NumberOfSymbols
- .short section_table - optional_header // SizeOfOptionalHeader
+ .short .Lsection_table - .Loptional_header // SizeOfOptionalHeader
.short IMAGE_FILE_DEBUG_STRIPPED | \
IMAGE_FILE_EXECUTABLE_IMAGE | \
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
-optional_header:
+.Loptional_header:
.short PE_OPT_MAGIC_PE32PLUS // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long __initdata_begin - efi_header_end // SizeOfCode
+ .long __initdata_begin - .Lefi_header_end // SizeOfCode
.long __pecoff_data_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
- .long __efistub_entry - _head // AddressOfEntryPoint
- .long efi_header_end - _head // BaseOfCode
+ .long __efistub_efi_pe_entry - .L_head // AddressOfEntryPoint
+ .long .Lefi_header_end - .L_head // BaseOfCode
-extra_header_fields:
.quad 0 // ImageBase
- .long SZ_4K // SectionAlignment
+ .long SEGMENT_ALIGN // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
- .short 0 // MajorImageVersion
- .short 0 // MinorImageVersion
+ .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
+ .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
.short 0 // MajorSubsystemVersion
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
- .long _end - _head // SizeOfImage
+ .long _end - .L_head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long efi_header_end - _head // SizeOfHeaders
+ .long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short 0 // DllCharacteristics
+ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
.quad 0 // SizeOfHeapCommit
.long 0 // LoaderFlags
- .long (section_table - .) / 8 // NumberOfRvaAndSizes
+ .long (.Lsection_table - .) / 8 // NumberOfRvaAndSizes
.quad 0 // ExportTable
.quad 0 // ImportTable
@@ -63,18 +81,56 @@ extra_header_fields:
.quad 0 // CertificationTable
.quad 0 // BaseRelocationTable
+#if defined(CONFIG_DEBUG_EFI) || defined(CONFIG_ARM64_BTI_KERNEL)
+ .long .Lefi_debug_table - .L_head // DebugTable
+ .long .Lefi_debug_table_size
+
+ /*
+ * The debug table is referenced via its Relative Virtual Address (RVA),
+ * which is only defined for those parts of the image that are covered
+ * by a section declaration. Since this header is not covered by any
+ * section, the debug table must be emitted elsewhere. So stick it in
+ * the .init.rodata section instead.
+ *
+ * Note that the payloads themselves are permitted to have zero RVAs,
+ * which means we can simply put those right after the section headers.
+ */
+ __INITRODATA
+
+ .align 2
+.Lefi_debug_table:
#ifdef CONFIG_DEBUG_EFI
- .long efi_debug_table - _head // DebugTable
- .long efi_debug_table_size
+ // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
+ .long 0 // Characteristics
+ .long 0 // TimeDateStamp
+ .short 0 // MajorVersion
+ .short 0 // MinorVersion
+ .long IMAGE_DEBUG_TYPE_CODEVIEW // Type
+ .long .Lefi_debug_entry_size // SizeOfData
+ .long 0 // RVA
+ .long .Lefi_debug_entry - .L_head // FileOffset
+#endif
+#ifdef CONFIG_ARM64_BTI_KERNEL
+ .long 0 // Characteristics
+ .long 0 // TimeDateStamp
+ .short 0 // MajorVersion
+ .short 0 // MinorVersion
+ .long IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS // Type
+ .long 4 // SizeOfData
+ .long 0 // RVA
+ .long .Lefi_dll_characteristics_ex - .L_head // FileOffset
+#endif
+ .set .Lefi_debug_table_size, . - .Lefi_debug_table
+ .previous
#endif
// Section table
-section_table:
+.Lsection_table:
.ascii ".text\0\0\0"
- .long __initdata_begin - efi_header_end // VirtualSize
- .long efi_header_end - _head // VirtualAddress
- .long __initdata_begin - efi_header_end // SizeOfRawData
- .long efi_header_end - _head // PointerToRawData
+ .long __initdata_begin - .Lefi_header_end // VirtualSize
+ .long .Lefi_header_end - .L_head // VirtualAddress
+ .long __initdata_begin - .Lefi_header_end // SizeOfRawData
+ .long .Lefi_header_end - .L_head // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
@@ -86,9 +142,9 @@ section_table:
.ascii ".data\0\0\0"
.long __pecoff_data_size // VirtualSize
- .long __initdata_begin - _head // VirtualAddress
+ .long __initdata_begin - .L_head // VirtualAddress
.long __pecoff_data_rawsize // SizeOfRawData
- .long __initdata_begin - _head // PointerToRawData
+ .long __initdata_begin - .L_head // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
@@ -98,37 +154,10 @@ section_table:
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_WRITE // Characteristics
- .set section_count, (. - section_table) / 40
+ .set .Lsection_count, (. - .Lsection_table) / 40
#ifdef CONFIG_DEBUG_EFI
- /*
- * The debug table is referenced via its Relative Virtual Address (RVA),
- * which is only defined for those parts of the image that are covered
- * by a section declaration. Since this header is not covered by any
- * section, the debug table must be emitted elsewhere. So stick it in
- * the .init.rodata section instead.
- *
- * Note that the EFI debug entry itself may legally have a zero RVA,
- * which means we can simply put it right after the section headers.
- */
- __INITRODATA
-
- .align 2
-efi_debug_table:
- // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY
- .long 0 // Characteristics
- .long 0 // TimeDateStamp
- .short 0 // MajorVersion
- .short 0 // MinorVersion
- .long IMAGE_DEBUG_TYPE_CODEVIEW // Type
- .long efi_debug_entry_size // SizeOfData
- .long 0 // RVA
- .long efi_debug_entry - _head // FileOffset
-
- .set efi_debug_table_size, . - efi_debug_table
- .previous
-
-efi_debug_entry:
+.Lefi_debug_entry:
// EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY
.ascii "NB10" // Signature
.long 0 // Unknown
@@ -137,16 +166,16 @@ efi_debug_entry:
.asciz VMLINUX_PATH
- .set efi_debug_entry_size, . - efi_debug_entry
+ .set .Lefi_debug_entry_size, . - .Lefi_debug_entry
+#endif
+#ifdef CONFIG_ARM64_BTI_KERNEL
+.Lefi_dll_characteristics_ex:
+ .long IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT
#endif
- /*
- * EFI will load .text onwards at the 4k section alignment
- * described in the PE/COFF header. To ensure that instruction
- * sequences using an adrp and a :lo12: immediate will function
- * correctly at this alignment, we must ensure that .text is
- * placed at a 4k boundary in the Image to begin with.
- */
- .align 12
-efi_header_end:
+ .balign SEGMENT_ALIGN
+.Lefi_header_end:
+#else
+ .set .Lpe_header_offset, 0x0
+#endif
.endm
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..e8ae803662cf 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -4,9 +4,10 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
-ENTRY(__efi_rt_asm_wrapper)
- stp x29, x30, [sp, #-32]!
+SYM_FUNC_START(__efi_rt_asm_wrapper)
+ stp x29, x30, [sp, #-112]!
mov x29, sp
/*
@@ -17,6 +18,22 @@ ENTRY(__efi_rt_asm_wrapper)
stp x1, x18, [sp, #16]
/*
+ * Preserve all callee saved registers and preserve the stack pointer
+ * value at the base of the EFI runtime stack so we can recover from
+ * synchronous exceptions occurring while executing the firmware
+ * routines.
+ */
+ stp x19, x20, [sp, #32]
+ stp x21, x22, [sp, #48]
+ stp x23, x24, [sp, #64]
+ stp x25, x26, [sp, #80]
+ stp x27, x28, [sp, #96]
+
+ ldr_l x16, efi_rt_stack_top
+ mov sp, x16
+ stp x18, x29, [sp, #-16]!
+
+ /*
* We are lucky enough that no EFI runtime services take more than
* 5 arguments, so all are passed in registers rather than via the
* stack.
@@ -29,10 +46,42 @@ ENTRY(__efi_rt_asm_wrapper)
mov x4, x6
blr x8
+ mov x16, sp
+ mov sp, x29
+ str xzr, [x16, #8] // clear recorded task SP value
+
ldp x1, x2, [sp, #16]
cmp x2, x18
- ldp x29, x30, [sp], #32
+ ldp x29, x30, [sp], #112
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
-ENDPROC(__efi_rt_asm_wrapper)
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr_l x18, efi_rt_stack_top
+ ldr x18, [x18, #-16]
+#endif
+
+ b efi_handle_corrupted_x18 // tail call
+SYM_FUNC_END(__efi_rt_asm_wrapper)
+
+SYM_CODE_START(__efi_rt_asm_recover)
+ mov sp, x30
+
+ ldr_l x16, efi_rt_stack_top // clear recorded task SP value
+ str xzr, [x16, #-8]
+
+ ldp x19, x20, [sp, #32]
+ ldp x21, x22, [sp, #48]
+ ldp x23, x24, [sp, #64]
+ ldp x25, x26, [sp, #80]
+ ldp x27, x28, [sp, #96]
+ ldp x29, x30, [sp], #112
+ ret
+SYM_CODE_END(__efi_rt_asm_recover)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index d0cf596db82c..0228001347be 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,8 +9,18 @@
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/screen_info.h>
#include <asm/efi.h>
+#include <asm/stacktrace.h>
+
+static bool region_is_misaligned(const efi_memory_desc_t *md)
+{
+ if (PAGE_SIZE == EFI_PAGE_SIZE)
+ return false;
+ return !PAGE_ALIGNED(md->phys_addr) ||
+ !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
+}
/*
* Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
@@ -25,14 +35,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
if (type == EFI_MEMORY_MAPPED_IO)
return PROT_DEVICE_nGnRE;
- if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
- "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
+ if (region_is_misaligned(md)) {
+ static bool __initdata code_is_misaligned;
+
/*
- * If the region is not aligned to the page size of the OS, we
- * can not use strict permissions, since that would also affect
- * the mapping attributes of the adjacent regions.
+ * Regions that are not aligned to the OS page size cannot be
+ * mapped with strict permissions, as those might interfere
+ * with the permissions that are needed by the adjacent
+ * region's mapping. However, if we haven't encountered any
+ * misaligned runtime code regions so far, we can safely use
+ * non-executable permissions for non-code regions.
*/
- return pgprot_val(PAGE_KERNEL_EXEC);
+ code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);
+
+ return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
+ : pgprot_val(PAGE_KERNEL);
+ }
/* R-- */
if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
@@ -53,28 +71,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
return pgprot_val(PAGE_KERNEL_EXEC);
}
-/* we will fill this structure from the stub, so don't put it in .bss */
-struct screen_info screen_info __section(.data);
-
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
- if (!PAGE_ALIGNED(md->phys_addr) ||
- !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
- /*
- * If the end address of this region is not aligned to page
- * size, the mapping is rounded up, and may end up sharing a
- * page frame with the next UEFI memory region. If we create
- * a block entry now, we may need to split it again when mapping
- * the next region, and support for that is going to be removed
- * from the MMU routines. So avoid block mappings altogether in
- * that case.
- */
+ /*
+ * If this region is not aligned to the page size used by the OS, the
+ * mapping will be rounded outwards, and may end up sharing a page
+ * frame with an adjacent runtime memory region. Given that the page
+ * table descriptor covering the shared page will be rewritten when the
+ * adjacent region gets mapped, we must avoid block mappings here so we
+ * don't have to worry about splitting them when that happens.
+ */
+ if (region_is_misaligned(md))
page_mappings_only = true;
- }
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
@@ -82,25 +94,39 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
return 0;
}
+struct set_perm_data {
+ const efi_memory_desc_t *md;
+ bool has_bti;
+};
+
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
- efi_memory_desc_t *md = data;
+ struct set_perm_data *spd = data;
+ const efi_memory_desc_t *md = spd->md;
pte_t pte = READ_ONCE(*ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
if (md->attribute & EFI_MEMORY_XP)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
+ else if (system_supports_bti_kernel() && spd->has_bti)
+ pte = set_pte_bit(pte, __pgprot(PTE_GP));
set_pte(ptep, pte);
return 0;
}
int __init efi_set_mapping_permissions(struct mm_struct *mm,
- efi_memory_desc_t *md)
+ efi_memory_desc_t *md,
+ bool has_bti)
{
+ struct set_perm_data data = { md, has_bti };
+
BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_DATA);
+ if (region_is_misaligned(md))
+ return 0;
+
/*
* Calling apply_to_page_range() is only safe on regions that are
* guaranteed to be mapped down to pages. Since we are only called
@@ -110,7 +136,7 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm,
*/
return apply_to_page_range(mm, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
- set_permissions, md);
+ set_permissions, &data);
}
/*
@@ -127,3 +153,66 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
return s;
}
+
+static DEFINE_RAW_SPINLOCK(efi_rt_lock);
+
+void arch_efi_call_virt_setup(void)
+{
+ efi_virtmap_load();
+ __efi_fpsimd_begin();
+ raw_spin_lock(&efi_rt_lock);
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+ raw_spin_unlock(&efi_rt_lock);
+ __efi_fpsimd_end();
+ efi_virtmap_unload();
+}
+
+asmlinkage u64 *efi_rt_stack_top __ro_after_init;
+
+asmlinkage efi_status_t __efi_rt_asm_recover(void);
+
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
+{
+ /* Check whether the exception occurred while running the firmware */
+ if (!current_in_efi() || regs->pc >= TASK_SIZE_64)
+ return false;
+
+ pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+ regs->regs[0] = EFI_ABORTED;
+ regs->regs[30] = efi_rt_stack_top[-1];
+ regs->pc = (u64)__efi_rt_asm_recover;
+
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
+ regs->regs[18] = efi_rt_stack_top[-2];
+
+ return true;
+}
+
+/* EFI requires 8 KiB of stack space for runtime services */
+static_assert(THREAD_SIZE >= SZ_8K);
+
+static int __init arm64_efi_rt_init(void)
+{
+ void *p;
+
+ if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ return 0;
+
+ p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
+ NUMA_NO_NODE, &&l);
+l: if (!p) {
+ pr_warn("Failed to allocate EFI runtime stack\n");
+ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ return -ENOMEM;
+ }
+
+ efi_rt_stack_top = p + THREAD_SIZE;
+ return 0;
+}
+core_initcall(arm64_efi_rt_init);
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
new file mode 100644
index 000000000000..2e94d20c4ac7
--- /dev/null
+++ b/arch/arm64/kernel/elfcore.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/coredump.h>
+#include <linux/elfcore.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+
+#define for_each_mte_vma(cprm, i, m) \
+ if (system_supports_mte()) \
+ for (i = 0, m = cprm->vma_meta; \
+ i < cprm->vma_count; \
+ i++, m = cprm->vma_meta + i) \
+ if (m->flags & VM_MTE)
+
+static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m)
+{
+ return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE;
+}
+
+/* Derived from dump_user_range(); start/end must be page-aligned */
+static int mte_dump_tag_range(struct coredump_params *cprm,
+ unsigned long start, unsigned long len)
+{
+ int ret = 1;
+ unsigned long addr;
+ void *tags = NULL;
+
+ for (addr = start; addr < start + len; addr += PAGE_SIZE) {
+ struct page *page = get_dump_page(addr);
+
+ /*
+ * get_dump_page() returns NULL when encountering an empty
+ * page table entry that would otherwise have been filled with
+ * the zero page. Skip the equivalent tag dump which would
+ * have been all zeros.
+ */
+ if (!page) {
+ dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+ continue;
+ }
+
+ /*
+ * Pages mapped in user space as !pte_access_permitted() (e.g.
+ * PROT_EXEC only) may not have the PG_mte_tagged flag set.
+ */
+ if (!page_mte_tagged(page)) {
+ put_page(page);
+ dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
+ continue;
+ }
+
+ if (!tags) {
+ tags = mte_allocate_tag_storage();
+ if (!tags) {
+ put_page(page);
+ ret = 0;
+ break;
+ }
+ }
+
+ mte_save_page_tags(page_address(page), tags);
+ put_page(page);
+ if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ if (tags)
+ mte_free_tag_storage(tags);
+
+ return ret;
+}
+
+Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
+{
+ int i;
+ struct core_vma_metadata *m;
+ int vma_count = 0;
+
+ for_each_mte_vma(cprm, i, m)
+ vma_count++;
+
+ return vma_count;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+ int i;
+ struct core_vma_metadata *m;
+
+ for_each_mte_vma(cprm, i, m) {
+ struct elf_phdr phdr;
+
+ phdr.p_type = PT_AARCH64_MEMTAG_MTE;
+ phdr.p_offset = offset;
+ phdr.p_vaddr = m->start;
+ phdr.p_paddr = 0;
+ phdr.p_filesz = mte_vma_tag_dump_size(m);
+ phdr.p_memsz = m->end - m->start;
+ offset += phdr.p_filesz;
+ phdr.p_flags = 0;
+ phdr.p_align = 0;
+
+ if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+ return 0;
+ }
+
+ return 1;
+}
+
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
+{
+ int i;
+ struct core_vma_metadata *m;
+ size_t data_size = 0;
+
+ for_each_mte_vma(cprm, i, m)
+ data_size += mte_vma_tag_dump_size(m);
+
+ return data_size;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+ int i;
+ struct core_vma_metadata *m;
+
+ for_each_mte_vma(cprm, i, m) {
+ if (!mte_dump_tag_range(cprm, m->start, m->dump_size))
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 5dce5e56995a..0fc94207e69a 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,67 +6,449 @@
*/
#include <linux/context_tracking.h>
+#include <linux/kasan.h>
+#include <linux/linkage.h>
+#include <linux/lockdep.h>
#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/esr.h>
#include <asm/exception.h>
+#include <asm/irq_regs.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/sdei.h>
+#include <asm/stacktrace.h>
#include <asm/sysreg.h>
+#include <asm/system_misc.h>
-static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+/*
+ * Handle IRQ/context state management when entering from kernel mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * instrumentable code, or any code which may trigger an exception.
+ *
+ * This is intended to match the logic in irqentry_enter(), handling the kernel
+ * mode transitions only.
+ */
+static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
+{
+ regs->exit_rcu = false;
+
+ if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ ct_irq_enter();
+ trace_hardirqs_off_finish();
+
+ regs->exit_rcu = true;
+ return;
+ }
+
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ rcu_irq_enter_check_tick();
+ trace_hardirqs_off_finish();
+}
+
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+ __enter_from_kernel_mode(regs);
+ mte_check_tfsr_entry();
+ mte_disable_tco_entry(current);
+}
+
+/*
+ * Handle IRQ/context state management when exiting to kernel mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * instrumentable code, or any code which may trigger an exception.
+ *
+ * This is intended to match the logic in irqentry_exit(), handling the kernel
+ * mode transitions only, and with preemption handled elsewhere.
+ */
+static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
+{
+ lockdep_assert_irqs_disabled();
+
+ if (interrupts_enabled(regs)) {
+ if (regs->exit_rcu) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ ct_irq_exit();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ return;
+ }
+
+ trace_hardirqs_on();
+ } else {
+ if (regs->exit_rcu)
+ ct_irq_exit();
+ }
+}
+
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+ mte_check_tfsr_exit();
+ __exit_to_kernel_mode(regs);
+}
+
+/*
+ * Handle IRQ/context state management when entering from user mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * instrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __enter_from_user_mode(void)
+{
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ CT_WARN_ON(ct_state() != CONTEXT_USER);
+ user_exit_irqoff();
+ trace_hardirqs_off_finish();
+ mte_disable_tco_entry(current);
+}
+
+static __always_inline void enter_from_user_mode(struct pt_regs *regs)
+{
+ __enter_from_user_mode();
+}
+
+/*
+ * Handle IRQ/context state management when exiting to user mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * instrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __exit_to_user_mode(void)
+{
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ user_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ unsigned long flags;
+
+ local_daif_mask();
+
+ flags = read_thread_flags();
+ if (unlikely(flags & _TIF_WORK_MASK))
+ do_notify_resume(regs, flags);
+
+ lockdep_sys_exit();
+}
+
+static __always_inline void exit_to_user_mode(struct pt_regs *regs)
+{
+ exit_to_user_mode_prepare(regs);
+ mte_check_tfsr_exit();
+ __exit_to_user_mode();
+}
+
+asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
+{
+ exit_to_user_mode(regs);
+}
+
+/*
+ * Handle IRQ/context state management when entering an NMI from user/kernel
+ * mode. Before this function is called it is not safe to call regular kernel
+ * code, instrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_enter_nmi(struct pt_regs *regs)
+{
+ regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+ __nmi_enter();
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirq_enter();
+ ct_nmi_enter();
+
+ trace_hardirqs_off_finish();
+ ftrace_nmi_enter();
+}
+
+/*
+ * Handle IRQ/context state management when exiting an NMI from user/kernel
+ * mode. After this function returns it is not safe to call regular kernel
+ * code, instrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_exit_nmi(struct pt_regs *regs)
+{
+ bool restore = regs->lockdep_hardirqs;
+
+ ftrace_nmi_exit();
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ }
+
+ ct_nmi_exit();
+ lockdep_hardirq_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+ __nmi_exit();
+}
+
+/*
+ * Handle IRQ/context state management when entering a debug exception from
+ * kernel mode. Before this function is called it is not safe to call regular
+ * kernel code, instrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+ regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ ct_nmi_enter();
+
+ trace_hardirqs_off_finish();
+}
+
+/*
+ * Handle IRQ/context state management when exiting a debug exception from
+ * kernel mode. After this function returns it is not safe to call regular
+ * kernel code, instrumentable code, or any code which may trigger an exception.
+ */
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+ bool restore = regs->lockdep_hardirqs;
+
+ if (restore) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ }
+
+ ct_nmi_exit();
+ if (restore)
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
+static void __sched arm64_preempt_schedule_irq(void)
+{
+ if (!need_irq_preemption())
+ return;
+
+ /*
+ * Note: thread_info::preempt_count includes both thread_info::count
+ * and thread_info::need_resched, and is not equivalent to
+ * preempt_count().
+ */
+ if (READ_ONCE(current_thread_info()->preempt_count) != 0)
+ return;
+
+ /*
+ * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+ * priority masking is used the GIC irqchip driver will clear DAIF.IF
+ * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+ * DAIF we must have handled an NMI, so skip preemption.
+ */
+ if (system_uses_irq_prio_masking() && read_sysreg(daif))
+ return;
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (system_capabilities_finalized())
+ preempt_schedule_irq();
+}
+
+static void do_interrupt_handler(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ if (on_thread_stack())
+ call_on_irq_stack(regs, handler);
+ else
+ handler(regs);
+
+ set_irq_regs(old_regs);
+}
+
+extern void (*handle_arch_irq)(struct pt_regs *);
+extern void (*handle_arch_fiq)(struct pt_regs *);
+
+static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
+ unsigned long esr)
+{
+ arm64_enter_nmi(regs);
+
+ console_verbose();
+
+ pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n",
+ vector, smp_processor_id(), esr,
+ esr_get_class_string(esr));
+
+ __show_regs(regs);
+ panic("Unhandled exception");
+}
+
+#define UNHANDLED(el, regsize, vector) \
+asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs) \
+{ \
+ const char *desc = #regsize "-bit " #el " " #vector; \
+ __panic_unhandled(regs, desc, read_sysreg(esr_el1)); \
+}
+
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static void cortex_a76_erratum_1463225_svc_handler(void)
+{
+ u32 reg, val;
+
+ if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
+ return;
+
+ if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
+ return;
+
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
+ reg = read_sysreg(mdscr_el1);
+ val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ write_sysreg(val, mdscr_el1);
+ asm volatile("msr daifclr, #8");
+ isb();
+
+ /* We will have taken a single-step exception by this point */
+
+ write_sysreg(reg, mdscr_el1);
+ __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
+}
+
+static __always_inline bool
+cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
+ return false;
+
+ /*
+ * We've taken a dummy step exception from the kernel to ensure
+ * that interrupts are re-enabled on the syscall path. Return back
+ * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
+ * masked so that we can safely restore the mdscr and get on with
+ * handling the syscall.
+ */
+ regs->pstate |= PSR_D_BIT;
+ return true;
+}
+#else /* CONFIG_ARM64_ERRATUM_1463225 */
+static void cortex_a76_erratum_1463225_svc_handler(void) { }
+static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+
+/*
+ * As per the ABI exit SME streaming mode and clear the SVE state not
+ * shared with FPSIMD on syscall entry.
+ */
+static inline void fp_user_discard(void)
+{
+ /*
+ * If SME is active then exit streaming mode. If ZA is active
+ * then flush the SVE registers but leave userspace access to
+ * both SVE and SME enabled, otherwise disable SME for the
+ * task and fall through to disabling SVE too. This means
+ * that after a syscall we never have any streaming mode
+ * register state to track, if this changes the KVM code will
+ * need updating.
+ */
+ if (system_supports_sme())
+ sme_smstop_sm();
+
+ if (!system_supports_sve())
+ return;
+
+ if (test_thread_flag(TIF_SVE)) {
+ unsigned int sve_vq_minus_one;
+
+ sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+ sve_flush_live(true, sve_vq_minus_one);
+ }
+}
+
+UNHANDLED(el1t, 64, sync)
+UNHANDLED(el1t, 64, irq)
+UNHANDLED(el1t, 64, fiq)
+UNHANDLED(el1t, 64, error)
+
+static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ enter_from_kernel_mode(regs);
local_daif_inherit(regs);
- far = untagged_addr(far);
do_mem_abort(far, esr, regs);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
}
-NOKPROBE_SYMBOL(el1_abort);
-static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_sp_pc_abort(far, esr, regs);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
}
-NOKPROBE_SYMBOL(el1_pc);
-static void el1_undef(struct pt_regs *regs)
+static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
{
+ enter_from_kernel_mode(regs);
local_daif_inherit(regs);
- do_undefinstr(regs);
+ do_el1_undef(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
}
-NOKPROBE_SYMBOL(el1_undef);
-static void el1_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
{
+ enter_from_kernel_mode(regs);
local_daif_inherit(regs);
- bad_mode(regs, 0, esr);
+ do_el1_bti(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
}
-NOKPROBE_SYMBOL(el1_inv);
-static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- /*
- * The CPU masked interrupts, and we are leaving them masked during
- * do_debug_exception(). Update PMR as if we had called
- * local_mask_daif().
- */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ arm64_enter_el1_dbg(regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs))
+ do_debug_exception(far, esr, regs);
+ arm64_exit_el1_dbg(regs);
+}
- do_debug_exception(far, esr, regs);
+static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_fpac(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
}
-NOKPROBE_SYMBOL(el1_dbg);
-asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -84,7 +466,10 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
break;
case ESR_ELx_EC_SYS64:
case ESR_ELx_EC_UNKNOWN:
- el1_undef(regs);
+ el1_undef(regs, esr);
+ break;
+ case ESR_ELx_EC_BTI:
+ el1_bti(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_CUR:
case ESR_ELx_EC_SOFTSTP_CUR:
@@ -92,24 +477,77 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BRK64:
el1_dbg(regs, esr);
break;
+ case ESR_ELx_EC_FPAC:
+ el1_fpac(regs, esr);
+ break;
default:
- el1_inv(regs, esr);
- };
+ __panic_unhandled(regs, "64-bit el1h sync", esr);
+ }
+}
+
+static __always_inline void __el1_pnmi(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ arm64_enter_nmi(regs);
+ do_interrupt_handler(regs, handler);
+ arm64_exit_nmi(regs);
+}
+
+static __always_inline void __el1_irq(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ enter_from_kernel_mode(regs);
+
+ irq_enter_rcu();
+ do_interrupt_handler(regs, handler);
+ irq_exit_rcu();
+
+ arm64_preempt_schedule_irq();
+
+ exit_to_kernel_mode(regs);
+}
+static void noinstr el1_interrupt(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ __el1_pnmi(regs, handler);
+ else
+ __el1_irq(regs, handler);
+}
+
+asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
+{
+ el1_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
+{
+ el1_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ local_daif_restore(DAIF_ERRCTX);
+ arm64_enter_nmi(regs);
+ do_serror(regs, esr);
+ arm64_exit_nmi(regs);
}
-NOKPROBE_SYMBOL(el1_sync_handler);
-static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
- far = untagged_addr(far);
do_mem_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_da);
-static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -121,105 +559,135 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_ia);
-static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_fpsimd_acc);
-static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_sve_acc);
-static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_sme_acc(esr, regs);
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_fpsimd_exc);
-static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
- do_sysinstr(esr, regs);
+ do_el0_sys(esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_sys);
-static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening();
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_pc);
-static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_sp);
-static void notrace el0_undef(struct pt_regs *regs)
+static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
- do_undefinstr(regs);
+ do_el0_undef(regs, esr);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_undef);
-static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_bti(struct pt_regs *regs)
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_bti(regs);
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_mops(regs, esr);
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_inv);
-static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
{
/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
unsigned long far = read_sysreg(far_el1);
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
- user_exit_irqoff();
+ enter_from_user_mode(regs);
do_debug_exception(far, esr, regs);
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ local_daif_restore(DAIF_PROCCTX);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_dbg);
-static void notrace el0_svc(struct pt_regs *regs)
+static void noinstr el0_svc(struct pt_regs *regs)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ enter_from_user_mode(regs);
+ cortex_a76_erratum_1463225_svc_handler();
+ fp_user_discard();
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_svc(regs);
+ exit_to_user_mode(regs);
+}
- el0_svc_handler(regs);
+static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_fpac(regs, esr);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_svc);
-asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -239,6 +707,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_SVE:
el0_sve_acc(regs, esr);
break;
+ case ESR_ELx_EC_SME:
+ el0_sme_acc(regs, esr);
+ break;
case ESR_ELx_EC_FP_EXC64:
el0_fpsimd_exc(regs, esr);
break;
@@ -253,7 +724,13 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
el0_pc(regs, esr);
break;
case ESR_ELx_EC_UNKNOWN:
- el0_undef(regs);
+ el0_undef(regs, esr);
+ break;
+ case ESR_ELx_EC_BTI:
+ el0_bti(regs);
+ break;
+ case ESR_ELx_EC_MOPS:
+ el0_mops(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_SOFTSTP_LOW:
@@ -261,31 +738,88 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BRK64:
el0_dbg(regs, esr);
break;
+ case ESR_ELx_EC_FPAC:
+ el0_fpac(regs, esr);
+ break;
default:
el0_inv(regs, esr);
}
}
-NOKPROBE_SYMBOL(el0_sync_handler);
-#ifdef CONFIG_COMPAT
-static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_interrupt(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
{
- user_exit_irqoff();
+ enter_from_user_mode(regs);
+
+ write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+ if (regs->pc & BIT(55))
+ arm64_apply_bp_hardening();
+
+ irq_enter_rcu();
+ do_interrupt_handler(regs, handler);
+ irq_exit_rcu();
+
+ exit_to_user_mode(regs);
+}
+
+static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
+{
+ el0_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
+{
+ __el0_irq_handler_common(regs);
+}
+
+static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
+{
+ el0_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
+{
+ __el0_fiq_handler_common(regs);
+}
+
+static void noinstr __el0_error_handler_common(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_ERRCTX);
+ arm64_enter_nmi(regs);
+ do_serror(regs, esr);
+ arm64_exit_nmi(regs);
local_daif_restore(DAIF_PROCCTX);
- do_cp15instr(esr, regs);
+ exit_to_user_mode(regs);
+}
+
+asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
+{
+ __el0_error_handler_common(regs);
}
-NOKPROBE_SYMBOL(el0_cp15);
-static void notrace el0_svc_compat(struct pt_regs *regs)
+#ifdef CONFIG_COMPAT
+static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_cp15(esr, regs);
+ exit_to_user_mode(regs);
+}
- el0_svc_compat_handler(regs);
+static void noinstr el0_svc_compat(struct pt_regs *regs)
+{
+ enter_from_user_mode(regs);
+ cortex_a76_erratum_1463225_svc_handler();
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_svc_compat(regs);
+ exit_to_user_mode(regs);
}
-NOKPROBE_SYMBOL(el0_svc_compat);
-asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -312,7 +846,7 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
case ESR_ELx_EC_CP14_MR:
case ESR_ELx_EC_CP14_LS:
case ESR_ELx_EC_CP14_64:
- el0_undef(regs);
+ el0_undef(regs, esr);
break;
case ESR_ELx_EC_CP15_32:
case ESR_ELx_EC_CP15_64:
@@ -328,5 +862,71 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
el0_inv(regs, esr);
}
}
-NOKPROBE_SYMBOL(el0_sync_compat_handler);
+
+asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs)
+{
+ __el0_irq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs)
+{
+ __el0_fiq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs)
+{
+ __el0_error_handler_common(regs);
+}
+#else /* CONFIG_COMPAT */
+UNHANDLED(el0t, 32, sync)
+UNHANDLED(el0t, 32, irq)
+UNHANDLED(el0t, 32, fiq)
+UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_VMAP_STACK
+asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+ unsigned long far = read_sysreg(far_el1);
+
+ arm64_enter_nmi(regs);
+ panic_bad_stack(regs, esr, far);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+asmlinkage noinstr unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+ unsigned long ret;
+
+ /*
+ * We didn't take an exception to get here, so the HW hasn't
+ * set/cleared bits in PSTATE that we may rely on.
+ *
+ * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+ * whether PSTATE bits are inherited unchanged or generated from
+ * scratch, and the TF-A implementation always clears PAN and always
+ * clears UAO. There are no other known implementations.
+ *
+ * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+ * PSTATE is modified upon architectural exceptions, and so PAN is
+ * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+ * cleared.
+ *
+ * We must explicitly reset PAN to the expected state, including
+ * clearing it when the host isn't using it, in case a VM had it set.
+ */
+ if (system_uses_hw_pan())
+ set_pstate_pan(1);
+ else if (cpu_has_pan())
+ set_pstate_pan(0);
+
+ arm64_enter_nmi(regs);
+ ret = do_sdei_event(regs, arg);
+ arm64_exit_nmi(regs);
+
+ return ret;
+}
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 0f24eae8f3cc..6325db1a2179 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -16,34 +16,119 @@
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_save_state)
+SYM_FUNC_START(fpsimd_save_state)
fpsimd_save x0, 8
ret
-ENDPROC(fpsimd_save_state)
+SYM_FUNC_END(fpsimd_save_state)
/*
* Load the FP registers.
*
* x0 - pointer to struct fpsimd_state
*/
-ENTRY(fpsimd_load_state)
+SYM_FUNC_START(fpsimd_load_state)
fpsimd_restore x0, 8
ret
-ENDPROC(fpsimd_load_state)
+SYM_FUNC_END(fpsimd_load_state)
#ifdef CONFIG_ARM64_SVE
-ENTRY(sve_save_state)
- sve_save 0, x1, 2
+
+/*
+ * Save the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ * x2 - Save FFR if non-zero
+ */
+SYM_FUNC_START(sve_save_state)
+ sve_save 0, x1, x2, 3
ret
-ENDPROC(sve_save_state)
+SYM_FUNC_END(sve_save_state)
-ENTRY(sve_load_state)
- sve_load 0, x1, x2, 3, x4
+/*
+ * Load the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ * x2 - Restore FFR if non-zero
+ */
+SYM_FUNC_START(sve_load_state)
+ sve_load 0, x1, x2, 4
ret
-ENDPROC(sve_load_state)
+SYM_FUNC_END(sve_load_state)
-ENTRY(sve_get_vl)
+SYM_FUNC_START(sve_get_vl)
_sve_rdvl 0, 1
ret
-ENDPROC(sve_get_vl)
+SYM_FUNC_END(sve_get_vl)
+
+SYM_FUNC_START(sve_set_vq)
+ sve_load_vq x0, x1, x2
+ ret
+SYM_FUNC_END(sve_set_vq)
+
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = include FFR?
+ * x1 = VQ - 1
+ */
+SYM_FUNC_START(sve_flush_live)
+ cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
+ sve_flush_z
+1: sve_flush_p
+ tbz x0, #0, 2f
+ sve_flush_ffr
+2: ret
+SYM_FUNC_END(sve_flush_live)
+
#endif /* CONFIG_ARM64_SVE */
+
+#ifdef CONFIG_ARM64_SME
+
+SYM_FUNC_START(sme_get_vl)
+ _sme_rdsvl 0, 1
+ ret
+SYM_FUNC_END(sme_get_vl)
+
+SYM_FUNC_START(sme_set_vq)
+ sme_load_vq x0, x1, x2
+ ret
+SYM_FUNC_END(sme_set_vq)
+
+/*
+ * Save the ZA and ZT state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
+ */
+SYM_FUNC_START(sme_save_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _str_zt 0
+1:
+ ret
+SYM_FUNC_END(sme_save_state)
+
+/*
+ * Load the ZA and ZT state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - number of ZT registers to save
+ */
+SYM_FUNC_START(sme_load_state)
+ _sme_rdsvl 2, 1 // x2 = VL/8
+ sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
+
+ cbz x1, 1f
+ _ldr_zt 0
+1:
+ ret
+SYM_FUNC_END(sme_load_state)
+
+#endif /* CONFIG_ARM64_SME */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 7d02f9966d34..f0c16640ef21 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -7,97 +7,108 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
/*
* Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
* the regular function prologue. For an enabled callsite, ftrace_init_nop() and
* ftrace_make_call() have patched those NOPs to:
*
* MOV X9, LR
- * BL <entry>
+ * BL ftrace_caller
*
- * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
+ * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
+ * clobber.
*
- * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
- * live, and x9-x18 are safe to clobber.
- *
- * We save the callsite's context into a pt_regs before invoking any ftrace
- * callbacks. So that we can get a sensible backtrace, we create a stack record
- * for the callsite and the ftrace entry assembly. This is not sufficient for
- * reliable stacktrace: until we create the callsite stack record, its caller
- * is missing from the LR and existing chain of frame records.
+ * We save the callsite's context into a struct ftrace_regs before invoking any
+ * ftrace callbacks. So that we can get a sensible backtrace, we create frame
+ * records for the callsite and the ftrace entry assembly. This is not
+ * sufficient for reliable stacktrace: until we create the callsite stack
+ * record, its caller is missing from the LR and existing chain of frame
+ * records.
*/
- .macro ftrace_regs_entry, allregs=0
- /* Make room for pt_regs, plus a callee frame */
- sub sp, sp, #(S_FRAME_SIZE + 16)
-
- /* Save function arguments (and x9 for simplicity) */
- stp x0, x1, [sp, #S_X0]
- stp x2, x3, [sp, #S_X2]
- stp x4, x5, [sp, #S_X4]
- stp x6, x7, [sp, #S_X6]
- stp x8, x9, [sp, #S_X8]
-
- /* Optionally save the callee-saved registers, always save the FP */
- .if \allregs == 1
- stp x10, x11, [sp, #S_X10]
- stp x12, x13, [sp, #S_X12]
- stp x14, x15, [sp, #S_X14]
- stp x16, x17, [sp, #S_X16]
- stp x18, x19, [sp, #S_X18]
- stp x20, x21, [sp, #S_X20]
- stp x22, x23, [sp, #S_X22]
- stp x24, x25, [sp, #S_X24]
- stp x26, x27, [sp, #S_X26]
- stp x28, x29, [sp, #S_X28]
- .else
- str x29, [sp, #S_FP]
- .endif
-
- /* Save the callsite's SP and LR */
- add x10, sp, #(S_FRAME_SIZE + 16)
- stp x9, x10, [sp, #S_LR]
+SYM_CODE_START(ftrace_caller)
+ bti c
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * The literal pointer to the ops is at an 8-byte aligned boundary
+ * which is either 12 or 16 bytes before the BL instruction in the call
+ * site. See ftrace_call_adjust() for details.
+ *
+ * Therefore here the LR points at `literal + 16` or `literal + 20`,
+ * and we can find the address of the literal in either case by
+ * aligning to an 8-byte boundary and subtracting 16. We do the
+ * alignment first as this allows us to fold the subtraction into the
+ * LDR.
+ */
+ bic x11, x30, 0x7
+ ldr x11, [x11, #-(4 * AARCH64_INSN_SIZE)] // op
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /*
+ * If the op has a direct call, handle it immediately without
+ * saving/restoring registers.
+ */
+ ldr x17, [x11, #FTRACE_OPS_DIRECT_CALL] // op->direct_call
+ cbnz x17, ftrace_caller_direct
+#endif
+#endif
- /* Save the PC after the ftrace callsite */
- str x30, [sp, #S_PC]
+ /* Save original SP */
+ mov x10, sp
- /* Create a frame record for the callsite above pt_regs */
- stp x29, x9, [sp, #S_FRAME_SIZE]
- add x29, sp, #S_FRAME_SIZE
+ /* Make room for ftrace regs, plus two frame records */
+ sub sp, sp, #(FREGS_SIZE + 32)
- /* Create our frame record within pt_regs. */
- stp x29, x30, [sp, #S_STACKFRAME]
- add x29, sp, #S_STACKFRAME
- .endm
+ /* Save function arguments */
+ stp x0, x1, [sp, #FREGS_X0]
+ stp x2, x3, [sp, #FREGS_X2]
+ stp x4, x5, [sp, #FREGS_X4]
+ stp x6, x7, [sp, #FREGS_X6]
+ str x8, [sp, #FREGS_X8]
-ENTRY(ftrace_regs_caller)
- ftrace_regs_entry 1
- b ftrace_common
-ENDPROC(ftrace_regs_caller)
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ str xzr, [sp, #FREGS_DIRECT_TRAMP]
+#endif
-ENTRY(ftrace_caller)
- ftrace_regs_entry 0
- b ftrace_common
-ENDPROC(ftrace_caller)
+ /* Save the callsite's FP, LR, SP */
+ str x29, [sp, #FREGS_FP]
+ str x9, [sp, #FREGS_LR]
+ str x10, [sp, #FREGS_SP]
-ENTRY(ftrace_common)
- sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
- mov x1, x9 // parent_ip (callsite's LR)
- ldr_l x2, function_trace_op // op
- mov x3, sp // regs
+ /* Save the PC after the ftrace callsite */
+ str x30, [sp, #FREGS_PC]
-GLOBAL(ftrace_call)
- bl ftrace_stub
+ /* Create a frame record for the callsite above the ftrace regs */
+ stp x29, x9, [sp, #FREGS_SIZE + 16]
+ add x29, sp, #FREGS_SIZE + 16
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
- nop // If enabled, this will be replaced
- // "b ftrace_graph_caller"
+ /* Create our frame record above the ftrace regs */
+ stp x29, x30, [sp, #FREGS_SIZE]
+ add x29, sp, #FREGS_SIZE
+
+ /* Prepare arguments for the the tracer func */
+ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ mov x1, x9 // parent_ip (callsite's LR)
+ mov x3, sp // regs
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ mov x2, x11 // op
+ ldr x4, [x2, #FTRACE_OPS_FUNC] // op->func
+ blr x4 // op->func(ip, parent_ip, op, regs)
+
+#else
+ ldr_l x2, function_trace_op // op
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
+ bl ftrace_stub // func(ip, parent_ip, op, regs)
#endif
/*
@@ -105,37 +116,69 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
* to restore x0-x8, x29, and x30.
*/
-ftrace_common_return:
/* Restore function arguments */
- ldp x0, x1, [sp]
- ldp x2, x3, [sp, #S_X2]
- ldp x4, x5, [sp, #S_X4]
- ldp x6, x7, [sp, #S_X6]
- ldr x8, [sp, #S_X8]
+ ldp x0, x1, [sp, #FREGS_X0]
+ ldp x2, x3, [sp, #FREGS_X2]
+ ldp x4, x5, [sp, #FREGS_X4]
+ ldp x6, x7, [sp, #FREGS_X6]
+ ldr x8, [sp, #FREGS_X8]
+
+ /* Restore the callsite's FP */
+ ldr x29, [sp, #FREGS_FP]
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ ldr x17, [sp, #FREGS_DIRECT_TRAMP]
+ cbnz x17, ftrace_caller_direct_late
+#endif
- /* Restore the callsite's FP, LR, PC */
- ldr x29, [sp, #S_FP]
- ldr x30, [sp, #S_LR]
- ldr x9, [sp, #S_PC]
+ /* Restore the callsite's LR and PC */
+ ldr x30, [sp, #FREGS_LR]
+ ldr x9, [sp, #FREGS_PC]
/* Restore the callsite's SP */
- add sp, sp, #S_FRAME_SIZE + 16
+ add sp, sp, #FREGS_SIZE + 32
ret x9
-ENDPROC(ftrace_common)
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
- ldr x0, [sp, #S_PC]
- sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
- add x1, sp, #S_LR // parent_ip (callsite's LR)
- ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
- bl prepare_ftrace_return
- b ftrace_common_return
-ENDPROC(ftrace_graph_caller)
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_INNER_LABEL(ftrace_caller_direct_late, SYM_L_LOCAL)
+ /*
+ * Head to a direct trampoline in x17 after having run other tracers.
+ * The ftrace_regs are live, and x0-x8 and FP have been restored. The
+ * LR, PC, and SP have not been restored.
+ */
+
+ /*
+ * Restore the callsite's LR and PC matching the trampoline calling
+ * convention.
+ */
+ ldr x9, [sp, #FREGS_LR]
+ ldr x30, [sp, #FREGS_PC]
-#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+ /* Restore the callsite's SP */
+ add sp, sp, #FREGS_SIZE + 32
+
+SYM_INNER_LABEL(ftrace_caller_direct, SYM_L_LOCAL)
+ /*
+ * Head to a direct trampoline in x17.
+ *
+ * We use `BR X17` as this can safely land on a `BTI C` or `PACIASP` in
+ * the trampoline, and will not unbalance any return stack.
+ */
+ br x17
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+SYM_CODE_END(ftrace_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ bti c
+ mov x10, x30
+ mov x30, x9
+ ret x10
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
/*
* Gcc with -pg will put the following code in the beginning of each function:
@@ -209,53 +252,15 @@ ENDPROC(ftrace_graph_caller)
add \reg, \reg, #8
.endm
-#ifndef CONFIG_DYNAMIC_FTRACE
-/*
- * void _mcount(unsigned long return_address)
- * @return_address: return address to instrumented function
- *
- * This function makes calls, if enabled, to:
- * - tracer function to probe instrumented function's entry,
- * - ftrace_graph_caller to set up an exit hook
- */
-ENTRY(_mcount)
- mcount_enter
-
- ldr_l x2, ftrace_trace_function
- adr x0, ftrace_stub
- cmp x0, x2 // if (ftrace_trace_function
- b.eq skip_ftrace_call // != ftrace_stub) {
-
- mcount_get_pc x0 // function's pc
- mcount_get_lr x1 // function's lr (= parent's pc)
- blr x2 // (*ftrace_trace_function)(pc, lr);
-
-skip_ftrace_call: // }
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- ldr_l x2, ftrace_graph_return
- cmp x0, x2 // if ((ftrace_graph_return
- b.ne ftrace_graph_caller // != ftrace_stub)
-
- ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry
- adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub))
- cmp x0, x2
- b.ne ftrace_graph_caller // ftrace_graph_caller();
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
- mcount_exit
-ENDPROC(_mcount)
-EXPORT_SYMBOL(_mcount)
-NOKPROBE(_mcount)
-
-#else /* CONFIG_DYNAMIC_FTRACE */
/*
* _mcount() is used to build the kernel with -pg option, but all the branch
* instructions to _mcount() are replaced to NOP initially at kernel start up,
* and later on, NOP to branch to ftrace_caller() when enabled or branch to
* NOP when disabled per-function base.
*/
-ENTRY(_mcount)
+SYM_FUNC_START(_mcount)
ret
-ENDPROC(_mcount)
+SYM_FUNC_END(_mcount)
EXPORT_SYMBOL(_mcount)
NOKPROBE(_mcount)
@@ -268,25 +273,24 @@ NOKPROBE(_mcount)
* - tracer function to probe instrumented function's entry,
* - ftrace_graph_caller to set up an exit hook
*/
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
mcount_enter
mcount_get_pc0 x0 // function's pc
mcount_get_lr x1 // function's lr
-GLOBAL(ftrace_call) // tracer(pc, lr);
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) // tracer(pc, lr);
nop // This will be replaced with "bl xxx"
// where xxx can be any kind of tracer.
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
nop // If enabled, this will be replaced
// "b ftrace_graph_caller"
#endif
mcount_exit
-ENDPROC(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE */
+SYM_FUNC_END(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
@@ -298,47 +302,52 @@ ENDPROC(ftrace_caller)
* the call stack in order to intercept instrumented function's return path
* and run return_to_handler() later on its exit.
*/
-ENTRY(ftrace_graph_caller)
+SYM_FUNC_START(ftrace_graph_caller)
mcount_get_pc x0 // function's pc
mcount_get_lr_addr x1 // pointer to function's saved lr
mcount_get_parent_fp x2 // parent's fp
bl prepare_ftrace_return // prepare_ftrace_return(pc, &lr, fp)
mcount_exit
-ENDPROC(ftrace_graph_caller)
+SYM_FUNC_END(ftrace_graph_caller)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
-ENTRY(ftrace_stub)
+SYM_TYPED_FUNC_START(ftrace_stub)
ret
-ENDPROC(ftrace_stub)
+SYM_FUNC_END(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_TYPED_FUNC_START(ftrace_stub_graph)
+ ret
+SYM_FUNC_END(ftrace_stub_graph)
+
/*
* void return_to_handler(void)
*
* Run ftrace_return_to_handler() before going back to parent.
* @fp is checked against the value passed by ftrace_graph_caller().
*/
-ENTRY(return_to_handler)
+SYM_CODE_START(return_to_handler)
/* save return value regs */
- sub sp, sp, #64
- stp x0, x1, [sp]
- stp x2, x3, [sp, #16]
- stp x4, x5, [sp, #32]
- stp x6, x7, [sp, #48]
+ sub sp, sp, #FGRET_REGS_SIZE
+ stp x0, x1, [sp, #FGRET_REGS_X0]
+ stp x2, x3, [sp, #FGRET_REGS_X2]
+ stp x4, x5, [sp, #FGRET_REGS_X4]
+ stp x6, x7, [sp, #FGRET_REGS_X6]
+ str x29, [sp, #FGRET_REGS_FP] // parent's fp
- mov x0, x29 // parent's fp
- bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
- mov x30, x0 // restore the original return address
+ mov x0, sp
+ bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs);
+ mov x30, x0 // restore the original return address
/* restore return value regs */
- ldp x0, x1, [sp]
- ldp x2, x3, [sp, #16]
- ldp x4, x5, [sp, #32]
- ldp x6, x7, [sp, #48]
- add sp, sp, #64
+ ldp x0, x1, [sp, #FGRET_REGS_X0]
+ ldp x2, x3, [sp, #FGRET_REGS_X2]
+ ldp x4, x5, [sp, #FGRET_REGS_X4]
+ ldp x6, x7, [sp, #FGRET_REGS_X6]
+ add sp, sp, #FGRET_REGS_SIZE
ret
-END(return_to_handler)
+SYM_CODE_END(return_to_handler)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7c6a0a41676f..7ef0e127b149 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -14,6 +14,8 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
+#include <asm/asm_pointer_auth.h>
+#include <asm/bug.h>
#include <asm/cpufeature.h>
#include <asm/errno.h>
#include <asm/esr.h>
@@ -22,57 +24,36 @@
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
-/*
- * Context tracking subsystem. Used to instrument transitions
- * between user and kernel mode.
- */
- .macro ct_user_exit_irqoff
-#ifdef CONFIG_CONTEXT_TRACKING
- bl enter_from_user_mode
-#endif
- .endm
-
- .macro ct_user_enter
-#ifdef CONFIG_CONTEXT_TRACKING
- bl context_tracking_user_enter
-#endif
- .endm
-
.macro clear_gp_regs
.irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
mov x\n, xzr
.endr
.endm
-/*
- * Bad Abort numbers
- *-----------------
- */
-#define BAD_SYNC 0
-#define BAD_IRQ 1
-#define BAD_FIQ 2
-#define BAD_ERROR 3
-
- .macro kernel_ventry, el, label, regsize = 64
+ .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
.align 7
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+.Lventry_start\@:
.if \el == 0
+ /*
+ * This must be the first instruction of the EL0 vector entries. It is
+ * skipped by the trampoline vectors, to trigger the cleanup.
+ */
+ b .Lskip_tramp_vectors_cleanup\@
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
+.Lskip_tramp_vectors_cleanup\@:
.endif
-alternative_else_nop_endif
-#endif
- sub sp, sp, #S_FRAME_SIZE
+ sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
@@ -84,7 +65,7 @@ alternative_else_nop_endif
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
- b el\()\el\()_\label
+ b el\el\ht\()_\regsize\()_\label
0:
/*
@@ -93,7 +74,7 @@ alternative_else_nop_endif
* userspace, and can clobber EL0 registers to free up GPRs.
*/
- /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+ /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
@@ -116,20 +97,24 @@ alternative_else_nop_endif
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
- b el\()\el\()_\label
+ b el\el\ht\()_\regsize\()_\label
+.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
- .macro tramp_alias, dst, sym
- mov_q \dst, TRAMP_VALIAS
- add \dst, \dst, #(\sym - .entry.tramp.text)
+ .macro tramp_alias, dst, sym
+ .set .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
+ movz \dst, :abs_g2_s:.Lalias\@
+ movk \dst, :abs_g1_nc:.Lalias\@
+ movk \dst, :abs_g0_nc:.Lalias\@
.endm
- // This macro corrupts x0-x3. It is the caller's duty
- // to save/restore them if required.
+ /*
+ * This macro corrupts x0-x3. It is the caller's duty to save/restore
+ * them if required.
+ */
.macro apply_ssbd, state, tmp1, tmp2
-#ifdef CONFIG_ARM64_SSBD
-alternative_cb arm64_enable_wa2_handling
- b .L__asm_ssbd_skip\@
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_v4_patch_fw_mitigation_enable
+ b .L__asm_ssbd_skip\@ // Patched to NOP
alternative_cb_end
ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1
cbz \tmp2, .L__asm_ssbd_skip\@
@@ -137,14 +122,83 @@ alternative_cb_end
tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
mov w1, #\state
-alternative_cb arm64_update_smccc_conduit
+alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
nop // Patched to SMC/HVC #0
alternative_cb_end
.L__asm_ssbd_skip\@:
+ .endm
+
+ /* Check for MTE asynchronous tag check faults */
+ .macro check_mte_async_tcf, tmp, ti_flags, thread_sctlr
+#ifdef CONFIG_ARM64_MTE
+ .arch_extension lse
+alternative_if_not ARM64_MTE
+ b 1f
+alternative_else_nop_endif
+ /*
+ * Asynchronous tag check faults are only possible in ASYNC (2) or
+ * ASYM (3) modes. In each of these modes bit 1 of SCTLR_EL1.TCF0 is
+ * set, so skip the check if it is unset.
+ */
+ tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
+ mrs_s \tmp, SYS_TFSRE0_EL1
+ tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
+ /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
+ mov \tmp, #_TIF_MTE_ASYNC_FAULT
+ add \ti_flags, tsk, #TSK_TI_FLAGS
+ stset \tmp, [\ti_flags]
+1:
+#endif
+ .endm
+
+ /* Clear the MTE asynchronous tag check faults */
+ .macro clear_mte_async_tcf thread_sctlr
+#ifdef CONFIG_ARM64_MTE
+alternative_if ARM64_MTE
+ /* See comment in check_mte_async_tcf above. */
+ tbz \thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
+ dsb ish
+ msr_s SYS_TFSRE0_EL1, xzr
+1:
+alternative_else_nop_endif
+#endif
+ .endm
+
+ .macro mte_set_gcr, mte_ctrl, tmp
+#ifdef CONFIG_ARM64_MTE
+ ubfx \tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
+ orr \tmp, \tmp, #SYS_GCR_EL1_RRND
+ msr_s SYS_GCR_EL1, \tmp
+#endif
+ .endm
+
+ .macro mte_set_kernel_gcr, tmp, tmp2
+#ifdef CONFIG_KASAN_HW_TAGS
+alternative_cb ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable
+ b 1f
+alternative_cb_end
+ mov \tmp, KERNEL_GCR_EL1
+ msr_s SYS_GCR_EL1, \tmp
+1:
+#endif
+ .endm
+
+ .macro mte_set_user_gcr, tsk, tmp, tmp2
+#ifdef CONFIG_KASAN_HW_TAGS
+alternative_cb ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable
+ b 1f
+alternative_cb_end
+ ldr \tmp, [\tsk, #THREAD_MTE_CTRL]
+
+ mte_set_gcr \tmp, \tmp2
+1:
#endif
.endm
.macro kernel_entry, el, regsize = 64
+ .if \el == 0
+ alternative_insn nop, SET_PSTATE_DIT(1), ARM64_HAS_DIT
+ .endif
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
@@ -167,30 +221,72 @@ alternative_cb_end
.if \el == 0
clear_gp_regs
mrs x21, sp_el0
- ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
- ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
- disable_step_tsk x19, x20 // exceptions when scheduling.
+ ldr_this_cpu tsk, __entry_task, x20
+ msr sp_el0, tsk
+
+ /*
+ * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions
+ * when scheduling.
+ */
+ ldr x19, [tsk, #TSK_TI_FLAGS]
+ disable_step_tsk x19, x20
+
+ /* Check for asynchronous tag check faults in user space */
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ check_mte_async_tcf x22, x23, x0
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ /*
+ * Enable IA for in-kernel PAC if the task had it disabled. Although
+ * this could be implemented with an unconditional MRS which would avoid
+ * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+ *
+ * Install the kernel IA key only if IA was enabled in the task. If IA
+ * was disabled on kernel exit then we would have left the kernel IA
+ * installed so there is no need to install it again.
+ */
+ tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
+ __ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ orr x0, x0, SCTLR_ELx_ENIA
+ msr sctlr_el1, x0
+2:
+alternative_else_nop_endif
+#endif
apply_ssbd 1, x22, x23
+ mte_set_kernel_gcr x22, x23
+
+ /*
+ * Any non-self-synchronizing system register updates required for
+ * kernel entry should be placed before this point.
+ */
+alternative_if ARM64_MTE
+ isb
+ b 1f
+alternative_else_nop_endif
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ isb
+alternative_else_nop_endif
+1:
+
+ scs_load_current
.else
- add x21, sp, #S_FRAME_SIZE
+ add x21, sp, #PT_REGS_SIZE
get_current_task tsk
- /* Save the task's original addr_limit and set USER_DS */
- ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
- str x20, [sp, #S_ORIG_ADDR_LIMIT]
- mov x20, #USER_DS
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
- /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
/*
- * In order to be able to dump the contents of struct pt_regs at the
- * time the exception was taken (in case we attempt to walk the call
- * stack later), chain it together with the stack frames.
+ * For exceptions from EL0, create a final frame record.
+ * For exceptions from EL1, create a synthetic frame record so the
+ * interrupted code shows up in the backtrace.
*/
.if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
@@ -200,28 +296,9 @@ alternative_cb_end
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
- * EL0, there is no need to check the state of TTBR0_EL1 since
- * accesses are always enabled.
- * Note that the meaning of this bit differs from the ARMv8.1 PAN
- * feature as all TTBR0_EL1 accesses are disabled, not just those to
- * user mappings.
- */
-alternative_if ARM64_HAS_PAN
- b 1f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_entry_el\el
alternative_else_nop_endif
-
- .if \el != 0
- mrs x21, ttbr0_el1
- tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
- orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
- b.eq 1f // TTBR0 access already disabled
- and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
- .endif
-
- __uaccess_ttbr0_disable x21
-1:
#endif
stp x22, x23, [sp, #S_PC]
@@ -232,18 +309,18 @@ alternative_else_nop_endif
str w21, [sp, #S_SYSCALLNO]
.endif
- /*
- * Set sp_el0 to current thread_info.
- */
- .if \el == 0
- msr sp_el0, tsk
- .endif
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
+ b .Lskip_pmr_save\@
+alternative_else_nop_endif
- /* Save pmr */
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
str x20, [sp, #S_PMR_SAVE]
-alternative_else_nop_endif
+ mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, x20
+
+.Lskip_pmr_save\@:
+#endif
/*
* Registers that may be useful after this macro is invoked:
@@ -258,58 +335,30 @@ alternative_else_nop_endif
.macro kernel_exit, el
.if \el != 0
disable_daif
-
- /* Restore the task's original addr_limit. */
- ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TSK_TI_ADDR_LIMIT]
-
- /* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
- /* Restore pmr */
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
+ b .Lskip_pmr_restore\@
+alternative_else_nop_endif
+
ldr x20, [sp, #S_PMR_SAVE]
msr_s SYS_ICC_PMR_EL1, x20
- mrs_s x21, SYS_ICC_CTLR_EL1
- tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
- dsb sy // Ensure priority change is seen by redistributor
-.L__skip_pmr_sync\@:
+
+ /* Ensure priority change is seen by redistributor */
+alternative_if_not ARM64_HAS_GIC_PRIO_RELAXED_SYNC
+ dsb sy
alternative_else_nop_endif
+.Lskip_pmr_restore\@:
+#endif
+
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
- .if \el == 0
- ct_user_enter
- .endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- /*
- * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
- * PAN bit checking.
- */
-alternative_if ARM64_HAS_PAN
- b 2f // skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+ bl __swpan_exit_el\el
alternative_else_nop_endif
-
- .if \el != 0
- tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
- .endif
-
- __uaccess_ttbr0_enable x0, x1
-
- .if \el == 0
- /*
- * Enable errata workarounds only if returning to user. The only
- * workaround currently required for TTBR0_EL1 changes are for the
- * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
- * corruption).
- */
- bl post_ttbr_update_workaround
- .endif
-1:
- .if \el != 0
- and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
- .endif
-2:
#endif
.if \el == 0
@@ -329,21 +378,34 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
-#ifdef CONFIG_ARM64_ERRATUM_1418040
-alternative_if_not ARM64_WORKAROUND_1418040
- b 4f
-alternative_else_nop_endif
+ scs_save tsk
+
+ /* Ignore asynchronous tag check faults in the uaccess routines */
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ clear_mte_async_tcf x0
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
/*
- * if (x22.mode32 == cntkctl_el1.el0vcten)
- * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten
+ * IA was enabled for in-kernel PAC. Disable it now if needed, or
+ * alternatively install the user's IA. All other per-task keys and
+ * SCTLR bits were updated on task switch.
+ *
+ * No kernel C function calls after this.
*/
- mrs x1, cntkctl_el1
- eon x0, x1, x22, lsr #3
- tbz x0, #1, 4f
- eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN
- msr cntkctl_el1, x1
-4:
+ tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
+ __ptrauth_keys_install_user tsk, x0, x1, x2
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ bic x0, x0, SCTLR_ELx_ENIA
+ msr sctlr_el1, x0
+2:
+alternative_else_nop_endif
#endif
+
+ mte_set_user_gcr tsk, x0, x1
+
apply_ssbd 0, x0, x1
.endif
@@ -364,101 +426,87 @@ alternative_else_nop_endif
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
- ldr lr, [sp, #S_LR]
- add sp, sp, #S_FRAME_SIZE // restore sp
.if \el == 0
-alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- bne 5f
- msr far_el1, x30
- tramp_alias x30, tramp_exit_native
- br x30
-5:
- tramp_alias x30, tramp_exit_compat
- br x30
+ alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
+ msr far_el1, x29
+
+ ldr_this_cpu x30, this_cpu_vector, x29
+ tramp_alias x29, tramp_exit
+ msr vbar_el1, x30 // install vector table
+ ldr lr, [sp, #S_LR] // restore x30
+ add sp, sp, #PT_REGS_SIZE // restore sp
+ br x29
+
+.L_skip_tramp_exit_\@:
#endif
+ .endif
+
+ ldr lr, [sp, #S_LR]
+ add sp, sp, #PT_REGS_SIZE // restore sp
+
+ .if \el == 0
+ /* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
.else
- eret
+ /* Ensure any device/NC reads complete */
+ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
.endif
+
+ eret
sb
.endm
- .macro irq_stack_entry
- mov x19, sp // preserve the original sp
-
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
- * Compare sp with the base of the task stack.
- * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
- * and should switch to the irq stack.
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
*/
- ldr x25, [tsk, TSK_STACK]
- eor x25, x25, x19
- and x25, x25, #~(THREAD_SIZE - 1)
- cbnz x25, 9998f
-
- ldr_this_cpu x25, irq_stack_ptr, x26
- mov x26, #IRQ_STACK_SIZE
- add x26, x25, x26
-
- /* switch to the irq stack */
- mov sp, x26
-9998:
- .endm
+SYM_CODE_START_LOCAL(__swpan_entry_el1)
+ mrs x21, ttbr0_el1
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL)
+ __uaccess_ttbr0_disable x21
+1: ret
+SYM_CODE_END(__swpan_entry_el1)
/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
*/
- .macro irq_stack_exit
- mov sp, x19
- .endm
-
-/* GPRs used by entry code */
-tsk .req x28 // current thread_info
-
-/*
- * Interrupt handling.
- */
- .macro irq_handler
- ldr_l x1, handle_arch_irq
- mov x0, sp
- irq_stack_entry
- blr x1
- irq_stack_exit
- .endm
+SYM_CODE_START_LOCAL(__swpan_exit_el1)
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ __uaccess_ttbr0_enable x0, x1
+1: and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ ret
+SYM_CODE_END(__swpan_exit_el1)
-#ifdef CONFIG_ARM64_PSEUDO_NMI
+SYM_CODE_START_LOCAL(__swpan_exit_el0)
+ __uaccess_ttbr0_enable x0, x1
/*
- * Set res to 0 if irqs were unmasked in interrupted context.
- * Otherwise set res to non-0 value.
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
*/
- .macro test_irqs_unmasked res:req, pmr:req
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- sub \res, \pmr, #GIC_PRIO_IRQON
-alternative_else
- mov \res, xzr
-alternative_endif
- .endm
-#endif
-
- .macro gic_prio_kentry_setup, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
+ b post_ttbr_update_workaround
+SYM_CODE_END(__swpan_exit_el0)
#endif
- .endm
- .macro gic_prio_irq_setup, pmr:req, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
-#endif
- .endm
+/* GPRs used by entry code */
+tsk .req x28 // current thread_info
.text
@@ -468,53 +516,47 @@ alternative_endif
.pushsection ".entry.text", "ax"
.align 11
-ENTRY(vectors)
- kernel_ventry 1, sync_invalid // Synchronous EL1t
- kernel_ventry 1, irq_invalid // IRQ EL1t
- kernel_ventry 1, fiq_invalid // FIQ EL1t
- kernel_ventry 1, error_invalid // Error EL1t
-
- kernel_ventry 1, sync // Synchronous EL1h
- kernel_ventry 1, irq // IRQ EL1h
- kernel_ventry 1, fiq_invalid // FIQ EL1h
- kernel_ventry 1, error // Error EL1h
-
- kernel_ventry 0, sync // Synchronous 64-bit EL0
- kernel_ventry 0, irq // IRQ 64-bit EL0
- kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
- kernel_ventry 0, error // Error 64-bit EL0
-
-#ifdef CONFIG_COMPAT
- kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
- kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
- kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
- kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
-#else
- kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
- kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
- kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
- kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
-#endif
-END(vectors)
+SYM_CODE_START(vectors)
+ kernel_ventry 1, t, 64, sync // Synchronous EL1t
+ kernel_ventry 1, t, 64, irq // IRQ EL1t
+ kernel_ventry 1, t, 64, fiq // FIQ EL1t
+ kernel_ventry 1, t, 64, error // Error EL1t
+
+ kernel_ventry 1, h, 64, sync // Synchronous EL1h
+ kernel_ventry 1, h, 64, irq // IRQ EL1h
+ kernel_ventry 1, h, 64, fiq // FIQ EL1h
+ kernel_ventry 1, h, 64, error // Error EL1h
+
+ kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0
+ kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0
+ kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0
+ kernel_ventry 0, t, 64, error // Error 64-bit EL0
+
+ kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0
+ kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0
+ kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0
+ kernel_ventry 0, t, 32, error // Error 32-bit EL0
+SYM_CODE_END(vectors)
#ifdef CONFIG_VMAP_STACK
+SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
* overflow stack. Stash the exception regs, and head to our overflow
* handler.
*/
-__bad_stack:
+
/* Restore the original x0 value */
mrs x0, tpidrro_el0
/*
* Store the original GPRs to the new stack. The orginal SP (minus
- * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+ * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry.
*/
- sub sp, sp, #S_FRAME_SIZE
+ sub sp, sp, #PT_REGS_SIZE
kernel_entry 1
mrs x0, tpidr_el0
- add x0, x0, #S_FRAME_SIZE
+ add x0, x0, #PT_REGS_SIZE
str x0, [sp, #S_SP]
/* Stash the regs for handle_bad_stack */
@@ -523,244 +565,65 @@ __bad_stack:
/* Time to die */
bl handle_bad_stack
ASM_BUG()
+SYM_CODE_END(__bad_stack)
#endif /* CONFIG_VMAP_STACK */
-/*
- * Invalid mode handlers
- */
- .macro inv_entry, el, reason, regsize = 64
+
+ .macro entry_handler el:req, ht:req, regsize:req, label:req
+SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
kernel_entry \el, \regsize
mov x0, sp
- mov x1, #\reason
- mrs x2, esr_el1
- bl bad_mode
- ASM_BUG()
+ bl el\el\ht\()_\regsize\()_\label\()_handler
+ .if \el == 0
+ b ret_to_user
+ .else
+ b ret_to_kernel
+ .endif
+SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
.endm
-el0_sync_invalid:
- inv_entry 0, BAD_SYNC
-ENDPROC(el0_sync_invalid)
-
-el0_irq_invalid:
- inv_entry 0, BAD_IRQ
-ENDPROC(el0_irq_invalid)
-
-el0_fiq_invalid:
- inv_entry 0, BAD_FIQ
-ENDPROC(el0_fiq_invalid)
-
-el0_error_invalid:
- inv_entry 0, BAD_ERROR
-ENDPROC(el0_error_invalid)
-
-#ifdef CONFIG_COMPAT
-el0_fiq_invalid_compat:
- inv_entry 0, BAD_FIQ, 32
-ENDPROC(el0_fiq_invalid_compat)
-#endif
-
-el1_sync_invalid:
- inv_entry 1, BAD_SYNC
-ENDPROC(el1_sync_invalid)
-
-el1_irq_invalid:
- inv_entry 1, BAD_IRQ
-ENDPROC(el1_irq_invalid)
-
-el1_fiq_invalid:
- inv_entry 1, BAD_FIQ
-ENDPROC(el1_fiq_invalid)
-
-el1_error_invalid:
- inv_entry 1, BAD_ERROR
-ENDPROC(el1_error_invalid)
-
/*
- * EL1 mode handlers.
+ * Early exception handlers
*/
- .align 6
-el1_sync:
- kernel_entry 1
- mov x0, sp
- bl el1_sync_handler
+ entry_handler 1, t, 64, sync
+ entry_handler 1, t, 64, irq
+ entry_handler 1, t, 64, fiq
+ entry_handler 1, t, 64, error
+
+ entry_handler 1, h, 64, sync
+ entry_handler 1, h, 64, irq
+ entry_handler 1, h, 64, fiq
+ entry_handler 1, h, 64, error
+
+ entry_handler 0, t, 64, sync
+ entry_handler 0, t, 64, irq
+ entry_handler 0, t, 64, fiq
+ entry_handler 0, t, 64, error
+
+ entry_handler 0, t, 32, sync
+ entry_handler 0, t, 32, irq
+ entry_handler 0, t, 32, fiq
+ entry_handler 0, t, 32, error
+
+SYM_CODE_START_LOCAL(ret_to_kernel)
kernel_exit 1
-ENDPROC(el1_sync)
-
- .align 6
-el1_irq:
- kernel_entry 1
- gic_prio_irq_setup pmr=x20, tmp=x1
- enable_da_f
-
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- test_irqs_unmasked res=x0, pmr=x20
- cbz x0, 1f
- bl asm_nmi_enter
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
+SYM_CODE_END(ret_to_kernel)
- irq_handler
-
-#ifdef CONFIG_PREEMPT
- ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- /*
- * DA_F were cleared at start of handling. If anything is set in DAIF,
- * we come back from an NMI, so skip preemption
- */
- mrs x0, daif
- orr x24, x24, x0
-alternative_else_nop_endif
- cbnz x24, 1f // preempt count != 0 || NMI return path
- bl arm64_preempt_schedule_irq // irq en/disable is done inside
-1:
-#endif
-
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- /*
- * When using IRQ priority masking, we can get spurious interrupts while
- * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
- * section with interrupts disabled. Skip tracing in those cases.
- */
- test_irqs_unmasked res=x0, pmr=x20
- cbz x0, 1f
- bl asm_nmi_exit
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- test_irqs_unmasked res=x0, pmr=x20
- cbnz x0, 1f
-#endif
- bl trace_hardirqs_on
-1:
-#endif
-
- kernel_exit 1
-ENDPROC(el1_irq)
-
-/*
- * EL0 mode handlers.
- */
- .align 6
-el0_sync:
- kernel_entry 0
- mov x0, sp
- bl el0_sync_handler
- b ret_to_user
-
-#ifdef CONFIG_COMPAT
- .align 6
-el0_sync_compat:
- kernel_entry 0, 32
- mov x0, sp
- bl el0_sync_compat_handler
- b ret_to_user
-ENDPROC(el0_sync)
-
- .align 6
-el0_irq_compat:
- kernel_entry 0, 32
- b el0_irq_naked
-
-el0_error_compat:
- kernel_entry 0, 32
- b el0_error_naked
-#endif
-
- .align 6
-el0_irq:
- kernel_entry 0
-el0_irq_naked:
- gic_prio_irq_setup pmr=x20, tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
-
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
- tbz x22, #55, 1f
- bl do_el0_irq_bp_hardening
-1:
-#endif
- irq_handler
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on
-#endif
- b ret_to_user
-ENDPROC(el0_irq)
-
-el1_error:
- kernel_entry 1
- mrs x1, esr_el1
- gic_prio_kentry_setup tmp=x2
- enable_dbg
- mov x0, sp
- bl do_serror
- kernel_exit 1
-ENDPROC(el1_error)
-
-el0_error:
- kernel_entry 0
-el0_error_naked:
- mrs x25, esr_el1
- gic_prio_kentry_setup tmp=x2
- ct_user_exit_irqoff
- enable_dbg
- mov x0, sp
- mov x1, x25
- bl do_serror
- enable_da_f
- b ret_to_user
-ENDPROC(el0_error)
-
-/*
- * Ok, we need to do extra processing, enter the slow path.
- */
-work_pending:
- mov x0, sp // 'regs'
- bl do_notify_resume
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_on // enabled while in userspace
-#endif
- ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
- b finish_ret_to_user
-/*
- * "slow" syscall return path.
- */
-ret_to_user:
- disable_daif
- gic_prio_kentry_setup tmp=x3
- ldr x1, [tsk, #TSK_TI_FLAGS]
- and x2, x1, #_TIF_WORK_MASK
- cbnz x2, work_pending
-finish_ret_to_user:
- enable_step_tsk x1, x2
+SYM_CODE_START_LOCAL(ret_to_user)
+ ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
+ enable_step_tsk x19, x2
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
- bl stackleak_erase
+ bl stackleak_erase_on_task_stack
#endif
kernel_exit 0
-ENDPROC(ret_to_user)
+SYM_CODE_END(ret_to_user)
.popsection // .entry.text
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-/*
- * Exception vectors trampoline.
- */
- .pushsection ".entry.tramp.text", "ax"
-
+ // Move from tramp_pg_dir to swapper_pg_dir
.macro tramp_map_kernel, tmp
mrs \tmp, ttbr1_el1
- add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+ add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
bic \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
@@ -777,9 +640,10 @@ alternative_else_nop_endif
#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
.endm
+ // Move from swapper_pg_dir to tramp_pg_dir
.macro tramp_unmap_kernel, tmp
mrs \tmp, ttbr1_el1
- sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+ sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*
@@ -789,12 +653,57 @@ alternative_else_nop_endif
*/
.endm
- .macro tramp_ventry, regsize = 64
+ .macro tramp_data_read_var dst, var
+#ifdef CONFIG_RELOCATABLE
+ ldr \dst, .L__tramp_data_\var
+ .ifndef .L__tramp_data_\var
+ .pushsection ".entry.tramp.rodata", "a", %progbits
+ .align 3
+.L__tramp_data_\var:
+ .quad \var
+ .popsection
+ .endif
+#else
+ /*
+ * As !RELOCATABLE implies !RANDOMIZE_BASE the address is always a
+ * compile time constant (and hence not secret and not worth hiding).
+ *
+ * As statically allocated kernel code and data always live in the top
+ * 47 bits of the address space we can sign-extend bit 47 and avoid an
+ * instruction to load the upper 16 bits (which must be 0xFFFF).
+ */
+ movz \dst, :abs_g2_s:\var
+ movk \dst, :abs_g1_nc:\var
+ movk \dst, :abs_g0_nc:\var
+#endif
+ .endm
+
+#define BHB_MITIGATION_NONE 0
+#define BHB_MITIGATION_LOOP 1
+#define BHB_MITIGATION_FW 2
+#define BHB_MITIGATION_INSN 3
+
+ .macro tramp_ventry, vector_start, regsize, kpti, bhb
.align 7
1:
.if \regsize == 64
msr tpidrro_el0, x30 // Restored in kernel_ventry
.endif
+
+ .if \bhb == BHB_MITIGATION_LOOP
+ /*
+ * This sequence must appear before the first indirect branch. i.e. the
+ * ret out of tramp_ventry. It appears here because x30 is free.
+ */
+ __mitigate_spectre_bhb_loop x30
+ .endif // \bhb == BHB_MITIGATION_LOOP
+
+ .if \bhb == BHB_MITIGATION_INSN
+ clearbhb
+ isb
+ .endif // \bhb == BHB_MITIGATION_INSN
+
+ .if \kpti == 1
/*
* Defend against branch aliasing attacks by pushing a dummy
* entry onto the return stack and using a RET instruction to
@@ -804,67 +713,106 @@ alternative_else_nop_endif
b .
2:
tramp_map_kernel x30
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x30, tramp_vectors + PAGE_SIZE
alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
- ldr x30, [x30]
-#else
- ldr x30, =vectors
-#endif
+ tramp_data_read_var x30, vectors
alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
- prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ prfm plil1strm, [x30, #(1b - \vector_start)]
alternative_else_nop_endif
+
msr vbar_el1, x30
- add x30, x30, #(1b - tramp_vectors)
isb
- ret
- .endm
+ .else
+ adr_l x30, vectors
+ .endif // \kpti == 1
- .macro tramp_exit, regsize = 64
- adr x30, tramp_vectors
- msr vbar_el1, x30
- tramp_unmap_kernel x30
- .if \regsize == 64
- mrs x30, far_el1
- .endif
- eret
- sb
+ .if \bhb == BHB_MITIGATION_FW
+ /*
+ * The firmware sequence must appear before the first indirect branch.
+ * i.e. the ret out of tramp_ventry. But it also needs the stack to be
+ * mapped to save/restore the registers the SMC clobbers.
+ */
+ __mitigate_spectre_bhb_fw
+ .endif // \bhb == BHB_MITIGATION_FW
+
+ add x30, x30, #(1b - \vector_start + 4)
+ ret
+.org 1b + 128 // Did we overflow the ventry slot?
.endm
- .align 11
-ENTRY(tramp_vectors)
+ .macro generate_tramp_vector, kpti, bhb
+.Lvector_start\@:
.space 0x400
- tramp_ventry
- tramp_ventry
- tramp_ventry
- tramp_ventry
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, \kpti, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, \kpti, \bhb
+ .endr
+ .endm
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
- tramp_ventry 32
-END(tramp_vectors)
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ * The order must match __bp_harden_el1_vectors and the
+ * arm64_bp_harden_el1_vectors enum.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+ .align 11
+SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE
+SYM_CODE_END(tramp_vectors)
+
+SYM_CODE_START_LOCAL(tramp_exit)
+ tramp_unmap_kernel x29
+ mrs x29, far_el1 // restore x29
+ eret
+ sb
+SYM_CODE_END(tramp_exit)
+ .popsection // .entry.tramp.text
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
-ENTRY(tramp_exit_native)
- tramp_exit
-END(tramp_exit_native)
+/*
+ * Exception vectors for spectre mitigations on entry from EL1 when
+ * kpti is not in use.
+ */
+ .macro generate_el1_vector, bhb
+.Lvector_start\@:
+ kernel_ventry 1, t, 64, sync // Synchronous EL1t
+ kernel_ventry 1, t, 64, irq // IRQ EL1t
+ kernel_ventry 1, t, 64, fiq // FIQ EL1h
+ kernel_ventry 1, t, 64, error // Error EL1t
+
+ kernel_ventry 1, h, 64, sync // Synchronous EL1h
+ kernel_ventry 1, h, 64, irq // IRQ EL1h
+ kernel_ventry 1, h, 64, fiq // FIQ EL1h
+ kernel_ventry 1, h, 64, error // Error EL1h
+
+ .rept 4
+ tramp_ventry .Lvector_start\@, 64, 0, \bhb
+ .endr
+ .rept 4
+ tramp_ventry .Lvector_start\@, 32, 0, \bhb
+ .endr
+ .endm
-ENTRY(tramp_exit_compat)
- tramp_exit 32
-END(tramp_exit_compat)
+/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */
+ .pushsection ".entry.text", "ax"
+ .align 11
+SYM_CODE_START(__bp_harden_el1_vectors)
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ generate_el1_vector bhb=BHB_MITIGATION_LOOP
+ generate_el1_vector bhb=BHB_MITIGATION_FW
+ generate_el1_vector bhb=BHB_MITIGATION_INSN
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+SYM_CODE_END(__bp_harden_el1_vectors)
+ .popsection
- .ltorg
- .popsection // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
- .pushsection ".rodata", "a"
- .align PAGE_SHIFT
- .globl __entry_tramp_data_start
-__entry_tramp_data_start:
- .quad vectors
- .popsection // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
* Register switch for AArch64. The callee-saved registers need to be saved
@@ -874,7 +822,7 @@ __entry_tramp_data_start:
* Previous and next are guaranteed not to be the same.
*
*/
-ENTRY(cpu_switch_to)
+SYM_FUNC_START(cpu_switch_to)
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -895,23 +843,62 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+ ptrauth_keys_install_kernel x1, x8, x9, x10
+ scs_save x0
+ scs_load_current
ret
-ENDPROC(cpu_switch_to)
+SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
/*
* This is how we return from a fork.
*/
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
bl schedule_tail
cbz x19, 1f // not a kernel thread
mov x0, x20
blr x19
1: get_current_task tsk
+ mov x0, sp
+ bl asm_exit_to_user_mode
b ret_to_user
-ENDPROC(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
NOKPROBE(ret_from_fork)
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ * void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using this CPU's irq stack and shadow irq stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+#ifdef CONFIG_SHADOW_CALL_STACK
+ get_current_task x16
+ scs_save x16
+ ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
+#endif
+
+ /* Create a frame record to save our LR and SP (implicit in FP) */
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ ldr_this_cpu x16, irq_stack_ptr, x17
+
+ /* Move to the new stack and call the function there */
+ add sp, x16, #IRQ_STACK_SIZE
+ blr x1
+
+ /*
+ * Restore the SP from the FP, and restore the FP and LR from the frame
+ * record.
+ */
+ mov sp, x29
+ ldp x29, x30, [sp], #16
+ scs_load_current
+ ret
+SYM_FUNC_END(call_on_irq_stack)
+NOKPROBE(call_on_irq_stack)
+
#ifdef CONFIG_ARM_SDE_INTERFACE
#include <asm/sdei.h>
@@ -936,9 +923,8 @@ NOKPROBE(ret_from_fork)
* This clobbers x4, __sdei_handler() will restore this from firmware's
* copy.
*/
-.ltorg
.pushsection ".entry.tramp.text", "ax"
-ENTRY(__sdei_asm_entry_trampoline)
+SYM_CODE_START(__sdei_asm_entry_trampoline)
mrs x4, ttbr1_el1
tbz x4, #USER_ASID_BIT, 1f
@@ -947,20 +933,12 @@ ENTRY(__sdei_asm_entry_trampoline)
mov x4, xzr
/*
- * Use reg->interrupted_regs.addr_limit to remember whether to unmap
- * the kernel on exit.
+ * Remember whether to unmap the kernel on exit.
*/
-1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adr x4, tramp_vectors + PAGE_SIZE
- add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
- ldr x4, [x4]
-#else
- ldr x4, =__sdei_asm_handler
-#endif
+1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
+ tramp_data_read_var x4, __sdei_asm_handler
br x4
-ENDPROC(__sdei_asm_entry_trampoline)
+SYM_CODE_END(__sdei_asm_entry_trampoline)
NOKPROBE(__sdei_asm_entry_trampoline)
/*
@@ -970,23 +948,16 @@ NOKPROBE(__sdei_asm_entry_trampoline)
* x2: exit_mode
* x4: struct sdei_registered_event argument from registration time.
*/
-ENTRY(__sdei_asm_exit_trampoline)
- ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+SYM_CODE_START(__sdei_asm_exit_trampoline)
+ ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)]
cbnz x4, 1f
tramp_unmap_kernel tmp=x4
1: sdei_handler_exit exit_mode=x2
-ENDPROC(__sdei_asm_exit_trampoline)
+SYM_CODE_END(__sdei_asm_exit_trampoline)
NOKPROBE(__sdei_asm_exit_trampoline)
- .ltorg
.popsection // .entry.tramp.text
-#ifdef CONFIG_RANDOMIZE_BASE
-.pushsection ".rodata", "a"
-__sdei_asm_trampoline_next_handler:
- .quad __sdei_asm_handler
-.popsection // .rodata
-#endif /* CONFIG_RANDOMIZE_BASE */
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
/*
@@ -1002,7 +973,7 @@ __sdei_asm_trampoline_next_handler:
* follow SMC-CC. We save (or retrieve) all the registers as the handler may
* want them.
*/
-ENTRY(__sdei_asm_handler)
+SYM_CODE_START(__sdei_asm_handler)
stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
@@ -1022,13 +993,20 @@ ENTRY(__sdei_asm_handler)
mov x19, x1
+ /* Store the registered-event for crash_smp_send_stop() */
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+ cbnz w4, 1f
+ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+ b 2f
+1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2: str x19, [x5]
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1038,6 +1016,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
@@ -1069,14 +1056,24 @@ ENTRY(__sdei_asm_handler)
mov sp, x1
mov x1, x0 // address to complete_and_resume
- /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
- cmp x0, #1
+ /* x0 = (x0 <= SDEI_EV_FAILED) ?
+ * EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME
+ */
+ cmp x0, #SDEI_EV_FAILED
mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
csel x0, x2, x3, ls
ldr_l x2, sdei_exit_mode
+ /* Clear the registered-event seen by crash_smp_send_stop() */
+ ldrb w3, [x4, #SDEI_EVENT_PRIORITY]
+ cbnz w3, 1f
+ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+ b 2f
+1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2: str xzr, [x5]
+
alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
sdei_handler_exit exit_mode=x2
alternative_else_nop_endif
@@ -1085,6 +1082,17 @@ alternative_else_nop_endif
tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
br x5
#endif
-ENDPROC(__sdei_asm_handler)
+SYM_CODE_END(__sdei_asm_handler)
NOKPROBE(__sdei_asm_handler)
+
+SYM_CODE_START(__sdei_handler_abort)
+ mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+ adr x1, 1f
+ ldr_l x2, sdei_exit_mode
+ sdei_handler_exit exit_mode=x2
+ // exit the handler and jump to the next instruction.
+ // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
+1: ret
+SYM_CODE_END(__sdei_handler_abort)
+NOKPROBE(__sdei_handler_abort)
#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 3eb338f14386..a5dc6f764195 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -12,8 +12,10 @@
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
+#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
@@ -31,9 +33,11 @@
#include <linux/swab.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/neon.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
@@ -75,15 +79,19 @@
* indicate whether or not the userland FPSIMD state of the current task is
* present in the registers. The flag is set unless the FPSIMD registers of this
* CPU currently contain the most recent userland FPSIMD state of the current
- * task.
+ * task. If the task is behaving as a VMM, then this is will be managed by
+ * KVM which will clear it to indicate that the vcpu FPSIMD state is currently
+ * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware
+ * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
+ * flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -110,72 +118,117 @@
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-struct fpsimd_last_state_struct {
- struct user_fpsimd_state *st;
- void *sve_state;
- unsigned int sve_vl;
+
+static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+
+__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
+#ifdef CONFIG_ARM64_SVE
+ [ARM64_VEC_SVE] = {
+ .type = ARM64_VEC_SVE,
+ .name = "SVE",
+ .min_vl = SVE_VL_MIN,
+ .max_vl = SVE_VL_MIN,
+ .max_virtualisable_vl = SVE_VL_MIN,
+ },
+#endif
+#ifdef CONFIG_ARM64_SME
+ [ARM64_VEC_SME] = {
+ .type = ARM64_VEC_SME,
+ .name = "SME",
+ },
+#endif
+};
+
+static unsigned int vec_vl_inherit_flag(enum vec_type type)
+{
+ switch (type) {
+ case ARM64_VEC_SVE:
+ return TIF_SVE_VL_INHERIT;
+ case ARM64_VEC_SME:
+ return TIF_SME_VL_INHERIT;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+struct vl_config {
+ int __default_vl; /* Default VL for tasks */
};
-static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
+static struct vl_config vl_config[ARM64_VEC_MAX];
-/* Default VL for tasks that don't set it explicitly: */
-static int sve_default_vl = -1;
+static inline int get_default_vl(enum vec_type type)
+{
+ return READ_ONCE(vl_config[type].__default_vl);
+}
#ifdef CONFIG_ARM64_SVE
-/* Maximum supported vector length across all CPUs (initially poisoned) */
-int __ro_after_init sve_max_vl = SVE_VL_MIN;
-int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
+static inline int get_sve_default_vl(void)
+{
+ return get_default_vl(ARM64_VEC_SVE);
+}
-/*
- * Set of available vector lengths,
- * where length vq encoded as bit __vq_to_bit(vq):
- */
-__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
-/* Set of vector lengths present on at least one cpu: */
-static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
+static inline void set_default_vl(enum vec_type type, int val)
+{
+ WRITE_ONCE(vl_config[type].__default_vl, val);
+}
+
+static inline void set_sve_default_vl(int val)
+{
+ set_default_vl(ARM64_VEC_SVE, val);
+}
static void __percpu *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
-extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
-extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
+#ifdef CONFIG_ARM64_SME
-static void __get_cpu_fpsimd_context(void)
+static int get_sme_default_vl(void)
{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
+ return get_default_vl(ARM64_VEC_SME);
+}
- WARN_ON(busy);
+static void set_sme_default_vl(int val)
+{
+ set_default_vl(ARM64_VEC_SME, val);
}
+static void sme_free(struct task_struct *);
+
+#else
+
+static inline void sme_free(struct task_struct *t) { }
+
+#endif
+
+static void fpsimd_bind_task_to_cpu(void);
+
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
*/
static void get_cpu_fpsimd_context(void)
{
- preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
}
/*
@@ -187,55 +240,61 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
}
-static bool have_cpu_fpsimd_context(void)
+unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
+ return task->thread.vl[type];
}
-/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
- */
-static void __sve_free(struct task_struct *task)
+void task_set_vl(struct task_struct *task, enum vec_type type,
+ unsigned long vl)
{
- kfree(task->thread.sve_state);
- task->thread.sve_state = NULL;
+ task->thread.vl[type] = vl;
}
-static void sve_free(struct task_struct *task)
+unsigned int task_get_vl_onexec(const struct task_struct *task,
+ enum vec_type type)
{
- WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+ return task->thread.vl_onexec[type];
+}
- __sve_free(task);
+void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
+ unsigned long vl)
+{
+ task->thread.vl_onexec[type] = vl;
}
/*
+ * TIF_SME controls whether a task can use SME without trapping while
+ * in userspace, when TIF_SME is set then we must have storage
+ * allocated in sve_state and sme_state to store the contents of both ZA
+ * and the SVE registers for both streaming and non-streaming modes.
+ *
+ * If both SVCR.ZA and SVCR.SM are disabled then at any point we
+ * may disable TIF_SME and reenable traps.
+ */
+
+
+/*
* TIF_SVE controls whether a task can use SVE without trapping while
- * in userspace, and also the way a task's FPSIMD/SVE state is stored
- * in thread_struct.
+ * in userspace, and also (together with TIF_SME) the way a task's
+ * FPSIMD/SVE state is stored in thread_struct.
*
* The kernel uses this flag to track whether a user task is actively
* using SVE, and therefore whether full SVE register state needs to
* be tracked. If not, the cheaper FPSIMD context handling code can
* be used instead of the more costly SVE equivalents.
*
- * * TIF_SVE set:
+ * * TIF_SVE or SVCR.SM set:
*
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
*
- * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in in
- * task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl.
- *
- * task->thread.sve_state must point to a valid buffer at least
- * sve_state_size(task) bytes in size.
- *
* During any syscall, the kernel may optionally clear TIF_SVE and
* discard the vector state except for the FPSIMD subset.
*
@@ -245,7 +304,15 @@ static void sve_free(struct task_struct *task)
* do_sve_acc() to be called, which does some preparation and then
* sets TIF_SVE.
*
- * When stored, FPSIMD registers V0-V31 are encoded in
+ * During any syscall, the kernel may optionally clear TIF_SVE and
+ * discard the vector state except for the FPSIMD subset.
+ *
+ * The data will be stored in one of two formats:
+ *
+ * * FPSIMD only - FP_STATE_FPSIMD:
+ *
+ * When the FPSIMD only state stored task->thread.fp_type is set to
+ * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
* logically zero but not stored anywhere; P0-P15 and FFR are not
* stored and have unspecified values from userspace's point of
@@ -253,7 +320,23 @@ static void sve_free(struct task_struct *task)
* but userspace is discouraged from relying on this.
*
* task->thread.sve_state does not need to be non-NULL, valid or any
- * particular size: it must not be dereferenced.
+ * particular size: it must not be dereferenced and any data stored
+ * there should be considered stale and not referenced.
+ *
+ * * SVE state - FP_STATE_SVE:
+ *
+ * When the full SVE state is stored task->thread.fp_type is set to
+ * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
+ * corresponding Zn), P0-P15 and FFR are encoded in in
+ * task->thread.sve_state, formatted appropriately for vector
+ * length task->thread.sve_vl or, if SVCR.SM is set,
+ * task->thread.sme_vl. The storage for the vector registers in
+ * task->thread.uw.fpsimd_state should be ignored.
+ *
+ * task->thread.sve_state must point to a valid buffer at least
+ * sve_state_size(task) bytes in size. The data stored in
+ * task->thread.uw.fpsimd_state.vregs should be considered stale
+ * and not referenced.
*
* * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
* irrespective of whether TIF_SVE is clear or set, since these are
@@ -269,45 +352,149 @@ static void sve_free(struct task_struct *task)
*/
static void task_fpsimd_load(void)
{
- WARN_ON(!have_cpu_fpsimd_context());
+ bool restore_sve_regs = false;
+ bool restore_ffr;
+
+ WARN_ON(!system_supports_fpsimd());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
+
+ if (system_supports_sve() || system_supports_sme()) {
+ switch (current->thread.fp_type) {
+ case FP_STATE_FPSIMD:
+ /* Stop tracking SVE for this task until next use. */
+ if (test_and_clear_thread_flag(TIF_SVE))
+ sve_user_disable();
+ break;
+ case FP_STATE_SVE:
+ if (!thread_sm_enabled(&current->thread) &&
+ !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
+ sve_user_enable();
+
+ if (test_thread_flag(TIF_SVE))
+ sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+
+ restore_sve_regs = true;
+ restore_ffr = true;
+ break;
+ default:
+ /*
+ * This indicates either a bug in
+ * fpsimd_save_user_state() or memory corruption, we
+ * should always record an explicit format
+ * when we save. We always at least have the
+ * memory allocated for FPSMID registers so
+ * try that and hope for the best.
+ */
+ WARN_ON_ONCE(1);
+ clear_thread_flag(TIF_SVE);
+ break;
+ }
+ }
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ /* Restore SME, override SVE register configuration if needed */
+ if (system_supports_sme()) {
+ unsigned long sme_vl = task_get_sme_vl(current);
+
+ /* Ensure VL is set up for restoring data */
+ if (test_thread_flag(TIF_SME))
+ sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
+
+ write_sysreg_s(current->thread.svcr, SYS_SVCR);
+
+ if (thread_za_enabled(&current->thread))
+ sme_load_state(current->thread.sme_state,
+ system_supports_sme2());
+
+ if (thread_sm_enabled(&current->thread))
+ restore_ffr = system_supports_fa64();
+ }
+
+ if (restore_sve_regs) {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
- sve_vq_from_vl(current->thread.sve_vl) - 1);
- else
+ restore_ffr);
+ } else {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(&current->thread.uw.fpsimd_state);
+ }
}
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
- * date with respect to the CPU registers.
+ * date with respect to the CPU registers. Note carefully that the
+ * current context is the context last bound to the CPU stored in
+ * last, if KVM is involved this may be the guest VM context rather
+ * than the host thread for the VM pointed to by current. This means
+ * that we must always reference the state storage via last rather
+ * than via current, if we are saving KVM state then it will have
+ * ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
- struct fpsimd_last_state_struct const *last =
+ struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+ bool save_sve_regs = false;
+ bool save_ffr;
+ unsigned int vl;
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(!system_supports_fpsimd());
+ WARN_ON(preemptible());
- if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
- if (WARN_ON(sve_get_vl() != last->sve_vl)) {
- /*
- * Can't save the user regs, so current would
- * re-enter user with corrupt state.
- * There's no way to recover, so kill it:
- */
- force_signal_inject(SIGKILL, SI_KERNEL, 0);
- return;
- }
+ if (test_thread_flag(TIF_FOREIGN_FPSTATE))
+ return;
+
+ /*
+ * If a task is in a syscall the ABI allows us to only
+ * preserve the state shared with FPSIMD so don't bother
+ * saving the full SVE state in that case.
+ */
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
+ !in_syscall(current_pt_regs())) ||
+ last->to_save == FP_STATE_SVE) {
+ save_sve_regs = true;
+ save_ffr = true;
+ vl = last->sve_vl;
+ }
- sve_save_state((char *)last->sve_state +
- sve_ffr_offset(last->sve_vl),
- &last->st->fpsr);
- } else
- fpsimd_save_state(last->st);
+ if (system_supports_sme()) {
+ u64 *svcr = last->svcr;
+
+ *svcr = read_sysreg_s(SYS_SVCR);
+
+ if (*svcr & SVCR_ZA_MASK)
+ sme_save_state(last->sme_state,
+ system_supports_sme2());
+
+ /* If we are in streaming mode override regular SVE. */
+ if (*svcr & SVCR_SM_MASK) {
+ save_sve_regs = true;
+ save_ffr = system_supports_fa64();
+ vl = last->sme_vl;
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
+ /* Get the configured VL from RDVL, will account for SM */
+ if (WARN_ON(sve_get_vl() != vl)) {
+ /*
+ * Can't save the user regs, so current would
+ * re-enter user with corrupt state.
+ * There's no way to recover, so kill it:
+ */
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+ return;
+ }
+
+ sve_save_state((char *)last->sve_state +
+ sve_ffr_offset(vl),
+ &last->st->fpsr, save_ffr);
+ *last->fp_type = FP_STATE_SVE;
+ } else {
+ fpsimd_save_state(last->st);
+ *last->fp_type = FP_STATE_FPSIMD;
}
}
@@ -317,33 +504,38 @@ static void fpsimd_save(void)
* If things go wrong there's a bug somewhere, but try to fall back to a
* safe choice.
*/
-static unsigned int find_supported_vector_length(unsigned int vl)
+static unsigned int find_supported_vector_length(enum vec_type type,
+ unsigned int vl)
{
+ struct vl_info *info = &vl_info[type];
int bit;
- int max_vl = sve_max_vl;
+ int max_vl = info->max_vl;
if (WARN_ON(!sve_vl_valid(vl)))
- vl = SVE_VL_MIN;
+ vl = info->min_vl;
if (WARN_ON(!sve_vl_valid(max_vl)))
- max_vl = SVE_VL_MIN;
+ max_vl = info->min_vl;
if (vl > max_vl)
vl = max_vl;
+ if (vl < info->min_vl)
+ vl = info->min_vl;
- bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
+ bit = find_next_bit(info->vq_map, SVE_VQ_MAX,
__vq_to_bit(sve_vq_from_vl(vl)));
return sve_vl_from_vq(__bit_to_vq(bit));
}
-#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
-static int sve_proc_do_default_vl(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+static int vec_proc_do_default_vl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
+ struct vl_info *info = table->extra1;
+ enum vec_type type = info->type;
int ret;
- int vl = sve_default_vl;
+ int vl = get_default_vl(type);
struct ctl_table tmp_table = {
.data = &vl,
.maxlen = sizeof(vl),
@@ -355,12 +547,12 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write,
/* Writing -1 has the special meaning "set to max": */
if (vl == -1)
- vl = sve_max_vl;
+ vl = info->max_vl;
if (!sve_vl_valid(vl))
return -EINVAL;
- sve_default_vl = find_supported_vector_length(vl);
+ set_default_vl(type, find_supported_vector_length(type, vl));
return 0;
}
@@ -368,9 +560,9 @@ static struct ctl_table sve_default_vl_table[] = {
{
.procname = "sve_default_vector_length",
.mode = 0644,
- .proc_handler = sve_proc_do_default_vl,
+ .proc_handler = vec_proc_do_default_vl,
+ .extra1 = &vl_info[ARM64_VEC_SVE],
},
- { }
};
static int __init sve_sysctl_init(void)
@@ -382,9 +574,32 @@ static int __init sve_sysctl_init(void)
return 0;
}
-#else /* ! CONFIG_SYSCTL */
+#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
static int __init sve_sysctl_init(void) { return 0; }
-#endif /* ! CONFIG_SYSCTL */
+#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
+
+#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
+static struct ctl_table sme_default_vl_table[] = {
+ {
+ .procname = "sme_default_vector_length",
+ .mode = 0644,
+ .proc_handler = vec_proc_do_default_vl,
+ .extra1 = &vl_info[ARM64_VEC_SME],
+ },
+};
+
+static int __init sme_sysctl_init(void)
+{
+ if (system_supports_sme())
+ if (!register_sysctl("abi", sme_default_vl_table))
+ return -EINVAL;
+
+ return 0;
+}
+
+#else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
+static int __init sme_sysctl_init(void) { return 0; }
+#endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */
#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
@@ -436,10 +651,10 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
- vq = sve_vq_from_vl(task->thread.sve_vl);
+ vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
__fpsimd_to_sve(sst, fst, vq);
}
@@ -456,16 +671,17 @@ static void fpsimd_to_sve(struct task_struct *task)
*/
static void sve_to_fpsimd(struct task_struct *task)
{
- unsigned int vq;
+ unsigned int vq, vl;
void const *sst = task->thread.sve_state;
struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
- vq = sve_vq_from_vl(task->thread.sve_vl);
+ vl = thread_get_cur_vl(&task->thread);
+ vq = sve_vq_from_vl(vl);
for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
@@ -473,6 +689,22 @@ static void sve_to_fpsimd(struct task_struct *task)
}
#ifdef CONFIG_ARM64_SVE
+/*
+ * Call __sve_free() directly only if you know task can't be scheduled
+ * or preempted.
+ */
+static void __sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
+}
+
+static void sve_free(struct task_struct *task)
+{
+ WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+
+ __sve_free(task);
+}
/*
* Return how many bytes of memory are required to store the full SVE
@@ -480,7 +712,14 @@ static void sve_to_fpsimd(struct task_struct *task)
*/
size_t sve_state_size(struct task_struct const *task)
{
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
+ unsigned int vl = 0;
+
+ if (system_supports_sve())
+ vl = task_get_sve_vl(task);
+ if (system_supports_sme())
+ vl = max(vl, task_get_sme_vl(task));
+
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
}
/*
@@ -493,26 +732,35 @@ size_t sve_state_size(struct task_struct const *task)
* do_sve_acc() case, there is no ABI requirement to hide stale data
* written previously be task.
*/
-void sve_alloc(struct task_struct *task)
+void sve_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sve_state) {
- memset(task->thread.sve_state, 0, sve_state_size(current));
+ if (flush)
+ memset(task->thread.sve_state, 0,
+ sve_state_size(task));
return;
}
/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
task->thread.sve_state =
kzalloc(sve_state_size(task), GFP_KERNEL);
-
- /*
- * If future SVE revisions can have larger vectors though,
- * this may cease to be true:
- */
- BUG_ON(!task->thread.sve_state);
}
/*
+ * Force the FPSIMD state shared with SVE to be updated in the SVE state
+ * even if the SVE state is the current active state.
+ *
+ * This should only be called by ptrace. task must be non-runnable.
+ * task->thread.sve_state must point to at least sve_state_size(task)
+ * bytes of allocated kernel memory.
+ */
+void fpsimd_force_sync_to_sve(struct task_struct *task)
+{
+ fpsimd_to_sve(task);
+}
+
+/*
* Ensure that task->thread.sve_state is up to date with respect to
* the user task, irrespective of when SVE is in use or not.
*
@@ -522,7 +770,8 @@ void sve_alloc(struct task_struct *task)
*/
void fpsimd_sync_to_sve(struct task_struct *task)
{
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
fpsimd_to_sve(task);
}
@@ -536,7 +785,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
*/
void sve_sync_to_fpsimd(struct task_struct *task)
{
- if (test_tsk_thread_flag(task, TIF_SVE))
+ if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
@@ -558,18 +807,21 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
- vq = sve_vq_from_vl(task->thread.sve_vl);
+ vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
__fpsimd_to_sve(sst, fst, vq);
}
-int sve_set_vector_length(struct task_struct *task,
+int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
+ bool free_sme = false;
+
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
@@ -578,57 +830,82 @@ int sve_set_vector_length(struct task_struct *task,
return -EINVAL;
/*
- * Clamp to the maximum vector length that VL-agnostic SVE code can
- * work with. A flag may be assigned in the future to allow setting
- * of larger vector lengths without confusing older software.
+ * Clamp to the maximum vector length that VL-agnostic code
+ * can work with. A flag may be assigned in the future to
+ * allow setting of larger vector lengths without confusing
+ * older software.
*/
- if (vl > SVE_VL_ARCH_MAX)
- vl = SVE_VL_ARCH_MAX;
+ if (vl > VL_ARCH_MAX)
+ vl = VL_ARCH_MAX;
- vl = find_supported_vector_length(vl);
+ vl = find_supported_vector_length(type, vl);
if (flags & (PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
- task->thread.sve_vl_onexec = vl;
+ task_set_vl_onexec(task, type, vl);
else
/* Reset VL to system default on next exec: */
- task->thread.sve_vl_onexec = 0;
+ task_set_vl_onexec(task, type, 0);
/* Only actually set the VL if not deferred: */
if (flags & PR_SVE_SET_VL_ONEXEC)
goto out;
- if (vl == task->thread.sve_vl)
+ if (vl == task_get_vl(task, type))
goto out;
/*
* To ensure the FPSIMD bits of the SVE vector registers are preserved,
* write any live register state back to task_struct, and convert to a
- * non-SVE thread.
+ * regular FPSIMD thread.
*/
if (task == current) {
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
}
fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
+ if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
+ thread_sm_enabled(&task->thread)) {
sve_to_fpsimd(task);
+ task->thread.fp_type = FP_STATE_FPSIMD;
+ }
+
+ if (system_supports_sme()) {
+ if (type == ARM64_VEC_SME ||
+ !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+ /*
+ * We are changing the SME VL or weren't using
+ * SME anyway, discard the state and force a
+ * reallocation.
+ */
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_tsk_thread_flag(task, TIF_SME);
+ free_sme = true;
+ }
+ }
if (task == current)
put_cpu_fpsimd_context();
+ task_set_vl(task, type, vl);
+
/*
- * Force reallocation of task SVE state to the correct size
- * on next use:
+ * Free the changed states if they are not in use, SME will be
+ * reallocated to the correct size on next use and we just
+ * allocate SVE now in case it is needed for use in streaming
+ * mode.
*/
sve_free(task);
+ sve_alloc(task, true);
- task->thread.sve_vl = vl;
+ if (free_sme)
+ sme_free(task);
out:
- update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+ update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
flags & PR_SVE_VL_INHERIT);
return 0;
@@ -636,20 +913,21 @@ out:
/*
* Encode the current vector length and flags for return.
- * This is only required for prctl(): ptrace has separate fields
+ * This is only required for prctl(): ptrace has separate fields.
+ * SVE and SME use the same bits for _ONEXEC and _INHERIT.
*
- * flags are as for sve_set_vector_length().
+ * flags are as for vec_set_vector_length().
*/
-static int sve_prctl_status(unsigned long flags)
+static int vec_prctl_status(enum vec_type type, unsigned long flags)
{
int ret;
if (flags & PR_SVE_SET_VL_ONEXEC)
- ret = current->thread.sve_vl_onexec;
+ ret = task_get_vl_onexec(current, type);
else
- ret = current->thread.sve_vl;
+ ret = task_get_vl(current, type);
- if (test_thread_flag(TIF_SVE_VL_INHERIT))
+ if (test_thread_flag(vec_vl_inherit_flag(type)))
ret |= PR_SVE_VL_INHERIT;
return ret;
@@ -664,38 +942,81 @@ int sve_set_current_vl(unsigned long arg)
vl = arg & PR_SVE_VL_LEN_MASK;
flags = arg & ~vl;
- if (!system_supports_sve())
+ if (!system_supports_sve() || is_compat_task())
return -EINVAL;
- ret = sve_set_vector_length(current, vl, flags);
+ ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags);
if (ret)
return ret;
- return sve_prctl_status(flags);
+ return vec_prctl_status(ARM64_VEC_SVE, flags);
}
/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() || is_compat_task())
+ return -EINVAL;
+
+ return vec_prctl_status(ARM64_VEC_SVE, 0);
+}
+
+#ifdef CONFIG_ARM64_SME
+/* PR_SME_SET_VL */
+int sme_set_current_vl(unsigned long arg)
+{
+ unsigned long vl, flags;
+ int ret;
+
+ vl = arg & PR_SME_VL_LEN_MASK;
+ flags = arg & ~vl;
+
+ if (!system_supports_sme() || is_compat_task())
+ return -EINVAL;
+
+ ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags);
+ if (ret)
+ return ret;
+
+ return vec_prctl_status(ARM64_VEC_SME, flags);
+}
+
+/* PR_SME_GET_VL */
+int sme_get_current_vl(void)
+{
+ if (!system_supports_sme() || is_compat_task())
return -EINVAL;
- return sve_prctl_status(0);
+ return vec_prctl_status(ARM64_VEC_SME, 0);
}
+#endif /* CONFIG_ARM64_SME */
-static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
+static void vec_probe_vqs(struct vl_info *info,
+ DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
- unsigned long zcr;
bitmap_zero(map, SVE_VQ_MAX);
- zcr = ZCR_ELx_LEN_MASK;
- zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
-
for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
- write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
- vl = sve_get_vl();
+ write_vl(info->type, vq - 1); /* self-syncing */
+
+ switch (info->type) {
+ case ARM64_VEC_SVE:
+ vl = sve_get_vl();
+ break;
+ case ARM64_VEC_SME:
+ vl = sme_get_vl();
+ break;
+ default:
+ vl = 0;
+ break;
+ }
+
+ /* Minimum VL identified? */
+ if (sve_vq_from_vl(vl) > vq)
+ break;
+
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
set_bit(__vq_to_bit(vq), map);
}
@@ -705,10 +1026,11 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
* Initialise the set of known supported VQs for the boot CPU.
* This is called during kernel boot, before secondary CPUs are brought up.
*/
-void __init sve_init_vq_map(void)
+void __init vec_init_vq_map(enum vec_type type)
{
- sve_probe_vqs(sve_vq_map);
- bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
+ struct vl_info *info = &vl_info[type];
+ vec_probe_vqs(info, info->vq_map);
+ bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}
/*
@@ -716,30 +1038,33 @@ void __init sve_init_vq_map(void)
* those not supported by the current CPU.
* This function is called during the bring-up of early secondary CPUs only.
*/
-void sve_update_vq_map(void)
+void vec_update_vq_map(enum vec_type type)
{
+ struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
- sve_probe_vqs(tmp_map);
- bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
- bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
+ vec_probe_vqs(info, tmp_map);
+ bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX);
+ bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map,
+ SVE_VQ_MAX);
}
/*
* Check whether the current CPU supports all VQs in the committed set.
* This function is called during the bring-up of late secondary CPUs only.
*/
-int sve_verify_vq_map(void)
+int vec_verify_vq_map(enum vec_type type)
{
+ struct vl_info *info = &vl_info[type];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
- sve_probe_vqs(tmp_map);
+ vec_probe_vqs(info, tmp_map);
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
- if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
- pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
- smp_processor_id());
+ if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
+ pr_warn("%s: cpu%d: Required vector length(s) missing\n",
+ info->name, smp_processor_id());
return -EINVAL;
}
@@ -755,7 +1080,7 @@ int sve_verify_vq_map(void)
/* Recover the set of supported VQs: */
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
/* Find VQs supported that are not globally supported: */
- bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
+ bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);
/* Find the lowest such VQ, if any: */
b = find_last_bit(tmp_map, SVE_VQ_MAX);
@@ -766,9 +1091,9 @@ int sve_verify_vq_map(void)
* Mismatches above sve_max_virtualisable_vl are fine, since
* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
*/
- if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
- pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
- smp_processor_id());
+ if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
+ pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
+ info->name, smp_processor_id());
return -EINVAL;
}
@@ -777,19 +1102,25 @@ int sve_verify_vq_map(void)
static void __init sve_efi_setup(void)
{
+ int max_vl = 0;
+ int i;
+
if (!IS_ENABLED(CONFIG_EFI))
return;
+ for (i = 0; i < ARRAY_SIZE(vl_info); i++)
+ max_vl = max(vl_info[i].max_vl, max_vl);
+
/*
* alloc_percpu() warns and prints a backtrace if this goes wrong.
* This is evidence of a crippled system and we are returning void,
* so no attempt is made to handle this situation here.
*/
- if (!sve_vl_valid(sve_max_vl))
+ if (!sve_vl_valid(max_vl))
goto fail;
efi_sve_state = __alloc_percpu(
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
if (!efi_sve_state)
goto fail;
@@ -799,48 +1130,18 @@ fail:
panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
-/*
- * Enable SVE for EL1.
- * Intended for use by the cpufeatures code during CPU boot.
- */
-void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
}
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-u64 read_zcr_features(void)
-{
- u64 zcr;
- unsigned int vq_max;
-
- /*
- * Set the maximum possible VL, and write zeroes to all other
- * bits to see if they stick.
- */
- sve_kernel_enable(NULL);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return zcr;
-}
-
void __init sve_setup(void)
{
- u64 zcr;
+ struct vl_info *info = &vl_info[ARM64_VEC_SVE];
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
+ int max_bit;
if (!system_supports_sve())
return;
@@ -850,49 +1151,43 @@ void __init sve_setup(void)
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
- if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
- set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);
-
- zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
- sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
+ if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
+ set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
- /*
- * Sanity-check that the max VL we determined through CPU features
- * corresponds properly to sve_vq_map. If not, do our best:
- */
- if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
- sve_max_vl = find_supported_vector_length(sve_max_vl);
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
/*
* For the default VL, pick the maximum supported value <= 64.
* VL == 64 is guaranteed not to grow the signal frame.
*/
- sve_default_vl = find_supported_vector_length(64);
+ set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));
- bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
+ bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
SVE_VQ_MAX);
b = find_last_bit(tmp_map, SVE_VQ_MAX);
if (b >= SVE_VQ_MAX)
/* No non-virtualisable VLs found */
- sve_max_virtualisable_vl = SVE_VQ_MAX;
+ info->max_virtualisable_vl = SVE_VQ_MAX;
else if (WARN_ON(b == SVE_VQ_MAX - 1))
/* No virtualisable VLs? This is architecturally forbidden. */
- sve_max_virtualisable_vl = SVE_VQ_MIN;
+ info->max_virtualisable_vl = SVE_VQ_MIN;
else /* b + 1 < SVE_VQ_MAX */
- sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
+ info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
- if (sve_max_virtualisable_vl > sve_max_vl)
- sve_max_virtualisable_vl = sve_max_vl;
+ if (info->max_virtualisable_vl > info->max_vl)
+ info->max_virtualisable_vl = info->max_vl;
- pr_info("SVE: maximum available vector length %u bytes per vector\n",
- sve_max_vl);
- pr_info("SVE: default vector length %u bytes per vector\n",
- sve_default_vl);
+ pr_info("%s: maximum available vector length %u bytes per vector\n",
+ info->name, info->max_vl);
+ pr_info("%s: default vector length %u bytes per vector\n",
+ info->name, get_sve_default_vl());
/* KVM decides whether to support mismatched systems. Just warn here: */
- if (sve_max_virtualisable_vl < sve_max_vl)
- pr_warn("SVE: unvirtualisable vector lengths present\n");
+ if (sve_max_virtualisable_vl() < sve_max_vl())
+ pr_warn("%s: unvirtualisable vector lengths present\n",
+ info->name);
sve_efi_setup();
}
@@ -904,59 +1199,260 @@ void __init sve_setup(void)
void fpsimd_release_task(struct task_struct *dead_task)
{
__sve_free(dead_task);
+ sme_free(dead_task);
}
#endif /* CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+/*
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
+ *
+ * This function should be used only in preparation for replacing
+ * task->thread.sme_state with new data. The memory is always zeroed
+ * here to prevent stale data from showing through: this is done in
+ * the interest of testability and predictability, the architecture
+ * guarantees that when ZA is enabled it will be zeroed.
+ */
+void sme_alloc(struct task_struct *task, bool flush)
+{
+ if (task->thread.sme_state) {
+ if (flush)
+ memset(task->thread.sme_state, 0,
+ sme_state_size(task));
+ return;
+ }
+
+ /* This could potentially be up to 64K. */
+ task->thread.sme_state =
+ kzalloc(sme_state_size(task), GFP_KERNEL);
+}
+
+static void sme_free(struct task_struct *task)
+{
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = NULL;
+}
+
+void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* Set priority for all PEs to architecturally defined minimum */
+ write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
+ SYS_SMPRI_EL1);
+
+ /* Allow SME in kernel */
+ write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
+ isb();
+
+ /* Allow EL0 to access TPIDR2 */
+ write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
+ isb();
+}
+
+void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
+
+ /* Allow use of ZT0 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+ SYS_SMCR_EL1);
+}
+
+void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
+
+ /* Allow use of FA64 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
+ SYS_SMCR_EL1);
+}
+
+void __init sme_setup(void)
+{
+ struct vl_info *info = &vl_info[ARM64_VEC_SME];
+ int min_bit, max_bit;
+
+ if (!system_supports_sme())
+ return;
+
+ /*
+ * SME doesn't require any particular vector length be
+ * supported but it does require at least one. We should have
+ * disabled the feature entirely while bringing up CPUs but
+ * let's double check here. The bitmap is SVE_VQ_MAP sized for
+ * sharing with SVE.
+ */
+ WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+
+ min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+ info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
+
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
+
+ WARN_ON(info->min_vl > info->max_vl);
+
+ /*
+ * For the default VL, pick the maximum supported value <= 32
+ * (256 bits) if there is one since this is guaranteed not to
+ * grow the signal frame when in streaming mode, otherwise the
+ * minimum available VL will be used.
+ */
+ set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));
+
+ pr_info("SME: minimum available vector length %u bytes per vector\n",
+ info->min_vl);
+ pr_info("SME: maximum available vector length %u bytes per vector\n",
+ info->max_vl);
+ pr_info("SME: default vector length %u bytes per vector\n",
+ get_sme_default_vl());
+}
+
+#endif /* CONFIG_ARM64_SME */
+
+static void sve_init_regs(void)
+{
+ /*
+ * Convert the FPSIMD state to SVE, zeroing all the state that
+ * is not shared with FPSIMD. If (as is likely) the current
+ * state is live in the registers then do this there and
+ * update our metadata for the current task including
+ * disabling the trap, otherwise update our in-memory copy.
+ * We are guaranteed to not be in streaming mode, we can only
+ * take a SVE trap when not in streaming mode and we can't be
+ * in streaming mode when taking a SME trap.
+ */
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ unsigned long vq_minus_one =
+ sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+ sve_set_vq(vq_minus_one);
+ sve_flush_live(true, vq_minus_one);
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
+ current->thread.fp_type = FP_STATE_SVE;
+ }
+}
+
/*
* Trapped SVE access
*
* Storage is allocated for the full SVE state, the current FPSIMD
- * register contents are migrated across, and TIF_SVE is set so that
- * the SVE access trap will be disabled the next time this task
- * reaches ret_to_user.
+ * register contents are migrated across, and the access trap is
+ * disabled.
*
- * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
+ * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
* would have disabled the SVE access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
-void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
/* Even if we chose not to use SVE, the hardware could still trap: */
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
- sve_alloc(current);
+ sve_alloc(current, true);
+ if (!current->thread.sve_state) {
+ force_sig(SIGKILL);
+ return;
+ }
get_cpu_fpsimd_context();
- fpsimd_save();
-
- /* Force ret_to_user to reload the registers: */
- fpsimd_flush_task_state(current);
-
- fpsimd_to_sve(current);
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
+ /*
+ * Even if the task can have used streaming mode we can only
+ * generate SVE access traps in normal SVE mode and
+ * transitioning out of streaming mode may discard any
+ * streaming mode state. Always clear the high bits to avoid
+ * any potential errors tracking what is properly initialised.
+ */
+ sve_init_regs();
+
+ put_cpu_fpsimd_context();
+}
+
+/*
+ * Trapped SME access
+ *
+ * Storage is allocated for the full SVE and SME state, the current
+ * FPSIMD register contents are migrated to SVE if SVE is not already
+ * active, and the access trap is disabled.
+ *
+ * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
+ * would have disabled the SME access trap for userspace during
+ * ret_to_user, making an SME access trap impossible in that case.
+ */
+void do_sme_acc(unsigned long esr, struct pt_regs *regs)
+{
+ /* Even if we chose not to use SME, the hardware could still trap: */
+ if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * If this not a trap due to SME being disabled then something
+ * is being used in the wrong mode, report as SIGILL.
+ */
+ if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ sve_alloc(current, false);
+ sme_alloc(current, true);
+ if (!current->thread.sve_state || !current->thread.sme_state) {
+ force_sig(SIGKILL);
+ return;
+ }
+
+ get_cpu_fpsimd_context();
+
+ /* With TIF_SME userspace shouldn't generate any traps */
+ if (test_and_set_thread_flag(TIF_SME))
+ WARN_ON(1);
+
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ unsigned long vq_minus_one =
+ sve_vq_from_vl(task_get_sme_vl(current)) - 1;
+ sme_set_vq(vq_minus_one);
+
+ fpsimd_bind_task_to_cpu();
+ }
+
put_cpu_fpsimd_context();
}
/*
* Trapped FP/ASIMD access.
*/
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ /* Even if we chose not to use FPSIMD, the hardware could still trap: */
+ if (!system_supports_fpsimd()) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * When FPSIMD is enabled, we should never take a trap unless something
+ * has gone very wrong.
+ */
+ BUG();
}
/*
* Raise a SIGFPE for the current process.
*/
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
@@ -978,6 +1474,34 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == &task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = &task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -985,29 +1509,72 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
+
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_load_kernel_state(next);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
+}
+
+static void fpsimd_flush_thread_vl(enum vec_type type)
+{
+ int vl, supported_vl;
/*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
+ * Reset the task vector length as required. This is where we
+ * ensure that all user tasks have a valid vector length
+ * configured: no kernel task can become a user task without
+ * an exec and hence a call to this function. By the time the
+ * first call to this function is made, all early hardware
+ * probing is complete, so __sve_default_vl should be valid.
+ * If a bug causes this to go wrong, we make some noise and
+ * try to fudge thread.sve_vl to a safe value here.
*/
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ vl = task_get_vl_onexec(current, type);
+ if (!vl)
+ vl = get_default_vl(type);
+
+ if (WARN_ON(!sve_vl_valid(vl)))
+ vl = vl_info[type].min_vl;
+
+ supported_vl = find_supported_vector_length(type, vl);
+ if (WARN_ON(supported_vl != vl))
+ vl = supported_vl;
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ task_set_vl(current, type, vl);
- __put_cpu_fpsimd_context();
+ /*
+ * If the task is not set to inherit, ensure that the vector
+ * length will be reset by a subsequent exec:
+ */
+ if (!test_thread_flag(vec_vl_inherit_flag(type)))
+ task_set_vl_onexec(current, type, 0);
}
void fpsimd_flush_thread(void)
{
- int vl, supported_vl;
+ void *sve_state = NULL;
+ void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
@@ -1020,40 +1587,30 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
- /*
- * Reset the task vector length as required.
- * This is where we ensure that all user tasks have a valid
- * vector length configured: no kernel task can become a user
- * task without an exec and hence a call to this function.
- * By the time the first call to this function is made, all
- * early hardware probing is complete, so sve_default_vl
- * should be valid.
- * If a bug causes this to go wrong, we make some noise and
- * try to fudge thread.sve_vl to a safe value here.
- */
- vl = current->thread.sve_vl_onexec ?
- current->thread.sve_vl_onexec : sve_default_vl;
+ /* Defer kfree() while in atomic context */
+ sve_state = current->thread.sve_state;
+ current->thread.sve_state = NULL;
- if (WARN_ON(!sve_vl_valid(vl)))
- vl = SVE_VL_MIN;
+ fpsimd_flush_thread_vl(ARM64_VEC_SVE);
+ }
- supported_vl = find_supported_vector_length(vl);
- if (WARN_ON(supported_vl != vl))
- vl = supported_vl;
+ if (system_supports_sme()) {
+ clear_thread_flag(TIF_SME);
- current->thread.sve_vl = vl;
+ /* Defer kfree() while in atomic context */
+ sme_state = current->thread.sme_state;
+ current->thread.sme_state = NULL;
- /*
- * If the task is not set to inherit, ensure that the vector
- * length will be reset by a subsequent exec:
- */
- if (!test_thread_flag(TIF_SVE_VL_INHERIT))
- current->thread.sve_vl_onexec = 0;
+ fpsimd_flush_thread_vl(ARM64_VEC_SME);
+ current->thread.svcr = 0;
}
+ current->thread.fp_type = FP_STATE_FPSIMD;
+
put_cpu_fpsimd_context();
+ kfree(sve_state);
+ kfree(sme_state);
}
/*
@@ -1066,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1078,58 +1635,115 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ if (test_thread_flag(TIF_SVE))
sve_to_fpsimd(current);
}
/*
+ * Called by KVM when entering the guest.
+ */
+void fpsimd_kvm_prepare(void)
+{
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * KVM does not save host SVE state since we can only enter
+ * the guest from a syscall so the ABI means that only the
+ * non-saved SVE state needs to be saved. If we have left
+ * SVE enabled for performance reasons then update the task
+ * state to be FPSIMD only.
+ */
+ get_cpu_fpsimd_context();
+
+ if (test_and_clear_thread_flag(TIF_SVE)) {
+ sve_to_fpsimd(current);
+ current->thread.fp_type = FP_STATE_FPSIMD;
+ }
+
+ put_cpu_fpsimd_context();
+}
+
+/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
*/
-void fpsimd_bind_task_to_cpu(void)
+static void fpsimd_bind_task_to_cpu(void)
{
- struct fpsimd_last_state_struct *last =
- this_cpu_ptr(&fpsimd_last_state);
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+ WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
- last->sve_vl = current->thread.sve_vl;
+ last->sme_state = current->thread.sme_state;
+ last->sve_vl = task_get_sve_vl(current);
+ last->sme_vl = task_get_sme_vl(current);
+ last->svcr = &current->thread.svcr;
+ last->fp_type = &current->thread.fp_type;
+ last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
+ /*
+ * Toggle SVE and SME trapping for userspace if needed, these
+ * are serialsied by ret_to_user().
+ */
+ if (system_supports_sme()) {
+ if (test_thread_flag(TIF_SME))
+ sme_user_enable();
+ else
+ sme_user_disable();
+ }
+
if (system_supports_sve()) {
- /* Toggle SVE trapping for userspace if needed */
if (test_thread_flag(TIF_SVE))
sve_user_enable();
else
sve_user_disable();
-
- /* Serialised by exception return to user */
}
}
-void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
- unsigned int sve_vl)
+void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
{
- struct fpsimd_last_state_struct *last =
- this_cpu_ptr(&fpsimd_last_state);
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+ WARN_ON(!system_supports_fpsimd());
WARN_ON(!in_softirq() && !irqs_disabled());
- last->st = st;
- last->sve_state = sve_state;
- last->sve_vl = sve_vl;
+ *last = *state;
}
/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
- * state of 'current'
+ * state of 'current'. This is called when we are preparing to return to
+ * userspace to ensure that userspace sees a good register state.
*/
void fpsimd_restore_current_state(void)
{
- if (!system_supports_fpsimd())
+ /*
+ * TIF_FOREIGN_FPSTATE is set on the init task and copied by
+ * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
+ * Thus user threads can have this set even when FP/SIMD hasn't been
+ * detected.
+ *
+ * When FP/SIMD is detected, begin_new_exec() will set
+ * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
+ * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
+ * switching tasks. We detect FP/SIMD before we exec the first user
+ * process, ensuring this has TIF_FOREIGN_FPSTATE set and
+ * do_notify_resume() will call fpsimd_restore_current_state() to
+ * install the user FP/SIMD context.
+ *
+ * When FP/SIMD is not detected, nothing else will clear or set
+ * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
+ * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
+ * looping forever calling fpsimd_restore_current_state().
+ */
+ if (!system_supports_fpsimd()) {
+ clear_thread_flag(TIF_FOREIGN_FPSTATE);
return;
+ }
get_cpu_fpsimd_context();
@@ -1144,17 +1758,19 @@ void fpsimd_restore_current_state(void)
/*
* Load an updated userland FPSIMD state for 'current' from memory and set the
* flag that indicates that the FPSIMD register contents are the most recent
- * FPSIMD state of 'current'
+ * FPSIMD state of 'current'. This is used by the signal code to restore the
+ * register state when returning from a signal handler in FPSIMD only cases,
+ * any SVE context will be discarded.
*/
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
- if (!system_supports_fpsimd())
+ if (WARN_ON(!system_supports_fpsimd()))
return;
get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ if (test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
task_fpsimd_load();
@@ -1179,7 +1795,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
-
+ /*
+ * If we don't support fpsimd, bail out after we have
+ * reset the fpsimd_cpu for this task and clear the
+ * FPSTATE.
+ */
+ if (!system_supports_fpsimd())
+ return;
barrier();
set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
@@ -1193,7 +1815,17 @@ void fpsimd_flush_task_state(struct task_struct *t)
*/
static void fpsimd_flush_cpu_state(void)
{
+ WARN_ON(!system_supports_fpsimd());
__this_cpu_write(fpsimd_last_state.st, NULL);
+
+ /*
+ * Leaving streaming mode enabled will cause issues for any kernel
+ * NEON and leaving streaming mode or ZA enabled may increase power
+ * consumption.
+ */
+ if (system_supports_sme())
+ sme_smstop();
+
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
@@ -1203,11 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
+ if (!system_supports_fpsimd())
+ return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1239,12 +1875,39 @@ void kernel_neon_begin(void)
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_kernel_state(current);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL_GPL(kernel_neon_begin);
/*
* kernel_neon_end(): give the CPU FPSIMD registers back to the current task
@@ -1260,15 +1923,25 @@ void kernel_neon_end(void)
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+ test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_load_kernel_state(current);
+ else
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
}
-EXPORT_SYMBOL(kernel_neon_end);
+EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
static DEFINE_PER_CPU(bool, efi_sve_state_used);
+static DEFINE_PER_CPU(bool, efi_sm_state);
/*
* EFI runtime services support functions
@@ -1303,11 +1976,33 @@ void __efi_fpsimd_begin(void)
*/
if (system_supports_sve() && likely(efi_sve_state)) {
char *sve_state = this_cpu_ptr(efi_sve_state);
+ bool ffr = true;
+ u64 svcr;
__this_cpu_write(efi_sve_state_used, true);
- sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
+ if (system_supports_sme()) {
+ svcr = read_sysreg_s(SYS_SVCR);
+
+ __this_cpu_write(efi_sm_state,
+ svcr & SVCR_SM_MASK);
+
+ /*
+ * Unless we have FA64 FFR does not
+ * exist in streaming mode.
+ */
+ if (!system_supports_fa64())
+ ffr = !(svcr & SVCR_SM_MASK);
+ }
+
+ sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
+ &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
+ ffr);
+
+ if (system_supports_sme())
+ sysreg_clear_set_s(SYS_SVCR,
+ SVCR_SM_MASK, 0);
+
} else {
fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
}
@@ -1330,10 +2025,31 @@ void __efi_fpsimd_end(void)
if (system_supports_sve() &&
likely(__this_cpu_read(efi_sve_state_used))) {
char const *sve_state = this_cpu_ptr(efi_sve_state);
+ bool ffr = true;
+
+ /*
+ * Restore streaming mode; EFI calls are
+ * normal function calls so should not return in
+ * streaming mode.
+ */
+ if (system_supports_sme()) {
+ if (__this_cpu_read(efi_sm_state)) {
+ sysreg_clear_set_s(SYS_SVCR,
+ 0,
+ SVCR_SM_MASK);
+
+ /*
+ * Unless we have FA64 FFR does not
+ * exist in streaming mode.
+ */
+ if (!system_supports_fa64())
+ ffr = false;
+ }
+ }
- sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
+ sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
&this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- sve_vq_from_vl(sve_get_vl()) - 1);
+ ffr);
__this_cpu_write(efi_sve_state_used, false);
} else {
@@ -1393,6 +2109,13 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
+void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
+ write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
+ isb();
+}
+
/*
* FP/SIMD support code initialisation.
*/
@@ -1408,6 +2131,10 @@ static int __init fpsimd_init(void)
if (!cpu_have_named_feature(ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
- return sve_sysctl_init();
+
+ sve_sysctl_init();
+ sme_sysctl_init();
+
+ return 0;
}
core_initcall(fpsimd_init);
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 8618faa82e6d..a650f5e11fc5 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,8 +15,134 @@
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#include <asm/patching.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+struct fregs_offset {
+ const char *name;
+ int offset;
+};
+
+#define FREGS_OFFSET(n, field) \
+{ \
+ .name = n, \
+ .offset = offsetof(struct ftrace_regs, field), \
+}
+
+static const struct fregs_offset fregs_offsets[] = {
+ FREGS_OFFSET("x0", regs[0]),
+ FREGS_OFFSET("x1", regs[1]),
+ FREGS_OFFSET("x2", regs[2]),
+ FREGS_OFFSET("x3", regs[3]),
+ FREGS_OFFSET("x4", regs[4]),
+ FREGS_OFFSET("x5", regs[5]),
+ FREGS_OFFSET("x6", regs[6]),
+ FREGS_OFFSET("x7", regs[7]),
+ FREGS_OFFSET("x8", regs[8]),
+
+ FREGS_OFFSET("x29", fp),
+ FREGS_OFFSET("x30", lr),
+ FREGS_OFFSET("lr", lr),
+
+ FREGS_OFFSET("sp", sp),
+ FREGS_OFFSET("pc", pc),
+};
+
+int ftrace_regs_query_register_offset(const char *name)
+{
+ for (int i = 0; i < ARRAY_SIZE(fregs_offsets); i++) {
+ const struct fregs_offset *roff = &fregs_offsets[i];
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ }
+
+ return -EINVAL;
+}
+#endif
+
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * When using mcount, addr is the address of the mcount call
+ * instruction, and no adjustment is necessary.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+ return addr;
+
+ /*
+ * When using patchable-function-entry without pre-function NOPS, addr
+ * is the address of the first NOP after the function entry point.
+ *
+ * The compiler has either generated:
+ *
+ * addr+00: func: NOP // To be patched to MOV X9, LR
+ * addr+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * addr-04: BTI C
+ * addr+00: func: NOP // To be patched to MOV X9, LR
+ * addr+04: NOP // To be patched to BL <caller>
+ *
+ * We must adjust addr to the address of the NOP which will be patched
+ * to `BL <caller>`, which is at `addr + 4` bytes in either case.
+ *
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return addr + AARCH64_INSN_SIZE;
+
+ /*
+ * When using patchable-function-entry with pre-function NOPs, addr is
+ * the address of the first pre-function NOP.
+ *
+ * Starting from an 8-byte aligned base, the compiler has either
+ * generated:
+ *
+ * addr+00: NOP // Literal (first 32 bits)
+ * addr+04: NOP // Literal (last 32 bits)
+ * addr+08: func: NOP // To be patched to MOV X9, LR
+ * addr+12: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * addr+00: NOP // Literal (first 32 bits)
+ * addr+04: NOP // Literal (last 32 bits)
+ * addr+08: func: BTI C
+ * addr+12: NOP // To be patched to MOV X9, LR
+ * addr+16: NOP // To be patched to BL <caller>
+ *
+ * We must adjust addr to the address of the NOP which will be patched
+ * to `BL <caller>`, which is at either addr+12 or addr+16 depending on
+ * whether there is a BTI.
+ */
+
+ if (!IS_ALIGNED(addr, sizeof(unsigned long))) {
+ WARN_RATELIMIT(1, "Misaligned patch-site %pS\n",
+ (void *)(addr + 8));
+ return 0;
+ }
+
+ /* Skip the NOPs placed before the function entry point */
+ addr += 2 * AARCH64_INSN_SIZE;
+
+ /* Skip any BTI */
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
+ u32 insn = le32_to_cpu(*(__le32 *)addr);
+
+ if (aarch64_insn_is_bti(insn)) {
+ addr += AARCH64_INSN_SIZE;
+ } else if (insn != aarch64_insn_gen_nop()) {
+ WARN_RATELIMIT(1, "unexpected insn in patch-site %pS: 0x%08x\n",
+ (void *)addr, insn);
+ }
+ }
+
+ /* Skip the first NOP after function entry */
+ addr += AARCH64_INSN_SIZE;
+
+ return addr;
+}
-#ifdef CONFIG_DYNAMIC_FTRACE
/*
* Replace a single instruction, which may be a branch or NOP.
* If @validate == true, a replaced instruction is checked against 'old'.
@@ -55,80 +181,181 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
unsigned long pc;
u32 new;
- pc = (unsigned long)&ftrace_call;
+ /*
+ * When using CALL_OPS, the function to call is associated with the
+ * call site, and we don't have a global function pointer to update.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return 0;
+
+ pc = (unsigned long)ftrace_call;
new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
AARCH64_INSN_BRANCH_LINK);
return ftrace_modify_code(pc, 0, new, false);
}
-static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+static struct plt_entry *get_ftrace_plt(struct module *mod)
{
-#ifdef CONFIG_ARM64_MODULE_PLTS
+#ifdef CONFIG_MODULES
struct plt_entry *plt = mod->arch.ftrace_trampolines;
- if (addr == FTRACE_ADDR)
- return &plt[FTRACE_PLT_IDX];
- if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
- return &plt[FTRACE_REGS_PLT_IDX];
-#endif
+ return &plt[FTRACE_PLT_IDX];
+#else
return NULL;
+#endif
+}
+
+static bool reachable_by_bl(unsigned long addr, unsigned long pc)
+{
+ long offset = (long)addr - (long)pc;
+
+ return offset >= -SZ_128M && offset < SZ_128M;
}
/*
- * Turn on the call to ftrace_caller() in instrumented function
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'BL' instructions, modules may be placed too far
+ * away to branch directly and must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
*/
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
+ struct module *mod,
+ unsigned long *addr)
{
unsigned long pc = rec->ip;
- u32 old, new;
- long offset = (long)pc - (long)addr;
+ struct plt_entry *plt;
- if (offset < -SZ_128M || offset >= SZ_128M) {
- struct module *mod;
- struct plt_entry *plt;
+ /*
+ * If a custom trampoline is unreachable, rely on the ftrace_caller
+ * trampoline which knows how to indirectly reach that trampoline
+ * through ops->direct_call.
+ */
+ if (*addr != FTRACE_ADDR && !reachable_by_bl(*addr, pc))
+ *addr = FTRACE_ADDR;
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
+ /*
+ * When the target is within range of the 'BL' instruction, use 'addr'
+ * as-is and branch to that directly.
+ */
+ if (reachable_by_bl(*addr, pc))
+ return true;
- /*
- * On kernels that support module PLTs, the offset between the
- * branch instruction and its target may legally exceed the
- * range of an ordinary relative 'bl' opcode. In this case, we
- * need to branch via a trampoline in the module.
- *
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
- * retains its validity throughout the remainder of this code.
- */
+ /*
+ * When the target is outside of the range of a 'BL' instruction, we
+ * must use a PLT to reach it. We can only place PLTs for modules, and
+ * only when module PLT support is built-in.
+ */
+ if (!IS_ENABLED(CONFIG_MODULES))
+ return false;
+
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
+ }
- if (WARN_ON(!mod))
- return -EINVAL;
+ if (WARN_ON(!mod))
+ return false;
- plt = get_ftrace_plt(mod, addr);
- if (!plt) {
- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
- return -EINVAL;
- }
+ plt = get_ftrace_plt(mod);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+ return false;
+ }
+
+ *addr = (unsigned long)plt;
+ return true;
+}
- addr = (unsigned long)plt;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+static const struct ftrace_ops *arm64_rec_get_ops(struct dyn_ftrace *rec)
+{
+ const struct ftrace_ops *ops = NULL;
+
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ WARN_ON_ONCE(!ops);
}
+ if (!ops)
+ ops = &ftrace_list_ops;
+
+ return ops;
+}
+
+static int ftrace_rec_set_ops(const struct dyn_ftrace *rec,
+ const struct ftrace_ops *ops)
+{
+ unsigned long literal = ALIGN_DOWN(rec->ip - 12, 8);
+ return aarch64_insn_write_literal_u64((void *)literal,
+ (unsigned long)ops);
+}
+
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
+}
+#else
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+ int ret;
+
+ ret = ftrace_rec_update_ops(rec);
+ if (ret)
+ return ret;
+
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
+
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
return ftrace_modify_code(pc, old, new, true);
}
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+ unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
+ int ret;
+
+ ret = ftrace_rec_set_ops(rec, arm64_rec_get_ops(rec));
+ if (ret)
+ return ret;
+
+ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+ return -EINVAL;
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
old = aarch64_insn_gen_branch_imm(pc, old_addr,
AARCH64_INSN_BRANCH_LINK);
@@ -136,7 +363,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return ftrace_modify_code(pc, old, new, true);
}
+#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
/*
* The compiler has inserted two NOPs before the regular function prologue.
* All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
@@ -152,7 +381,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
* | NOP | MOV X9, LR | MOV X9, LR |
* | NOP | NOP | BL <entry> |
*
- * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * The LR value will be recovered by ftrace_caller, and restored into LR
* before returning to the regular function prologue. When a function is not
* being traced, the MOV is not harmful given x9 is not live per the AAPCS.
*
@@ -163,6 +392,11 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
u32 old, new;
+ int ret;
+
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
@@ -179,54 +413,33 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
unsigned long pc = rec->ip;
- bool validate = true;
u32 old = 0, new;
- long offset = (long)pc - (long)addr;
-
- if (offset < -SZ_128M || offset >= SZ_128M) {
- u32 replaced;
+ int ret;
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
+ new = aarch64_insn_gen_nop();
- /*
- * 'mod' is only set at module load time, but if we end up
- * dealing with an out-of-range condition, we can assume it
- * is due to a module being loaded far away from the kernel.
- */
- if (!mod) {
- preempt_disable();
- mod = __module_text_address(pc);
- preempt_enable();
-
- if (WARN_ON(!mod))
- return -EINVAL;
- }
+ ret = ftrace_rec_set_nop_ops(rec);
+ if (ret)
+ return ret;
- /*
- * The instruction we are about to patch may be a branch and
- * link instruction that was redirected via a PLT entry. In
- * this case, the normal validation will fail, but we can at
- * least check that we are dealing with a branch and link
- * instruction that points into the right module.
- */
- if (aarch64_insn_read((void *)pc, &replaced))
- return -EFAULT;
-
- if (!aarch64_insn_is_bl(replaced) ||
- !within_module(pc + aarch64_get_branch_offset(replaced),
- mod))
- return -EINVAL;
+ /*
+ * When using mcount, callsites in modules may have been initalized to
+ * call an arbitrary module PLT (which redirects to the _mcount stub)
+ * rather than the ftrace PLT we'll use at runtime (which redirects to
+ * the ftrace trampoline). We can ignore the old PLT when initializing
+ * the callsite.
+ *
+ * Note: 'mod' is only set at module load time.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod)
+ return aarch64_insn_patch_text_nosync((void *)pc, new);
- validate = false;
- } else {
- old = aarch64_insn_gen_branch_imm(pc, addr,
- AARCH64_INSN_BRANCH_LINK);
- }
+ if (!ftrace_find_callable_addr(rec, mod, &addr))
+ return -EINVAL;
- new = aarch64_insn_gen_nop();
+ old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
- return ftrace_modify_code(pc, old, new, validate);
+ return ftrace_modify_code(pc, old, new, true);
}
void arch_ftrace_update_code(int command)
@@ -235,20 +448,12 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* function_graph tracer expects ftrace_return_to_handler() to be called
* on the way back to parent. For this purpose, this function is called
* in _mcount() or ftrace_caller() to replace return address (*parent) on
* the call stack to return_to_handler.
- *
- * Note that @frame_pointer is used only for sanity check later.
*/
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
unsigned long frame_pointer)
@@ -266,11 +471,19 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
*/
old = *parent;
- if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+ if (!function_graph_enter(old, self_addr, frame_pointer,
+ (void *)frame_pointer)) {
*parent = return_hooker;
+ }
}
-#ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+}
+#else
/*
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
* depending on @enable.
@@ -300,5 +513,5 @@ int ftrace_disable_ftrace_graph_caller(void)
{
return ftrace_modify_graph_caller(false);
}
-#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..cab7f91949d8 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -11,22 +11,25 @@
#include <linux/linkage.h>
#include <linux/init.h>
-#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/pgtable.h>
+#include <asm/asm_pointer_auth.h>
#include <asm/assembler.h>
#include <asm/boot.h>
+#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
+#include <asm/el2_setup.h>
#include <asm/elf.h>
#include <asm/image.h>
#include <asm/kernel-pgtable.h>
#include <asm/kvm_arm.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -34,14 +37,8 @@
#include "efi-header.S"
-#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
-
-#if (TEXT_OFFSET & 0xfff) != 0
-#error TEXT_OFFSET must be at least 4KB aligned
-#elif (PAGE_OFFSET & 0x1fffff) != 0
+#if (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned
-#elif TEXT_OFFSET > 0x1fffff
-#error TEXT_OFFSET must be less than 2MB
#endif
/*
@@ -52,115 +49,144 @@
* MMU = off, D-cache = off, I-cache = on or off,
* x0 = physical address to the FDT blob.
*
- * This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET + TEXT_OFFSET).
- *
* Note that the callee-saved registers are used for storing variables
* that are useful before the MMU is enabled. The allocations are described
* in the entry routines.
*/
__HEAD
-_head:
/*
* DO NOT MODIFY. Image header expected by Linux boot-loaders.
*/
-#ifdef CONFIG_EFI
- /*
- * This add instruction has no meaningful effect except that
- * its opcode forms the magic "MZ" signature required by UEFI.
- */
- add x13, x18, #0x16
- b stext
-#else
- b stext // branch to kernel start, magic
- .long 0 // reserved
-#endif
- le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian
+ efi_signature_nop // special NOP to identity as PE/COFF executable
+ b primary_entry // branch to kernel start, magic
+ .quad 0 // Image load offset from start of RAM, little-endian
le64sym _kernel_size_le // Effective size of kernel image, little-endian
le64sym _kernel_flags_le // Informative flags, little-endian
.quad 0 // reserved
.quad 0 // reserved
.quad 0 // reserved
.ascii ARM64_IMAGE_MAGIC // Magic number
-#ifdef CONFIG_EFI
- .long pe_header - _head // Offset to the PE header.
+ .long .Lpe_header_offset // Offset to the PE header.
-pe_header:
__EFI_PE_HEADER
-#else
- .long 0 // reserved
-#endif
- __INIT
+ .section ".idmap.text","a"
/*
* The following callee saved general purpose registers are used on the
* primary lowlevel boot path:
*
* Register Scope Purpose
- * x21 stext() .. start_kernel() FDT pointer passed at boot in x0
- * x23 stext() .. start_kernel() physical misalignment/KASLR offset
- * x28 __create_page_tables() callee preserved temp register
- * x19/x20 __primary_switch() callee preserved temp registers
- * x24 __primary_switch() .. relocate_kernel()
- * current RELR displacement
+ * x19 primary_entry() .. start_kernel() whether we entered with the MMU on
+ * x20 primary_entry() .. __primary_switch() CPU boot mode
+ * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
+ * x22 create_idmap() .. start_kernel() ID map VA of the DT blob
+ * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
+ * x24 __primary_switch() linear map KASLR seed
+ * x25 primary_entry() .. start_kernel() supported VA size
+ * x28 create_idmap() callee preserved temp register
*/
-ENTRY(stext)
+SYM_CODE_START(primary_entry)
+ bl record_mmu_state
bl preserve_boot_args
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
- adrp x23, __PHYS_OFFSET
- and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
- bl set_cpu_boot_mode_flag
- bl __create_page_tables
+ bl create_idmap
+
+ /*
+ * If we entered with the MMU and caches on, clean the ID mapped part
+ * of the primary boot code to the PoC so we can safely execute it with
+ * the MMU off.
+ */
+ cbz x19, 0f
+ adrp x0, __idmap_text_start
+ adr_l x1, __idmap_text_end
+ adr_l x2, dcache_clean_poc
+ blr x2
+0: mov x0, x19
+ bl init_kernel_el // w0=cpu_boot_mode
+ mov x20, x0
+
/*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
* details.
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
+#if VA_BITS > 48
+ mrs_s x0, SYS_ID_AA64MMFR2_EL1
+ tst x0, ID_AA64MMFR2_EL1_VARange_MASK
+ mov x0, #VA_BITS
+ mov x25, #VA_BITS_MIN
+ csel x25, x25, x0, eq
+ mov x0, x25
+#endif
bl __cpu_setup // initialise processor
b __primary_switch
-ENDPROC(stext)
+SYM_CODE_END(primary_entry)
+
+ __INIT
+SYM_CODE_START_LOCAL(record_mmu_state)
+ mrs x19, CurrentEL
+ cmp x19, #CurrentEL_EL2
+ mrs x19, sctlr_el1
+ b.ne 0f
+ mrs x19, sctlr_el2
+0:
+CPU_LE( tbnz x19, #SCTLR_ELx_EE_SHIFT, 1f )
+CPU_BE( tbz x19, #SCTLR_ELx_EE_SHIFT, 1f )
+ tst x19, #SCTLR_ELx_C // Z := (C == 0)
+ and x19, x19, #SCTLR_ELx_M // isolate M bit
+ csel x19, xzr, x19, eq // clear x19 if Z
+ ret
+
+ /*
+ * Set the correct endianness early so all memory accesses issued
+ * before init_kernel_el() occur in the correct byte order. Note that
+ * this means the MMU must be disabled, or the active ID map will end
+ * up getting interpreted with the wrong byte order.
+ */
+1: eor x19, x19, #SCTLR_ELx_EE
+ bic x19, x19, #SCTLR_ELx_M
+ b.ne 2f
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x19
+ b 3f
+2: pre_disable_mmu_workaround
+ msr sctlr_el1, x19
+3: isb
+ mov x19, xzr
+ ret
+SYM_CODE_END(record_mmu_state)
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
*/
-preserve_boot_args:
+SYM_CODE_START_LOCAL(preserve_boot_args)
mov x21, x0 // x21=FDT
adr_l x0, boot_args // record the contents of
stp x21, x1, [x0] // x0 .. x3 at kernel entry
stp x2, x3, [x0, #16]
+ cbnz x19, 0f // skip cache invalidation if MMU is on
dmb sy // needed before dc ivac with
// MMU off
- mov x1, #0x20 // 4 x 8 bytes
- b __inval_dcache_area // tail call
-ENDPROC(preserve_boot_args)
+ add x1, x0, #0x20 // 4 x 8 bytes
+ b dcache_inval_poc // tail call
+0: str_l x19, mmu_enabled_at_boot, x0
+ ret
+SYM_CODE_END(preserve_boot_args)
-/*
- * Macro to create a table entry to the next page.
- *
- * tbl: page table address
- * virt: virtual address
- * shift: #imm page table shift
- * ptrs: #imm pointers per table page
- *
- * Preserves: virt
- * Corrupts: ptrs, tmp1, tmp2
- * Returns: tbl -> next level table page address
- */
- .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
- add \tmp1, \tbl, #PAGE_SIZE
- phys_to_pte \tmp2, \tmp1
- orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
- lsr \tmp1, \virt, #\shift
- sub \ptrs, \ptrs, #1
- and \tmp1, \tmp1, \ptrs // table index
- str \tmp2, [\tbl, \tmp1, lsl #3]
- add \tbl, \tbl, #PAGE_SIZE // next level table page
- .endm
+SYM_FUNC_START_LOCAL(clear_page_tables)
+ /*
+ * Clear the init page tables.
+ */
+ adrp x0, init_pg_dir
+ adrp x1, init_pg_end
+ sub x2, x1, x0
+ mov x1, xzr
+ b __pi_memset // tail call
+SYM_FUNC_END(clear_page_tables)
/*
* Macro to populate page table entries, these entries can be pointers to the next level
@@ -194,33 +220,22 @@ ENDPROC(preserve_boot_args)
* to be composed of multiple pages. (This effectively scales the end index).
*
* vstart: virtual address of start of range
- * vend: virtual address of end of range
+ * vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index
- * ptrs: number of entries in page table
+ * order: #imm 2log(number of entries in page table)
* istart: index in table corresponding to vstart
* iend: index in table corresponding to vend
* count: On entry: how many extra entries were required in previous level, scales
* our end index.
* On exit: returns how many extra entries required for next page table level
*
- * Preserves: vstart, vend, shift, ptrs
+ * Preserves: vstart, vend
* Returns: istart, iend, count
*/
- .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
- lsr \iend, \vend, \shift
- mov \istart, \ptrs
- sub \istart, \istart, #1
- and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
- mov \istart, \ptrs
- mul \istart, \istart, \count
- add \iend, \iend, \istart // iend += (count - 1) * ptrs
- // our entries span multiple tables
-
- lsr \istart, \vstart, \shift
- mov \count, \ptrs
- sub \count, \count, #1
- and \istart, \istart, \count
-
+ .macro compute_indices, vstart, vend, shift, order, istart, iend, count
+ ubfx \istart, \vstart, \shift, \order
+ ubfx \iend, \vend, \shift, \order
+ add \iend, \iend, \count, lsl \order
sub \count, \iend, \istart
.endm
@@ -231,123 +246,120 @@ ENDPROC(preserve_boot_args)
*
* tbl: location of page table
* rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- * vstart: start address to map
- * vend: end address to map - we map [vstart, vend]
+ * vstart: virtual address of start of range
+ * vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous
- * pgds: the number of pgd entries
+ * order: #imm 2log(number of entries in PGD table)
+ *
+ * If extra_shift is set, an extra level will be populated if the end address does
+ * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
*
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
- * Preserves: vstart, vend, flags
- * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
+ * Preserves: vstart, flags
+ * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/
- .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
+ sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE
- mov \sv, \rtbl
mov \count, #0
- compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+
+ .ifnb \extra_shift
+ tst \vend, #~((1 << (\extra_shift)) - 1)
+ b.eq .L_\@
+ compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
+ .endif
+.L_\@:
+ compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
mov \sv, \rtbl
+ populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+ mov \tbl, \sv
#if SWAPPER_PGTABLE_LEVELS > 3
- compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+ compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
- mov \sv, \rtbl
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
- compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+ compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#endif
- compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
- bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
- populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
+ compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+ bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
+ populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm
/*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- * - identity mapping to enable the MMU (low address, TTBR0)
- * - first few MB of the kernel linear mapping to jump to once the MMU has
- * been enabled
+ * Remap a subregion created with the map_memory macro with modified attributes
+ * or output address. The entire remapped region must have been covered in the
+ * invocation of map_memory.
+ *
+ * x0: last level table address (returned in first argument to map_memory)
+ * x1: start VA of the existing mapping
+ * x2: start VA of the region to update
+ * x3: end VA of the region to update (exclusive)
+ * x4: start PA associated with the region to update
+ * x5: attributes to set on the updated region
+ * x6: order of the last level mappings
*/
-__create_page_tables:
- mov x28, lr
+SYM_FUNC_START_LOCAL(remap_region)
+ sub x3, x3, #1 // make end inclusive
- /*
- * Invalidate the init page tables to avoid potential dirty cache lines
- * being evicted. Other page tables are allocated in rodata as part of
- * the kernel image, and thus are clean to the PoC per the boot
- * protocol.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x1, x1, x0
- bl __inval_dcache_area
+ // Get the index offset for the start of the last level table
+ lsr x1, x1, x6
+ bfi x1, xzr, #0, #PAGE_SHIFT - 3
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x1, x1, x0
-1: stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- subs x1, x1, #64
- b.ne 1b
+ // Derive the start and end indexes into the last level table
+ // associated with the provided region
+ lsr x2, x2, x6
+ lsr x3, x3, x6
+ sub x2, x2, x1
+ sub x3, x3, x1
- mov x7, SWAPPER_MM_MMUFLAGS
+ mov x1, #1
+ lsl x6, x1, x6 // block size at this level
- /*
- * Create the identity mapping.
- */
- adrp x0, idmap_pg_dir
- adrp x3, __idmap_text_start // __pa(__idmap_text_start)
-
-#ifdef CONFIG_ARM64_VA_BITS_52
- mrs_s x6, SYS_ID_AA64MMFR2_EL1
- and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
- mov x5, #52
- cbnz x6, 1f
-#endif
- mov x5, #VA_BITS_MIN
-1:
- adr_l x6, vabits_actual
- str x5, [x6]
- dmb sy
- dc ivac, x6 // Invalidate potentially stale cache line
+ populate_entries x0, x4, x2, x3, x5, x6, x7
+ ret
+SYM_FUNC_END(remap_region)
+SYM_FUNC_START_LOCAL(create_idmap)
+ mov x28, lr
/*
- * VA_BITS may be too small to allow for an ID mapping to be created
- * that covers system RAM if that is located sufficiently high in the
- * physical address space. So for the ID map, use an extended virtual
- * range in that case, and configure an additional translation level
- * if needed.
+ * The ID map carries a 1:1 mapping of the physical address range
+ * covered by the loaded image, which could be anywhere in DRAM. This
+ * means that the required size of the VA (== PA) space is decided at
+ * boot time, and could be more than the configured size of the VA
+ * space for ordinary kernel and user space mappings.
+ *
+ * There are three cases to consider here:
+ * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
+ * the placement of the image. In this case, we configure one extra
+ * level of translation on the fly for the ID map only. (This case
+ * also covers 42-bit VA/52-bit PA on 64k pages).
+ *
+ * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
+ * only happen when using 64k pages, in which case we need to extend
+ * the root level table rather than add a level. Note that we can
+ * treat this case as 'always extended' as long as we take care not
+ * to program an unsupported T0SZ value into the TCR register.
*
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of __idmap_text_end.
+ * - Combinations that would require two additional levels of
+ * translation are not supported, e.g., VA_BITS==36 on 16k pages, or
+ * VA_BITS==39/4k pages with 5-level paging, where the input address
+ * requires more than 47 or 48 bits, respectively.
*/
- adrp x5, __idmap_text_end
- clz x5, x5
- cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
- b.ge 1f // .. then skip VA range extension
-
- adr_l x6, idmap_t0sz
- str x5, [x6]
- dmb sy
- dc ivac, x6 // Invalidate potentially stale cache line
-
#if (VA_BITS < 48)
+#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
* If VA_BITS < 48, we have to configure an additional table level.
@@ -359,77 +371,124 @@ __create_page_tables:
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
-
- mov x4, EXTRA_PTRS
- create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
+#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+#define EXTRA_SHIFT
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
- mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
- str_l x4, idmap_ptrs_per_pgd, x5
#endif
-1:
- ldr_l x4, idmap_ptrs_per_pgd
- mov x5, x3 // __pa(__idmap_text_start)
- adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
-
- map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
+ adrp x0, init_idmap_pg_dir
+ adrp x3, _text
+ adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+ mov_q x7, SWAPPER_RX_MMUFLAGS
+
+ map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
+
+ /* Remap the kernel page tables r/w in the ID map */
+ adrp x1, _text
+ adrp x2, init_pg_dir
+ adrp x3, init_pg_end
+ bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
+ mov_q x5, SWAPPER_RW_MMUFLAGS
+ mov x6, #SWAPPER_BLOCK_SHIFT
+ bl remap_region
+
+ /* Remap the FDT after the kernel image */
+ adrp x1, _text
+ adrp x22, _end + SWAPPER_BLOCK_SIZE
+ bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
+ bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
+ add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+ bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
+ mov_q x5, SWAPPER_RW_MMUFLAGS
+ mov x6, #SWAPPER_BLOCK_SHIFT
+ bl remap_region
/*
- * Map the kernel image (starting with PHYS_OFFSET).
+ * Since the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate those tables again to
+ * remove any speculatively loaded cache lines.
*/
+ cbnz x19, 0f // skip cache invalidation if MMU is on
+ dmb sy
+
+ adrp x0, init_idmap_pg_dir
+ adrp x1, init_idmap_pg_end
+ bl dcache_inval_poc
+0: ret x28
+SYM_FUNC_END(create_idmap)
+
+SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
+ mov_q x5, KIMAGE_VADDR // compile time __va(_text)
+#ifdef CONFIG_RELOCATABLE
add x5, x5, x23 // add KASLR displacement
- mov x4, PTRS_PER_PGD
+#endif
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
+ mov_q x7, SWAPPER_RW_MMUFLAGS
+
+ map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
- map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
+ dsb ishst // sync with page table walker
+ ret
+SYM_FUNC_END(create_kernel_mapping)
/*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate the idmap and swapper page
- * tables again to remove any speculatively loaded cache lines.
+ * Initialize CPU registers with task-specific and cpu-specific context.
+ *
+ * Create a final frame record at task_pt_regs(current)->stackframe, so
+ * that the unwinder can identify the final frame record of any task by
+ * its location in the task stack. We reserve the entire pt_regs space
+ * for consistency with user tasks and kthreads.
*/
- adrp x0, idmap_pg_dir
- adrp x1, init_pg_end
- sub x1, x1, x0
- dmb sy
- bl __inval_dcache_area
+ .macro init_cpu_task tsk, tmp1, tmp2
+ msr sp_el0, \tsk
+
+ ldr \tmp1, [\tsk, #TSK_STACK]
+ add sp, \tmp1, #THREAD_SIZE
+ sub sp, sp, #PT_REGS_SIZE
+
+ stp xzr, xzr, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+
+ scs_load_current
- ret x28
-ENDPROC(__create_page_tables)
- .ltorg
+ adr_l \tmp1, __per_cpu_offset
+ ldr w\tmp2, [\tsk, #TSK_TI_CPU]
+ ldr \tmp1, [\tmp1, \tmp2, lsl #3]
+ set_this_cpu_offset \tmp1
+ .endm
/*
* The following fragment of code is executed with the MMU enabled.
*
- * x0 = __PHYS_OFFSET
+ * x0 = __pa(KERNEL_START)
*/
-__primary_switched:
- adrp x4, init_thread_union
- add sp, x4, #THREAD_SIZE
- adr_l x5, init_task
- msr sp_el0, x5 // Save thread_info
+SYM_FUNC_START_LOCAL(__primary_switched)
+ adr_l x4, init_task
+ init_cpu_task x4, x5, x6
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
- stp xzr, x30, [sp, #-16]!
+ stp x29, x30, [sp, #-16]!
mov x29, sp
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- ldr_l x4, kimage_vaddr // Save the offset between
+ adrp x4, _text // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
+ mov x0, x20
+ bl set_cpu_boot_mode_flag
+
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
@@ -438,309 +497,209 @@ __primary_switched:
bl __pi_memset
dsb ishst // Make zero page visible to PTW
-#ifdef CONFIG_KASAN
- bl kasan_early_init
+#if VA_BITS > 48
+ adr_l x8, vabits_actual // Set this early so KASAN early init
+ str x25, [x8] // ... observes the correct value
+ dc civac, x8 // Make visible to booting secondaries
#endif
+
#ifdef CONFIG_RANDOMIZE_BASE
- tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
- b.ne 0f
+ adrp x5, memstart_offset_seed // Save KASLR linear map seed
+ strh w24, [x5, :lo12:memstart_offset_seed]
+#endif
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+ bl kasan_early_init
+#endif
mov x0, x21 // pass FDT address in x0
- bl kaslr_early_init // parse FDT for KASLR options
- cbz x0, 0f // KASLR disabled? just proceed
- orr x23, x23, x0 // record KASLR offset
- ldp x29, x30, [sp], #16 // we must enable KASLR, return
- ret // to __primary_switch()
-0:
+ bl early_fdt_map // Try mapping the FDT early
+ mov x0, x20 // pass the full boot status
+ bl init_feature_override // Parse cpu feature overrides
+#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
+ bl scs_patch_vmlinux
#endif
- add sp, sp, #16
- mov x29, #0
- mov x30, #0
- b start_kernel
-ENDPROC(__primary_switched)
+ mov x0, x20
+ bl finalise_el2 // Prefer VHE if possible
+ ldp x29, x30, [sp], #16
+ bl start_kernel
+ ASM_BUG()
+SYM_FUNC_END(__primary_switched)
/*
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".idmap.text","awx"
-
-ENTRY(kimage_vaddr)
- .quad _text - TEXT_OFFSET
-EXPORT_SYMBOL(kimage_vaddr)
+ .section ".idmap.text","a"
/*
- * If we're fortunate enough to boot at EL2, ensure that the world is
- * sane before dropping to EL1.
+ * Starting from EL2 or EL1, configure the CPU to execute at the highest
+ * reachable EL supported by the kernel in a chosen default state. If dropping
+ * from EL2 to EL1, configure EL2 before configuring EL1.
*
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
- * booted in EL1 or EL2 respectively.
+ * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
+ * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
+ *
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
+ * booted in EL1 or EL2 respectively, with the top 32 bits containing
+ * potential context flags. These flags are *not* stored in __boot_cpu_mode.
+ *
+ * x0: whether we are being called from the primary boot path with the MMU on
*/
-ENTRY(el2_setup)
- msr SPsel, #1 // We want to use SP_EL{1,2}
- mrs x0, CurrentEL
- cmp x0, #CurrentEL_EL2
- b.eq 1f
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
+SYM_FUNC_START(init_kernel_el)
+ mrs x1, CurrentEL
+ cmp x1, #CurrentEL_EL2
+ b.eq init_el2
+
+SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ pre_disable_mmu_workaround
msr sctlr_el1, x0
- mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
- ret
-
-1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
- msr sctlr_el2, x0
+ mov_q x0, INIT_PSTATE_EL1
+ msr spsr_el1, x0
+ msr elr_el1, lr
+ mov w0, #BOOT_CPU_MODE_EL1
+ eret
-#ifdef CONFIG_ARM64_VHE
- /*
- * Check for VHE being present. For the rest of the EL2 setup,
- * x2 being non-zero indicates that we do have VHE, and that the
- * kernel is intended to run at EL2.
- */
- mrs x2, id_aa64mmfr1_el1
- ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
-#else
- mov x2, xzr
-#endif
+SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
+ msr elr_el2, lr
- /* Hyp configuration. */
+ // clean all HYP code to the PoC if we booted at EL2 with the MMU on
+ cbz x0, 0f
+ adrp x0, __hyp_idmap_text_start
+ adr_l x1, __hyp_text_end
+ adr_l x2, dcache_clean_poc
+ blr x2
+0:
mov_q x0, HCR_HOST_NVHE_FLAGS
- cbz x2, set_hcr
- mov_q x0, HCR_HOST_VHE_FLAGS
-set_hcr:
msr hcr_el2, x0
isb
- /*
- * Allow Non-secure EL1 and EL0 to access physical timer and counter.
- * This is not necessary for VHE, since the host kernel runs in EL2,
- * and EL0 accesses are configured in the later stage of boot process.
- * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
- * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
- * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
- * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
- * EL2.
- */
- cbnz x2, 1f
- mrs x0, cnthctl_el2
- orr x0, x0, #3 // Enable EL1 physical timers
- msr cnthctl_el2, x0
-1:
- msr cntvoff_el2, xzr // Clear virtual offset
-
-#ifdef CONFIG_ARM_GIC_V3
- /* GICv3 system register access */
- mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
- cbz x0, 3f
-
- mrs_s x0, SYS_ICC_SRE_EL2
- orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
- orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
- msr_s SYS_ICC_SRE_EL2, x0
- isb // Make sure SRE is now set
- mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
- tbz x0, #0, 3f // and check that it sticks
- msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
-
-3:
-#endif
+ init_el2_state
- /* Populate ID registers. */
- mrs x0, midr_el1
- mrs x1, mpidr_el1
- msr vpidr_el2, x0
- msr vmpidr_el2, x1
-
-#ifdef CONFIG_COMPAT
- msr hstr_el2, xzr // Disable CP15 traps to EL2
-#endif
-
- /* EL2 debug */
- mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
- cmp x0, #1
- b.lt 4f // Skip if no PMU present
- mrs x0, pmcr_el0 // Disable debug access traps
- ubfx x0, x0, #11, #5 // to EL2 and allow access to
-4:
- csel x3, xzr, x0, lt // all PMU counters from EL1
-
- /* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
- cbz x0, 7f // Skip if SPE not present
- cbnz x2, 6f // VHE?
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
- and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
- cbnz x4, 5f // then permit sampling of physical
- mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
- 1 << SYS_PMSCR_EL2_PA_SHIFT)
- msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter
-5:
- mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- orr x3, x3, x1 // If we don't have VHE, then
- b 7f // use EL1&0 translation.
-6: // For VHE, use EL2 translation
- orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1
-7:
- msr mdcr_el2, x3 // Configure debug traps
-
- /* LORegions */
- mrs x1, id_aa64mmfr1_el1
- ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
- cbz x0, 1f
- msr_s SYS_LORC_EL1, xzr
-1:
-
- /* Stage-2 translation */
- msr vttbr_el2, xzr
-
- cbz x2, install_el2_stub
-
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ /* Hypervisor stub */
+ adr_l x0, __hyp_stub_vectors
+ msr vbar_el2, x0
isb
- ret
-install_el2_stub:
+ mov_q x1, INIT_SCTLR_EL1_MMU_OFF
+
/*
- * When VHE is not in use, early init of EL2 and EL1 needs to be
- * done here.
- * When VHE _is_ in use, EL1 will not be used in the host and
- * requires no configuration, and all non-hyp-specific EL2 setup
- * will be done via the _EL1 system register aliases in __cpu_setup.
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
+ * making it impossible to start in nVHE mode. Is that
+ * compliant with the architecture? Absolutely not!
*/
- mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
- msr sctlr_el1, x0
-
- /* Coprocessor traps. */
- mov x0, #0x33ff
- msr cptr_el2, x0 // Disable copro. traps to EL2
-
- /* SVE register access */
- mrs x1, id_aa64pfr0_el1
- ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
- cbz x1, 7f
+ mrs x0, hcr_el2
+ and x0, x0, #HCR_E2H
+ cbz x0, 1f
- bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
- msr cptr_el2, x0 // Disable copro. traps to EL2
- isb
- mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
- msr_s SYS_ZCR_EL2, x1 // length for EL1.
+ /* Set a sane SCTLR_EL1, the VHE way */
+ pre_disable_mmu_workaround
+ msr_s SYS_SCTLR_EL12, x1
+ mov x2, #BOOT_CPU_FLAG_E2H
+ b 2f
- /* Hypervisor stub */
-7: adr_l x0, __hyp_stub_vectors
- msr vbar_el2, x0
+1:
+ pre_disable_mmu_workaround
+ msr sctlr_el1, x1
+ mov x2, xzr
+2:
+ __init_el2_nvhe_prepare_eret
- /* spsr */
- mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
- PSR_MODE_EL1h)
- msr spsr_el2, x0
- msr elr_el2, lr
- mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2
+ mov w0, #BOOT_CPU_MODE_EL2
+ orr x0, x0, x2
eret
-ENDPROC(el2_setup)
-
-/*
- * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
- * in w0. See arch/arm64/include/asm/virt.h for more info.
- */
-set_cpu_boot_mode_flag:
- adr_l x1, __boot_cpu_mode
- cmp w0, #BOOT_CPU_MODE_EL2
- b.ne 1f
- add x1, x1, #4
-1: str w0, [x1] // This CPU has booted in EL1
- dmb sy
- dc ivac, x1 // Invalidate potentially stale cache line
- ret
-ENDPROC(set_cpu_boot_mode_flag)
-
-/*
- * These values are written with the MMU off, but read with the MMU on.
- * Writers will invalidate the corresponding address, discarding up to a
- * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
- * sufficient alignment that the CWG doesn't overlap another section.
- */
- .pushsection ".mmuoff.data.write", "aw"
-/*
- * We need to find out the CPU boot mode long after boot, so we need to
- * store it in a writable variable.
- *
- * This is not in .bss, because we set it sufficiently early that the boot-time
- * zeroing of .bss would clobber it.
- */
-ENTRY(__boot_cpu_mode)
- .long BOOT_CPU_MODE_EL2
- .long BOOT_CPU_MODE_EL1
-/*
- * The booting CPU updates the failed status @__early_cpu_boot_status,
- * with MMU turned off.
- */
-ENTRY(__early_cpu_boot_status)
- .quad 0
-
- .popsection
+SYM_FUNC_END(init_kernel_el)
/*
* This provides a "holding pen" for platforms to hold all secondary
* cores are held until we're ready for them to initialise.
*/
-ENTRY(secondary_holding_pen)
- bl el2_setup // Drop to EL1, w0=cpu_boot_mode
- bl set_cpu_boot_mode_flag
- mrs x0, mpidr_el1
+SYM_FUNC_START(secondary_holding_pen)
+ mov x0, xzr
+ bl init_kernel_el // w0=cpu_boot_mode
+ mrs x2, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
- and x0, x0, x1
+ and x2, x2, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
- cmp x4, x0
+ cmp x4, x2
b.eq secondary_startup
wfe
b pen
-ENDPROC(secondary_holding_pen)
+SYM_FUNC_END(secondary_holding_pen)
/*
* Secondary entry point that jumps straight into the kernel. Only to
* be used where CPUs are brought online dynamically by the kernel.
*/
-ENTRY(secondary_entry)
- bl el2_setup // Drop to EL1
- bl set_cpu_boot_mode_flag
+SYM_FUNC_START(secondary_entry)
+ mov x0, xzr
+ bl init_kernel_el // w0=cpu_boot_mode
b secondary_startup
-ENDPROC(secondary_entry)
+SYM_FUNC_END(secondary_entry)
-secondary_startup:
+SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
+ mov x20, x0 // preserve boot mode
bl __cpu_secondary_check52bitva
+#if VA_BITS > 48
+ ldr_l x0, vabits_actual
+#endif
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
+ adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =__secondary_switched
br x8
-ENDPROC(secondary_startup)
+SYM_FUNC_END(secondary_startup)
+
+ .text
+SYM_FUNC_START_LOCAL(__secondary_switched)
+ mov x0, x20
+ bl set_cpu_boot_mode_flag
+
+ mov x0, x20
+ bl finalise_el2
-__secondary_switched:
+ str_l xzr, __early_cpu_boot_status, x3
adr_l x5, vectors
msr vbar_el1, x5
isb
adr_l x0, secondary_data
- ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
- cbz x1, __secondary_too_slow
- mov sp, x1
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
- msr sp_el0, x2
- mov x29, #0
- mov x30, #0
- b secondary_start_kernel
-ENDPROC(__secondary_switched)
-__secondary_too_slow:
+ init_cpu_task x2, x1, x3
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+ ptrauth_keys_init_cpu x2, x3, x4, x5
+#endif
+
+ bl secondary_start_kernel
+ ASM_BUG()
+SYM_FUNC_END(__secondary_switched)
+
+SYM_FUNC_START_LOCAL(__secondary_too_slow)
wfe
wfi
b __secondary_too_slow
-ENDPROC(__secondary_too_slow)
+SYM_FUNC_END(__secondary_too_slow)
+
+/*
+ * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
+ * in w0. See arch/arm64/include/asm/virt.h for more info.
+ */
+SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
+ adr_l x1, __boot_cpu_mode
+ cmp w0, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+ add x1, x1, #4
+1: str w0, [x1] // Save CPU boot mode
+ ret
+SYM_FUNC_END(set_cpu_boot_mode_flag)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
@@ -765,6 +724,7 @@ ENDPROC(__secondary_too_slow)
*
* x0 = SCTLR_EL1 value for turning on the MMU.
* x1 = TTBR1_EL1 value
+ * x2 = ID map root table address
*
* Returns to the caller via x30/lr. This requires the caller to be covered
* by the .idmap.text section.
@@ -772,40 +732,31 @@ ENDPROC(__secondary_too_slow)
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
-ENTRY(__enable_mmu)
- mrs x2, ID_AA64MMFR0_EL1
- ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
- cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
- b.ne __no_granule_support
- update_early_cpu_boot_status 0, x2, x3
- adrp x2, idmap_pg_dir
- phys_to_ttbr x1, x1
+ .section ".idmap.text","a"
+SYM_FUNC_START(__enable_mmu)
+ mrs x3, ID_AA64MMFR0_EL1
+ ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN
+ b.lt __no_granule_support
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX
+ b.gt __no_granule_support
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
- offset_ttbr1 x1, x3
- msr ttbr1_el1, x1 // load TTBR1
- isb
- msr sctlr_el1, x0
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
+ load_ttbr1 x1, x1, x3
+
+ set_sctlr_el1 x0
+
ret
-ENDPROC(__enable_mmu)
+SYM_FUNC_END(__enable_mmu)
-ENTRY(__cpu_secondary_check52bitva)
-#ifdef CONFIG_ARM64_VA_BITS_52
+SYM_FUNC_START(__cpu_secondary_check52bitva)
+#if VA_BITS > 48
ldr_l x0, vabits_actual
cmp x0, #52
b.ne 2f
mrs_s x0, SYS_ID_AA64MMFR2_EL1
- and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
update_early_cpu_boot_status \
@@ -816,9 +767,9 @@ ENTRY(__cpu_secondary_check52bitva)
#endif
2: ret
-ENDPROC(__cpu_secondary_check52bitva)
+SYM_FUNC_END(__cpu_secondary_check52bitva)
-__no_granule_support:
+SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2
@@ -826,21 +777,18 @@ __no_granule_support:
wfe
wfi
b 1b
-ENDPROC(__no_granule_support)
+SYM_FUNC_END(__no_granule_support)
#ifdef CONFIG_RELOCATABLE
-__relocate_kernel:
+SYM_FUNC_START_LOCAL(__relocate_kernel)
/*
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
*/
- ldr w9, =__rela_offset // offset to reloc table
- ldr w10, =__rela_size // size of reloc table
-
+ adr_l x9, __rela_start
+ adr_l x10, __rela_end
mov_q x11, KIMAGE_VADDR // default virtual offset
add x11, x11, x23 // actual virtual offset
- add x9, x9, x11 // __va(.rela)
- add x10, x9, x10 // __va(.rela) + sizeof(.rela)
0: cmp x9, x10
b.hs 1f
@@ -883,21 +831,9 @@ __relocate_kernel:
* entry in x9, the address being relocated by the current address or
* bitmap entry in x13 and the address being relocated by the current
* bit in x14.
- *
- * Because addends are stored in place in the binary, RELR relocations
- * cannot be applied idempotently. We use x24 to keep track of the
- * currently applied displacement so that we can correctly relocate if
- * __relocate_kernel is called twice with non-zero displacements (i.e.
- * if there is both a physical misalignment and a KASLR displacement).
*/
- ldr w9, =__relr_offset // offset to reloc table
- ldr w10, =__relr_size // size of reloc table
- add x9, x9, x11 // __va(.relr)
- add x10, x9, x10 // __va(.relr) + sizeof(.relr)
-
- sub x15, x23, x24 // delta from previous offset
- cbz x15, 7f // nothing to do if unchanged
- mov x24, x23 // save new offset
+ adr_l x9, __relr_start
+ adr_l x10, __relr_end
2: cmp x9, x10
b.hs 7f
@@ -905,7 +841,7 @@ __relocate_kernel:
tbnz x11, #0, 3f // branch to handle bitmaps
add x13, x11, x23
ldr x12, [x13] // relocate address entry
- add x12, x12, x15
+ add x12, x12, x23
str x12, [x13], #8 // adjust to start of bitmap
b 2b
@@ -914,7 +850,7 @@ __relocate_kernel:
cbz x11, 6f
tbz x11, #0, 5f // skip bit if not set
ldr x12, [x14] // relocate bit
- add x12, x12, x15
+ add x12, x12, x23
str x12, [x14]
5: add x14, x14, #8 // move to next bit's address
@@ -931,50 +867,36 @@ __relocate_kernel:
#endif
ret
-ENDPROC(__relocate_kernel)
+SYM_FUNC_END(__relocate_kernel)
#endif
-__primary_switch:
+SYM_FUNC_START_LOCAL(__primary_switch)
+ adrp x1, reserved_pg_dir
+ adrp x2, init_idmap_pg_dir
+ bl __enable_mmu
+#ifdef CONFIG_RELOCATABLE
+ adrp x23, KERNEL_START
+ and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
- mov x19, x0 // preserve new SCTLR_EL1 value
- mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
+ mov x0, x22
+ adrp x1, init_pg_end
+ mov sp, x1
+ mov x29, xzr
+ bl __pi_kaslr_early_init
+ and x24, x0, #SZ_2M - 1 // capture memstart offset seed
+ bic x0, x0, #SZ_2M - 1
+ orr x23, x23, x0 // record kernel offset
+#endif
#endif
+ bl clear_page_tables
+ bl create_kernel_mapping
adrp x1, init_pg_dir
- bl __enable_mmu
+ load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
-#ifdef CONFIG_RELR
- mov x24, #0 // no RELR displacement yet
-#endif
bl __relocate_kernel
-#ifdef CONFIG_RANDOMIZE_BASE
- ldr x8, =__primary_switched
- adrp x0, __PHYS_OFFSET
- blr x8
-
- /*
- * If we return here, we have a KASLR displacement in x23 which we need
- * to take into account by discarding the current kernel mapping and
- * creating a new one.
- */
- pre_disable_mmu_workaround
- msr sctlr_el1, x20 // disable the MMU
- isb
- bl __create_page_tables // recreate kernel mapping
-
- tlbi vmalle1 // Remove any stale TLB entries
- dsb nsh
-
- msr sctlr_el1, x19 // re-enable the MMU
- isb
- ic iallu // flush instructions fetched
- dsb nsh // via old mapping
- isb
-
- bl __relocate_kernel
-#endif
#endif
ldr x8, =__primary_switched
- adrp x0, __PHYS_OFFSET
+ adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
-ENDPROC(__primary_switch)
+SYM_FUNC_END(__primary_switch)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 38bcd4d4e43b..0e1d9c3c6a93 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -16,26 +16,6 @@
#include <asm/virt.h>
/*
- * To prevent the possibility of old and new partial table walks being visible
- * in the tlb, switch the ttbr to a zero page when we invalidate the old
- * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
- * Even switching to our copied tables will cause a changed output address at
- * each stage of the walk.
- */
-.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
- phys_to_ttbr \tmp, \zero_page
- msr ttbr1_el1, \tmp
- isb
- tlbi vmalle1
- dsb nsh
- phys_to_ttbr \tmp, \page_table
- offset_ttbr1 \tmp, \tmp2
- msr ttbr1_el1, \tmp
- isb
-.endm
-
-
-/*
* Resume from hibernate
*
* Loads temporary page tables then restores the memory image.
@@ -45,7 +25,7 @@
* Because this code has to be copied to a 'safe' page, it can't call out to
* other functions by PC-relative address. Also remember that it may be
* mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
*
* This 'safe' page is mapped via ttbr0, and executed from there. This function
* switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -65,7 +45,7 @@
* x5: physical address of a zero page that remains zero after resume
*/
.pushsection ".hibernate_exit.text", "ax"
-ENTRY(swsusp_arch_suspend_exit)
+SYM_CODE_START(swsusp_arch_suspend_exit)
/*
* We execute from ttbr0, change ttbr1 to our copied linear map tables
* with a break-before-make via the zero page
@@ -87,11 +67,12 @@ ENTRY(swsusp_arch_suspend_exit)
copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
add x1, x10, #PAGE_SIZE
- /* Clean the copied page to PoU - based on flush_icache_range() */
+ /* Clean the copied page to PoU - based on caches_clean_inval_pou() */
raw_dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x10, x3
-2: dc cvau, x4 /* clean D line / unified line */
+2: /* clean D line / unified line */
+alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
add x4, x4, x2
cmp x4, x1
b.lo 2b
@@ -110,59 +91,5 @@ ENTRY(swsusp_arch_suspend_exit)
cbz x24, 3f /* Do we need to re-initialise EL2? */
hvc #0
3: ret
-
- .ltorg
-ENDPROC(swsusp_arch_suspend_exit)
-
-/*
- * Restore the hyp stub.
- * This must be done before the hibernate page is unmapped by _cpu_resume(),
- * but happens before any of the hyp-stub's code is cleaned to PoC.
- *
- * x24: The physical address of __hyp_stub_vectors
- */
-el1_sync:
- msr vbar_el2, x24
- eret
-ENDPROC(el1_sync)
-
-.macro invalid_vector label
-\label:
- b \label
-ENDPROC(\label)
-.endm
-
- invalid_vector el2_sync_invalid
- invalid_vector el2_irq_invalid
- invalid_vector el2_fiq_invalid
- invalid_vector el2_error_invalid
- invalid_vector el1_sync_invalid
- invalid_vector el1_irq_invalid
- invalid_vector el1_fiq_invalid
- invalid_vector el1_error_invalid
-
-/* el2 vectors - switch el2 here while we restore the memory image. */
- .align 11
-ENTRY(hibernate_el2_vectors)
- ventry el2_sync_invalid // Synchronous EL2t
- ventry el2_irq_invalid // IRQ EL2t
- ventry el2_fiq_invalid // FIQ EL2t
- ventry el2_error_invalid // Error EL2t
-
- ventry el2_sync_invalid // Synchronous EL2h
- ventry el2_irq_invalid // IRQ EL2h
- ventry el2_fiq_invalid // FIQ EL2h
- ventry el2_error_invalid // Error EL2h
-
- ventry el1_sync // Synchronous 64-bit EL1
- ventry el1_irq_invalid // IRQ 64-bit EL1
- ventry el1_fiq_invalid // FIQ 64-bit EL1
- ventry el1_error_invalid // Error 64-bit EL1
-
- ventry el1_sync_invalid // Synchronous 32-bit EL1
- ventry el1_irq_invalid // IRQ 32-bit EL1
- ventry el1_fiq_invalid // FIQ 32-bit EL1
- ventry el1_error_invalid // Error 32-bit EL1
-END(hibernate_el2_vectors)
-
+SYM_CODE_END(swsusp_arch_suspend_exit)
.popsection
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index a96b2921d22c..02870beb271e 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -7,21 +7,15 @@
* Ubuntu project, hibernation support for mach-dove
* Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
* Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
- * https://lkml.org/lkml/2010/6/18/4
- * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
- * https://patchwork.kernel.org/patch/96442/
- *
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*/
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
-#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
-#include <linux/version.h>
#include <asm/barrier.h>
#include <asm/cacheflush.h>
@@ -31,14 +25,13 @@
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/pgtable-hwdef.h>
+#include <asm/mte.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
+#include <asm/trans_pgd.h>
#include <asm/virt.h>
/*
@@ -52,10 +45,7 @@
extern int in_suspend;
/* Do we need to reset el2? */
-#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
-
-/* temporary el2 vectors in the __hibernate_exit_text section. */
-extern char hibernate_el2_vectors[];
+#define el2_reset_needed() (is_hyp_nvhe())
/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];
@@ -109,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn)
void notrace save_processor_state(void)
{
- WARN_ON(num_online_cpus() != 1);
}
void notrace restore_processor_state(void)
@@ -166,14 +155,11 @@ int arch_hibernation_header_restore(void *addr)
sleep_cpu = -EINVAL;
return -EINVAL;
}
- if (!cpu_online(sleep_cpu)) {
- pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
- ret = cpu_up(sleep_cpu);
- if (ret) {
- pr_err("Failed to bring hibernate-CPU up!\n");
- sleep_cpu = -EINVAL;
- return ret;
- }
+
+ ret = bringup_hibernate_cpu(sleep_cpu);
+ if (ret) {
+ sleep_cpu = -EINVAL;
+ return ret;
}
resume_hdr = *hdr;
@@ -182,9 +168,14 @@ int arch_hibernation_header_restore(void *addr)
}
EXPORT_SYMBOL(arch_hibernation_header_restore);
+static void *hibernate_page_alloc(void *arg)
+{
+ return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
+}
+
/*
* Copies length bytes, starting at src_start into an new page,
- * perform cache maintentance, then maps it at the specified address low
+ * perform cache maintenance, then maps it at the specified address low
* address as executable.
*
* This is used by hibernate to copy the code it needs to execute when
@@ -195,90 +186,143 @@ EXPORT_SYMBOL(arch_hibernation_header_restore);
* page system.
*/
static int create_safe_exec_page(void *src_start, size_t length,
- unsigned long dst_addr,
- phys_addr_t *phys_dst_addr,
- void *(*allocator)(gfp_t mask),
- gfp_t mask)
+ phys_addr_t *phys_dst_addr)
{
- int rc = 0;
- pgd_t *trans_pgd;
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
- unsigned long dst = (unsigned long)allocator(mask);
-
- if (!dst) {
- rc = -ENOMEM;
- goto out;
- }
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
+ };
+
+ void *page = (void *)get_safe_page(GFP_ATOMIC);
+ phys_addr_t trans_ttbr0;
+ unsigned long t0sz;
+ int rc;
+
+ if (!page)
+ return -ENOMEM;
+
+ memcpy(page, src_start, length);
+ caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
+ rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
+ if (rc)
+ return rc;
- memcpy((void *)dst, src_start, length);
- __flush_icache_range(dst, dst + length);
+ cpu_install_ttbr0(trans_ttbr0, t0sz);
+ *phys_dst_addr = virt_to_phys(page);
- trans_pgd = allocator(mask);
- if (!trans_pgd) {
- rc = -ENOMEM;
- goto out;
+ return 0;
+}
+
+#ifdef CONFIG_ARM64_MTE
+
+static DEFINE_XARRAY(mte_pages);
+
+static int save_tags(struct page *page, unsigned long pfn)
+{
+ void *tag_storage, *ret;
+
+ tag_storage = mte_allocate_tag_storage();
+ if (!tag_storage)
+ return -ENOMEM;
+
+ mte_save_page_tags(page_address(page), tag_storage);
+
+ ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL);
+ if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
+ mte_free_tag_storage(tag_storage);
+ return xa_err(ret);
+ } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) {
+ mte_free_tag_storage(ret);
}
- pgdp = pgd_offset_raw(trans_pgd, dst_addr);
- if (pgd_none(READ_ONCE(*pgdp))) {
- pudp = allocator(mask);
- if (!pudp) {
- rc = -ENOMEM;
- goto out;
- }
- pgd_populate(&init_mm, pgdp, pudp);
+ return 0;
+}
+
+static void swsusp_mte_free_storage(void)
+{
+ XA_STATE(xa_state, &mte_pages, 0);
+ void *tags;
+
+ xa_lock(&mte_pages);
+ xas_for_each(&xa_state, tags, ULONG_MAX) {
+ mte_free_tag_storage(tags);
}
+ xa_unlock(&mte_pages);
+
+ xa_destroy(&mte_pages);
+}
+
+static int swsusp_mte_save_tags(void)
+{
+ struct zone *zone;
+ unsigned long pfn, max_zone_pfn;
+ int ret = 0;
+ int n = 0;
+
+ if (!system_supports_mte())
+ return 0;
+
+ for_each_populated_zone(zone) {
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+ struct page *page = pfn_to_online_page(pfn);
- pudp = pud_offset(pgdp, dst_addr);
- if (pud_none(READ_ONCE(*pudp))) {
- pmdp = allocator(mask);
- if (!pmdp) {
- rc = -ENOMEM;
- goto out;
+ if (!page)
+ continue;
+
+ if (!page_mte_tagged(page))
+ continue;
+
+ ret = save_tags(page, pfn);
+ if (ret) {
+ swsusp_mte_free_storage();
+ goto out;
+ }
+
+ n++;
}
- pud_populate(&init_mm, pudp, pmdp);
}
+ pr_info("Saved %d MTE pages\n", n);
- pmdp = pmd_offset(pudp, dst_addr);
- if (pmd_none(READ_ONCE(*pmdp))) {
- ptep = allocator(mask);
- if (!ptep) {
- rc = -ENOMEM;
- goto out;
- }
- pmd_populate_kernel(&init_mm, pmdp, ptep);
+out:
+ return ret;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
+ XA_STATE(xa_state, &mte_pages, 0);
+ int n = 0;
+ void *tags;
+
+ xa_lock(&mte_pages);
+ xas_for_each(&xa_state, tags, ULONG_MAX) {
+ unsigned long pfn = xa_state.xa_index;
+ struct page *page = pfn_to_online_page(pfn);
+
+ mte_restore_page_tags(page_address(page), tags);
+
+ mte_free_tag_storage(tags);
+ n++;
}
+ xa_unlock(&mte_pages);
- ptep = pte_offset_kernel(pmdp, dst_addr);
- set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+ pr_info("Restored %d MTE pages\n", n);
- /*
- * Load our new page tables. A strict BBM approach requires that we
- * ensure that TLBs are free of any entries that may overlap with the
- * global mappings we are about to install.
- *
- * For a real hibernate/resume cycle TTBR0 currently points to a zero
- * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
- * runtime services), while for a userspace-driven test_resume cycle it
- * points to userspace page tables (and we must point it at a zero page
- * ourselves). Elsewhere we only (un)install the idmap with preemption
- * disabled, so T0SZ should be as required regardless.
- */
- cpu_set_reserved_ttbr0();
- local_flush_tlb_all();
- write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
- isb();
+ xa_destroy(&mte_pages);
+}
- *phys_dst_addr = virt_to_phys((void *)dst);
+#else /* CONFIG_ARM64_MTE */
-out:
- return rc;
+static int swsusp_mte_save_tags(void)
+{
+ return 0;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
}
-#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
+#endif /* CONFIG_ARM64_MTE */
int swsusp_arch_suspend(void)
{
@@ -297,19 +341,30 @@ int swsusp_arch_suspend(void)
/* make the crash dump kernel image visible/saveable */
crash_prepare_suspend();
+ ret = swsusp_mte_save_tags();
+ if (ret)
+ return ret;
+
sleep_cpu = smp_processor_id();
ret = swsusp_save();
} else {
/* Clean kernel core startup/idle code to PoC*/
- dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
- dcache_clean_range(__idmap_text_start, __idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
+ (unsigned long)__mmuoff_data_end);
+ dcache_clean_inval_poc((unsigned long)__idmap_text_start,
+ (unsigned long)__idmap_text_end);
/* Clean kvm setup code to PoC? */
if (el2_reset_needed()) {
- dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
- dcache_clean_range(__hyp_text_start, __hyp_text_end);
+ dcache_clean_inval_poc(
+ (unsigned long)__hyp_idmap_text_start,
+ (unsigned long)__hyp_idmap_text_end);
+ dcache_clean_inval_poc((unsigned long)__hyp_text_start,
+ (unsigned long)__hyp_text_end);
}
+ swsusp_mte_restore_tags();
+
/* make the crash dump kernel image protected again */
crash_post_resume();
@@ -327,11 +382,7 @@ int swsusp_arch_suspend(void)
* mitigation off behind our back, let's set the state
* to what we expect it to be.
*/
- switch (arm64_get_ssbd_state()) {
- case ARM64_SSBD_FORCE_ENABLE:
- case ARM64_SSBD_KERNEL:
- arm64_set_ssbd_mitigation(true);
- }
+ spectre_v4_enable_mitigation(NULL);
}
local_daif_restore(flags);
@@ -339,143 +390,6 @@ int swsusp_arch_suspend(void)
return ret;
}
-static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
-{
- pte_t pte = READ_ONCE(*src_ptep);
-
- if (pte_valid(pte)) {
- /*
- * Resume will overwrite areas that may be marked
- * read only (code, rodata). Clear the RDONLY bit from
- * the temporary mappings we use during restore.
- */
- set_pte(dst_ptep, pte_mkwrite(pte));
- } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
- /*
- * debug_pagealloc will removed the PTE_VALID bit if
- * the page isn't in use by the resume kernel. It may have
- * been in use by the original kernel, in which case we need
- * to put it back in our copy to do the restore.
- *
- * Before marking this entry valid, check the pfn should
- * be mapped.
- */
- BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
- }
-}
-
-static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
- unsigned long end)
-{
- pte_t *src_ptep;
- pte_t *dst_ptep;
- unsigned long addr = start;
-
- dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_ptep)
- return -ENOMEM;
- pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
- dst_ptep = pte_offset_kernel(dst_pmdp, start);
-
- src_ptep = pte_offset_kernel(src_pmdp, start);
- do {
- _copy_pte(dst_ptep, src_ptep, addr);
- } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
-
- return 0;
-}
-
-static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
- unsigned long end)
-{
- pmd_t *src_pmdp;
- pmd_t *dst_pmdp;
- unsigned long next;
- unsigned long addr = start;
-
- if (pud_none(READ_ONCE(*dst_pudp))) {
- dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pmdp)
- return -ENOMEM;
- pud_populate(&init_mm, dst_pudp, dst_pmdp);
- }
- dst_pmdp = pmd_offset(dst_pudp, start);
-
- src_pmdp = pmd_offset(src_pudp, start);
- do {
- pmd_t pmd = READ_ONCE(*src_pmdp);
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- continue;
- if (pmd_table(pmd)) {
- if (copy_pte(dst_pmdp, src_pmdp, addr, next))
- return -ENOMEM;
- } else {
- set_pmd(dst_pmdp,
- __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
- }
- } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
- unsigned long end)
-{
- pud_t *dst_pudp;
- pud_t *src_pudp;
- unsigned long next;
- unsigned long addr = start;
-
- if (pgd_none(READ_ONCE(*dst_pgdp))) {
- dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!dst_pudp)
- return -ENOMEM;
- pgd_populate(&init_mm, dst_pgdp, dst_pudp);
- }
- dst_pudp = pud_offset(dst_pgdp, start);
-
- src_pudp = pud_offset(src_pgdp, start);
- do {
- pud_t pud = READ_ONCE(*src_pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- continue;
- if (pud_table(pud)) {
- if (copy_pmd(dst_pudp, src_pudp, addr, next))
- return -ENOMEM;
- } else {
- set_pud(dst_pudp,
- __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
- }
- } while (dst_pudp++, src_pudp++, addr = next, addr != end);
-
- return 0;
-}
-
-static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
- unsigned long end)
-{
- unsigned long next;
- unsigned long addr = start;
- pgd_t *src_pgdp = pgd_offset_k(start);
-
- dst_pgdp = pgd_offset_raw(dst_pgdp, start);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none(READ_ONCE(*src_pgdp)))
- continue;
- if (copy_pud(dst_pgdp, src_pgdp, addr, next))
- return -ENOMEM;
- } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
-
- return 0;
-}
-
/*
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
*
@@ -484,85 +398,72 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
*/
int swsusp_arch_resume(void)
{
- int rc = 0;
+ int rc;
void *zero_page;
size_t exit_size;
pgd_t *tmp_pg_dir;
- phys_addr_t phys_hibernate_exit;
+ phys_addr_t el2_vectors;
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
void *, phys_addr_t, phys_addr_t);
+ struct trans_pgd_info trans_info = {
+ .trans_alloc_page = hibernate_page_alloc,
+ .trans_alloc_arg = (void *)GFP_ATOMIC,
+ };
/*
* Restoring the memory image will overwrite the ttbr1 page tables.
* Create a second copy of just the linear map, and use this when
* restoring.
*/
- tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!tmp_pg_dir) {
- pr_err("Failed to allocate memory for temporary page tables.\n");
- rc = -ENOMEM;
- goto out;
- }
- rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END);
+ rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
+ PAGE_END);
if (rc)
- goto out;
+ return rc;
/*
- * We need a zero page that is zero before & after resume in order to
+ * We need a zero page that is zero before & after resume in order
* to break before make on the ttbr1 page tables.
*/
zero_page = (void *)get_safe_page(GFP_ATOMIC);
if (!zero_page) {
pr_err("Failed to allocate zero page.\n");
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
+ }
+
+ if (el2_reset_needed()) {
+ rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors);
+ if (rc) {
+ pr_err("Failed to setup el2 vectors\n");
+ return rc;
+ }
}
- /*
- * Locate the exit code in the bottom-but-one page, so that *NULL
- * still has disastrous affects.
- */
- hibernate_exit = (void *)PAGE_SIZE;
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
/*
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
* a new set of ttbr0 page tables and load them.
*/
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
- (unsigned long)hibernate_exit,
- &phys_hibernate_exit,
- (void *)get_safe_page, GFP_ATOMIC);
+ (phys_addr_t *)&hibernate_exit);
if (rc) {
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
- goto out;
+ return rc;
}
/*
- * The hibernate exit text contains a set of el2 vectors, that will
- * be executed at el2 with the mmu off in order to reload hyp-stub.
- */
- __flush_dcache_area(hibernate_exit, exit_size);
-
- /*
* KASLR will cause the el2 vectors to be in a different location in
* the resumed kernel. Load hibernate's temporary copy into el2.
*
* We can skip this step if we booted at EL1, or are running with VHE.
*/
- if (el2_reset_needed()) {
- phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
- el2_vectors += hibernate_el2_vectors -
- __hibernate_exit_text_start; /* offset */
-
+ if (el2_reset_needed())
__hyp_set_vectors(el2_vectors);
- }
hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
resume_hdr.reenter_kernel, restore_pblist,
resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
-out:
- return rc;
+ return 0;
}
int hibernate_resume_nonboot_cpu_disable(void)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0b727edf4104..35225632d70a 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -257,7 +257,7 @@ static int hw_breakpoint_control(struct perf_event *bp,
* level.
*/
enable_debug_monitors(dbg_el);
- /* Fall through */
+ fallthrough;
case HW_BREAKPOINT_RESTORE:
/* Setup the address register. */
write_wb_reg(val_reg, i, info->address);
@@ -541,13 +541,13 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2)
break;
- /* Fallthrough */
+ fallthrough;
case 3:
/* Allow single byte watchpoint. */
if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1)
break;
- /* Fallthrough */
+ fallthrough;
default:
return -EINVAL;
}
@@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned int esr,
+static int breakpoint_handler(unsigned long unused, unsigned long esr,
struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
@@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
perf_bp_event(bp, regs);
/* Do we need to handle the stepping? */
- if (is_default_overflow_handler(bp))
+ if (uses_default_overflow_handler(bp))
step = 1;
unlock:
rcu_read_unlock();
@@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler);
* addresses. There is no straight-forward way, short of disassembling the
* offending instruction, to map that address back to the watchpoint. This
* function computes the distance of the memory access from the watchpoint as a
- * heuristic for the likelyhood that a given access triggered the watchpoint.
+ * heuristic for the likelihood that a given access triggered the watchpoint.
*
* See Section D2.10.5 "Determining the memory location that caused a Watchpoint
* exception" of ARMv8 Architecture Reference Manual for details.
@@ -730,7 +730,28 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
return 0;
}
-static int watchpoint_handler(unsigned long addr, unsigned int esr,
+static int watchpoint_report(struct perf_event *wp, unsigned long addr,
+ struct pt_regs *regs)
+{
+ int step = uses_default_overflow_handler(wp);
+ struct arch_hw_breakpoint *info = counter_arch_bp(wp);
+
+ info->trigger = addr;
+
+ /*
+ * If we triggered a user watchpoint from a uaccess routine, then
+ * handle the stepping ourselves since userspace really can't help
+ * us with this.
+ */
+ if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0)
+ step = 1;
+ else
+ perf_bp_event(wp, regs);
+
+ return step;
+}
+
+static int watchpoint_handler(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
@@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
- struct arch_hw_breakpoint *info;
struct arch_hw_breakpoint_ctrl ctrl;
slots = this_cpu_ptr(wp_on_reg);
@@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
if (dist != 0)
continue;
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
-
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
+ step = watchpoint_report(wp, addr, regs);
}
- if (min_dist > 0 && min_dist != -1) {
- /* No exact match found. */
- wp = slots[closest_match];
- info = counter_arch_bp(wp);
- info->trigger = addr;
- perf_bp_event(wp, regs);
- /* Do we need to handle the stepping? */
- if (is_default_overflow_handler(wp))
- step = 1;
- }
+ /* No exact match found? */
+ if (min_dist > 0 && min_dist != -1)
+ step = watchpoint_report(slots[closest_match], addr, regs);
+
rcu_read_unlock();
if (!step)
@@ -965,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu)
return 0;
}
-#ifdef CONFIG_CPU_PM
-extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
-#else
-static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
-{
-}
-#endif
-
/*
* One-time initialisation.
*/
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 73d46070b315..65f76064c86b 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -8,9 +8,9 @@
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/el2_setup.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
@@ -21,18 +21,18 @@
.align 11
-ENTRY(__hyp_stub_vectors)
+SYM_CODE_START(__hyp_stub_vectors)
ventry el2_sync_invalid // Synchronous EL2t
ventry el2_irq_invalid // IRQ EL2t
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
- ventry el2_sync_invalid // Synchronous EL2h
+ ventry elx_sync // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
- ventry el1_sync // Synchronous 64-bit EL1
+ ventry elx_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
@@ -41,16 +41,19 @@ ENTRY(__hyp_stub_vectors)
ventry el1_irq_invalid // IRQ 32-bit EL1
ventry el1_fiq_invalid // FIQ 32-bit EL1
ventry el1_error_invalid // Error 32-bit EL1
-ENDPROC(__hyp_stub_vectors)
+SYM_CODE_END(__hyp_stub_vectors)
.align 11
-el1_sync:
+SYM_CODE_START_LOCAL(elx_sync)
cmp x0, #HVC_SET_VECTORS
- b.ne 2f
+ b.ne 1f
msr vbar_el2, x1
b 9f
+1: cmp x0, #HVC_FINALISE_EL2
+ b.eq __finalise_el2
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -63,17 +66,129 @@ el1_sync:
beq 9f // Nothing to reset!
/* Someone called kvm_call_hyp() against the hyp-stub... */
- ldr x0, =HVC_STUB_ERR
+ mov_q x0, HVC_STUB_ERR
eret
9: mov x0, xzr
eret
-ENDPROC(el1_sync)
+SYM_CODE_END(elx_sync)
+
+SYM_CODE_START_LOCAL(__finalise_el2)
+ finalise_el2_state
+
+ // nVHE? No way! Give me the real thing!
+ // Sanity check: MMU *must* be off
+ mrs x1, sctlr_el2
+ tbnz x1, #0, 1f
+
+ // Needs to be VHE capable, obviously
+ check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 0f 1f x1 x2
+
+0: // Check whether we only want the hypervisor to run VHE, not the kernel
+ adr_l x1, arm64_sw_feature_override
+ ldr x2, [x1, FTR_OVR_VAL_OFFSET]
+ ldr x1, [x1, FTR_OVR_MASK_OFFSET]
+ and x2, x2, x1
+ ubfx x2, x2, #ARM64_SW_FEATURE_OVERRIDE_HVHE, #4
+ cbz x2, 2f
+
+1: mov_q x0, HVC_STUB_ERR
+ eret
+2:
+ // Engage the VHE magic!
+ mov_q x0, HCR_HOST_VHE_FLAGS
+ msr hcr_el2, x0
+ isb
+
+ // Use the EL1 allocated stack, per-cpu offset
+ mrs x0, sp_el1
+ mov sp, x0
+ mrs x0, tpidr_el1
+ msr tpidr_el2, x0
+
+ // FP configuration, vectors
+ mrs_s x0, SYS_CPACR_EL12
+ msr cpacr_el1, x0
+ mrs_s x0, SYS_VBAR_EL12
+ msr vbar_el1, x0
+
+ // Use EL2 translations for SPE & TRBE and disable access from EL1
+ mrs x0, mdcr_el2
+ bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ msr mdcr_el2, x0
+
+ // Transfer the MM state from EL1 to EL2
+ mrs_s x0, SYS_TCR_EL12
+ msr tcr_el1, x0
+ mrs_s x0, SYS_TTBR0_EL12
+ msr ttbr0_el1, x0
+ mrs_s x0, SYS_TTBR1_EL12
+ msr ttbr1_el1, x0
+ mrs_s x0, SYS_MAIR_EL12
+ msr mair_el1, x0
+ mrs x1, REG_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4
+ cbz x1, .Lskip_tcr2
+ mrs x0, REG_TCR2_EL12
+ msr REG_TCR2_EL1, x0
+
+ // Transfer permission indirection state
+ mrs x1, REG_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lskip_indirection
+ mrs x0, REG_PIRE0_EL12
+ msr REG_PIRE0_EL1, x0
+ mrs x0, REG_PIR_EL12
+ msr REG_PIR_EL1, x0
+
+.Lskip_indirection:
+.Lskip_tcr2:
+
+ isb
+
+ // Hack the exception return to stay at EL2
+ mrs x0, spsr_el1
+ and x0, x0, #~PSR_MODE_MASK
+ mov x1, #PSR_MODE_EL2h
+ orr x0, x0, x1
+ msr spsr_el1, x0
+
+ b enter_vhe
+SYM_CODE_END(__finalise_el2)
+
+ // At the point where we reach enter_vhe(), we run with
+ // the MMU off (which is enforced by __finalise_el2()).
+ // We thus need to be in the idmap, or everything will
+ // explode when enabling the MMU.
+
+ .pushsection .idmap.text, "ax"
+
+SYM_CODE_START_LOCAL(enter_vhe)
+ // Invalidate TLBs before enabling the MMU
+ tlbi vmalle1
+ dsb nsh
+ isb
+
+ // Enable the EL2 S1 MMU, as set up from EL1
+ mrs_s x0, SYS_SCTLR_EL12
+ set_sctlr_el1 x0
+
+ // Disable the EL1 S1 MMU for a good measure
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr_s SYS_SCTLR_EL12, x0
+
+ mov x0, xzr
+
+ eret
+SYM_CODE_END(enter_vhe)
+
+ .popsection
.macro invalid_vector label
-\label:
+SYM_CODE_START_LOCAL(\label)
b \label
-ENDPROC(\label)
+SYM_CODE_END(\label)
.endm
invalid_vector el2_sync_invalid
@@ -85,6 +200,8 @@ ENDPROC(\label)
invalid_vector el1_fiq_invalid
invalid_vector el1_error_invalid
+ .popsection
+
/*
* __hyp_set_vectors: Call this after boot to set the initial hypervisor
* vectors as part of hypervisor installation. On an SMP system, this should
@@ -106,15 +223,36 @@ ENDPROC(\label)
* initialisation entry point.
*/
-ENTRY(__hyp_set_vectors)
+SYM_FUNC_START(__hyp_set_vectors)
mov x1, x0
mov x0, #HVC_SET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_set_vectors)
+SYM_FUNC_END(__hyp_set_vectors)
-ENTRY(__hyp_reset_vectors)
+SYM_FUNC_START(__hyp_reset_vectors)
mov x0, #HVC_RESET_VECTORS
hvc #0
ret
-ENDPROC(__hyp_reset_vectors)
+SYM_FUNC_END(__hyp_reset_vectors)
+
+/*
+ * Entry point to finalise EL2 and switch to VHE if deemed capable
+ *
+ * w0: boot mode, as returned by init_kernel_el()
+ */
+SYM_FUNC_START(finalise_el2)
+ // Need to have booted at EL2
+ cmp w0, #BOOT_CPU_MODE_EL2
+ b.ne 1f
+
+ // and still be at EL1
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL1
+ b.ne 1f
+
+ mov x0, #HVC_FINALISE_EL2
+ hvc #0
+1:
+ ret
+SYM_FUNC_END(finalise_el2)
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
new file mode 100644
index 000000000000..05cfb347ec26
--- /dev/null
+++ b/arch/arm64/kernel/idle.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Low-level idle sequences
+ */
+
+#include <linux/cpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/barrier.h>
+#include <asm/cpuidle.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+/*
+ * cpu_do_idle()
+ *
+ * Idle the processor (wait for interrupt).
+ *
+ * If the CPU supports priority masking we must do additional work to
+ * ensure that interrupts are not masked at the PMR (because the core will
+ * not wake up if we block the wake up signal in the interrupt controller).
+ */
+void __cpuidle cpu_do_idle(void)
+{
+ struct arm_cpuidle_irq_context context;
+
+ arm_cpuidle_save_irq_context(&context);
+
+ dsb(sy);
+ wfi();
+
+ arm_cpuidle_restore_irq_context(&context);
+}
+
+/*
+ * This is our default idle handler.
+ */
+void __cpuidle arch_cpu_idle(void)
+{
+ /*
+ * This should do all the clock switching and wait for interrupt
+ * tricks
+ */
+ cpu_do_idle();
+}
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
new file mode 100644
index 000000000000..e30fd9e32ef3
--- /dev/null
+++ b/arch/arm64/kernel/idreg-override.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Early cpufeature override framework
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/setup.h>
+
+#define FTR_DESC_NAME_LEN 20
+#define FTR_DESC_FIELD_LEN 10
+#define FTR_ALIAS_NAME_LEN 30
+#define FTR_ALIAS_OPTION_LEN 116
+
+static u64 __boot_status __initdata;
+
+// temporary __prel64 related definitions
+// to be removed when this code is moved under pi/
+
+#define __prel64_initconst __initconst
+
+#define PREL64(type, name) union { type *name; }
+
+#define prel64_pointer(__d) (__d)
+
+typedef bool filter_t(u64 val);
+
+struct ftr_set_desc {
+ char name[FTR_DESC_NAME_LEN];
+ PREL64(struct arm64_ftr_override, override);
+ struct {
+ char name[FTR_DESC_FIELD_LEN];
+ u8 shift;
+ u8 width;
+ PREL64(filter_t, filter);
+ } fields[];
+};
+
+#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+
+static bool __init mmfr1_vh_filter(u64 val)
+{
+ /*
+ * If we ever reach this point while running VHE, we're
+ * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+ * the user was trying to force nVHE on us, proceed with
+ * attitude adjustment.
+ */
+ return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
+ val == 0);
+}
+
+static const struct ftr_set_desc mmfr1 __prel64_initconst = {
+ .name = "id_aa64mmfr1",
+ .override = &id_aa64mmfr1_override,
+ .fields = {
+ FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
+ {}
+ },
+};
+
+static bool __init pfr0_sve_filter(u64 val)
+{
+ /*
+ * Disabling SVE also means disabling all the features that
+ * are associated with it. The easiest way to do it is just to
+ * override id_aa64zfr0_el1 to be 0.
+ */
+ if (!val) {
+ id_aa64zfr0_override.val = 0;
+ id_aa64zfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
+static const struct ftr_set_desc pfr0 __prel64_initconst = {
+ .name = "id_aa64pfr0",
+ .override = &id_aa64pfr0_override,
+ .fields = {
+ FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
+ {}
+ },
+};
+
+static bool __init pfr1_sme_filter(u64 val)
+{
+ /*
+ * Similarly to SVE, disabling SME also means disabling all
+ * the features that are associated with it. Just set
+ * id_aa64smfr0_el1 to 0 and don't look back.
+ */
+ if (!val) {
+ id_aa64smfr0_override.val = 0;
+ id_aa64smfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
+static const struct ftr_set_desc pfr1 __prel64_initconst = {
+ .name = "id_aa64pfr1",
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+ FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
+ FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar1 __prel64_initconst = {
+ .name = "id_aa64isar1",
+ .override = &id_aa64isar1_override,
+ .fields = {
+ FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
+ FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
+ FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
+ FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar2 __prel64_initconst = {
+ .name = "id_aa64isar2",
+ .override = &id_aa64isar2_override,
+ .fields = {
+ FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
+ FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+ FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc smfr0 __prel64_initconst = {
+ .name = "id_aa64smfr0",
+ .override = &id_aa64smfr0_override,
+ .fields = {
+ FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
+ /* FA64 is a one bit field... :-/ */
+ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
+ {}
+ },
+};
+
+static bool __init hvhe_filter(u64 val)
+{
+ u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+
+ return (val == 1 &&
+ lower_32_bits(__boot_status) == BOOT_CPU_MODE_EL2 &&
+ cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_VH_SHIFT));
+}
+
+static const struct ftr_set_desc sw_features __prel64_initconst = {
+ .name = "arm64_sw",
+ .override = &arm64_sw_feature_override,
+ .fields = {
+ FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
+ FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
+ {}
+ },
+};
+
+static const
+PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr1 },
+ { &pfr0 },
+ { &pfr1 },
+ { &isar1 },
+ { &isar2 },
+ { &smfr0 },
+ { &sw_features },
+};
+
+static const struct {
+ char alias[FTR_ALIAS_NAME_LEN];
+ char feature[FTR_ALIAS_OPTION_LEN];
+} aliases[] __initconst = {
+ { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "arm64.nosve", "id_aa64pfr0.sve=0" },
+ { "arm64.nosme", "id_aa64pfr1.sme=0" },
+ { "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nopauth",
+ "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+ "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
+ { "arm64.nomops", "id_aa64isar2.mops=0" },
+ { "arm64.nomte", "id_aa64pfr1.mte=0" },
+ { "nokaslr", "arm64_sw.nokaslr=1" },
+};
+
+static int __init parse_hexdigit(const char *p, u64 *v)
+{
+ // skip "0x" if it comes next
+ if (p[0] == '0' && tolower(p[1]) == 'x')
+ p += 2;
+
+ // check whether the RHS is a single hex digit
+ if (!isxdigit(p[0]) || (p[1] && !isspace(p[1])))
+ return -EINVAL;
+
+ *v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10);
+ return 0;
+}
+
+static int __init find_field(const char *cmdline, char *opt, int len,
+ const struct ftr_set_desc *reg, int f, u64 *v)
+{
+ int flen = strlen(reg->fields[f].name);
+
+ // append '<fieldname>=' to obtain '<name>.<fieldname>='
+ memcpy(opt + len, reg->fields[f].name, flen);
+ len += flen;
+ opt[len++] = '=';
+
+ if (memcmp(cmdline, opt, len))
+ return -1;
+
+ return parse_hexdigit(cmdline + len, v);
+}
+
+static void __init match_options(const char *cmdline)
+{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg);
+ struct arm64_ftr_override *override;
+ int len = strlen(reg->name);
+ int f;
+
+ override = prel64_pointer(reg->override);
+
+ // set opt[] to '<name>.'
+ memcpy(opt, reg->name, len);
+ opt[len++] = '.';
+
+ for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
+ u64 shift = reg->fields[f].shift;
+ u64 width = reg->fields[f].width ?: 4;
+ u64 mask = GENMASK_ULL(shift + width - 1, shift);
+ bool (*filter)(u64 val);
+ u64 v;
+
+ if (find_field(cmdline, opt, len, reg, f, &v))
+ continue;
+
+ /*
+ * If an override gets filtered out, advertise
+ * it by setting the value to the all-ones while
+ * clearing the mask... Yes, this is fragile.
+ */
+ filter = prel64_pointer(reg->fields[f].filter);
+ if (filter && !filter(v)) {
+ override->val |= mask;
+ override->mask &= ~mask;
+ continue;
+ }
+
+ override->val &= ~mask;
+ override->val |= (v << shift) & mask;
+ override->mask |= mask;
+
+ return;
+ }
+ }
+}
+
+static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
+{
+ do {
+ char buf[256];
+ size_t len;
+ int i;
+
+ cmdline = skip_spaces(cmdline);
+
+ /* terminate on "--" appearing on the command line by itself */
+ if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2]))
+ return;
+
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) {
+ if (len >= sizeof(buf) - 1)
+ break;
+ if (cmdline[len] == '-')
+ buf[len] = '_';
+ else
+ buf[len] = cmdline[len];
+ }
+ if (!len)
+ return;
+
+ buf[len] = 0;
+
+ cmdline += len;
+
+ match_options(buf);
+
+ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
+ if (!memcmp(buf, aliases[i].alias, len + 1))
+ __parse_cmdline(aliases[i].feature, false);
+ } while (1);
+}
+
+static __init const u8 *get_bootargs_cmdline(void)
+{
+ const u8 *prop;
+ void *fdt;
+ int node;
+
+ fdt = get_early_fdt_ptr();
+ if (!fdt)
+ return NULL;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return NULL;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ return NULL;
+
+ return strlen(prop) ? prop : NULL;
+}
+
+static __init void parse_cmdline(void)
+{
+ const u8 *prop = get_bootargs_cmdline();
+
+ if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
+ __parse_cmdline(CONFIG_CMDLINE, true);
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
+ __parse_cmdline(prop, true);
+}
+
+/* Keep checkers quiet */
+void init_feature_override(u64 boot_status);
+
+asmlinkage void __init init_feature_override(u64 boot_status)
+{
+ struct arm64_ftr_override *override;
+ const struct ftr_set_desc *reg;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+
+ override->val = 0;
+ override->mask = 0;
+ }
+
+ __boot_status = boot_status;
+
+ parse_cmdline();
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+ dcache_clean_inval_poc((unsigned long)override,
+ (unsigned long)(override + 1));
+ }
+}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 25a2a9b479c2..5e4dc72ab1bd 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,9 +10,7 @@
#error This file should only be included in vmlinux.lds.S
#endif
-#ifdef CONFIG_EFI
-
-__efistub_stext_offset = stext - _text;
+PROVIDE(__efistub_primary_entry = primary_entry);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -23,29 +21,97 @@ __efistub_stext_offset = stext - _text;
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_memcmp = __pi_memcmp;
-__efistub_memchr = __pi_memchr;
-__efistub_memcpy = __pi_memcpy;
-__efistub_memmove = __pi_memmove;
-__efistub_memset = __pi_memset;
-__efistub_strlen = __pi_strlen;
-__efistub_strnlen = __pi_strnlen;
-__efistub_strcmp = __pi_strcmp;
-__efistub_strncmp = __pi_strncmp;
-__efistub_strrchr = __pi_strrchr;
-__efistub___flush_dcache_area = __pi___flush_dcache_area;
+PROVIDE(__efistub_caches_clean_inval_pou = __pi_caches_clean_inval_pou);
+
+PROVIDE(__efistub__text = _text);
+PROVIDE(__efistub__end = _end);
+PROVIDE(__efistub___inittext_end = __inittext_end);
+PROVIDE(__efistub__edata = _edata);
+#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB)
+PROVIDE(__efistub_screen_info = screen_info);
+#endif
+PROVIDE(__efistub__ctype = _ctype);
+
+PROVIDE(__pi___memcpy = __pi_memcpy);
+PROVIDE(__pi___memmove = __pi_memmove);
+PROVIDE(__pi___memset = __pi_memset);
+
+#ifdef CONFIG_KVM
+
+/*
+ * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to
+ * separate it from the kernel proper. The following symbols are legally
+ * accessed by it, therefore provide aliases to make them linkable.
+ * Do not include symbols which may not be safely accessed under hypervisor
+ * memory mappings.
+ */
+
+/* Alternative callbacks for init-time patching of nVHE hyp code. */
+KVM_NVHE_ALIAS(kvm_patch_vector_branch);
+KVM_NVHE_ALIAS(kvm_update_va_mask);
+KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
+KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
+KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
+KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
+KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
+KVM_NVHE_ALIAS(alt_cb_patch_nops);
+
+/* Global kernel state accessed by nVHE hyp code. */
+KVM_NVHE_ALIAS(kvm_vgic_global_state);
+
+/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
+KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
+
+/* Vectors installed by hyp-init on reset HVC. */
+KVM_NVHE_ALIAS(__hyp_stub_vectors);
+
+/* Static keys which are set if a vGIC trap should be handled in hyp. */
+KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
+KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+/* Static key checked in GIC_PRIO_IRQOFF. */
+KVM_NVHE_ALIAS(gic_nonsecure_priorities);
+#endif
+
+/* EL2 exception handling */
+KVM_NVHE_ALIAS(__start___kvm_ex_table);
+KVM_NVHE_ALIAS(__stop___kvm_ex_table);
+
+/* PMU available static key */
+#ifdef CONFIG_HW_PERF_EVENTS
+KVM_NVHE_ALIAS(kvm_arm_pmu_available);
+#endif
+
+/* Position-independent library routines */
+KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
+KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
+KVM_NVHE_ALIAS_HYP(memcpy, __pi_memcpy);
+KVM_NVHE_ALIAS_HYP(memset, __pi_memset);
#ifdef CONFIG_KASAN
-__efistub___memcpy = __pi_memcpy;
-__efistub___memmove = __pi_memmove;
-__efistub___memset = __pi_memset;
+KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
+KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
#endif
-__efistub__text = _text;
-__efistub__end = _end;
-__efistub__edata = _edata;
-__efistub_screen_info = screen_info;
+/* Hyp memory sections */
+KVM_NVHE_ALIAS(__hyp_idmap_text_start);
+KVM_NVHE_ALIAS(__hyp_idmap_text_end);
+KVM_NVHE_ALIAS(__hyp_text_start);
+KVM_NVHE_ALIAS(__hyp_text_end);
+KVM_NVHE_ALIAS(__hyp_bss_start);
+KVM_NVHE_ALIAS(__hyp_bss_end);
+KVM_NVHE_ALIAS(__hyp_rodata_start);
+KVM_NVHE_ALIAS(__hyp_rodata_end);
+
+/* pKVM static key */
+KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
+
+#endif /* CONFIG_KVM */
+#ifdef CONFIG_EFI_ZBOOT
+_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index c7d38c660372..7bc3ba897901 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -62,7 +62,6 @@
*/
#define HEAD_SYMBOLS \
DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \
- DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \
DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
#endif /* __ARM64_KERNEL_IMAGE_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
deleted file mode 100644
index 4a9e773a177f..000000000000
--- a/arch/arm64/kernel/insn.c
+++ /dev/null
@@ -1,1690 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2013 Huawei Ltd.
- * Author: Jiang Liu <liuj97@gmail.com>
- *
- * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
- */
-#include <linux/bitops.h>
-#include <linux/bug.h>
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/stop_machine.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-
-#include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
-#include <asm/fixmap.h>
-#include <asm/insn.h>
-#include <asm/kprobes.h>
-#include <asm/sections.h>
-
-#define AARCH64_INSN_SF_BIT BIT(31)
-#define AARCH64_INSN_N_BIT BIT(22)
-#define AARCH64_INSN_LSL_12 BIT(22)
-
-static const int aarch64_insn_encoding_class[] = {
- AARCH64_INSN_CLS_UNKNOWN,
- AARCH64_INSN_CLS_UNKNOWN,
- AARCH64_INSN_CLS_UNKNOWN,
- AARCH64_INSN_CLS_UNKNOWN,
- AARCH64_INSN_CLS_LDST,
- AARCH64_INSN_CLS_DP_REG,
- AARCH64_INSN_CLS_LDST,
- AARCH64_INSN_CLS_DP_FPSIMD,
- AARCH64_INSN_CLS_DP_IMM,
- AARCH64_INSN_CLS_DP_IMM,
- AARCH64_INSN_CLS_BR_SYS,
- AARCH64_INSN_CLS_BR_SYS,
- AARCH64_INSN_CLS_LDST,
- AARCH64_INSN_CLS_DP_REG,
- AARCH64_INSN_CLS_LDST,
- AARCH64_INSN_CLS_DP_FPSIMD,
-};
-
-enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
-{
- return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
-}
-
-/* NOP is an alias of HINT */
-bool __kprobes aarch64_insn_is_nop(u32 insn)
-{
- if (!aarch64_insn_is_hint(insn))
- return false;
-
- switch (insn & 0xFE0) {
- case AARCH64_INSN_HINT_YIELD:
- case AARCH64_INSN_HINT_WFE:
- case AARCH64_INSN_HINT_WFI:
- case AARCH64_INSN_HINT_SEV:
- case AARCH64_INSN_HINT_SEVL:
- return false;
- default:
- return true;
- }
-}
-
-bool aarch64_insn_is_branch_imm(u32 insn)
-{
- return (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) ||
- aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) ||
- aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
- aarch64_insn_is_bcond(insn));
-}
-
-static DEFINE_RAW_SPINLOCK(patch_lock);
-
-static bool is_exit_text(unsigned long addr)
-{
- /* discarded with init text/data */
- return system_state < SYSTEM_RUNNING &&
- addr >= (unsigned long)__exittext_begin &&
- addr < (unsigned long)__exittext_end;
-}
-
-static bool is_image_text(unsigned long addr)
-{
- return core_kernel_text(addr) || is_exit_text(addr);
-}
-
-static void __kprobes *patch_map(void *addr, int fixmap)
-{
- unsigned long uintaddr = (uintptr_t) addr;
- bool image = is_image_text(uintaddr);
- struct page *page;
-
- if (image)
- page = phys_to_page(__pa_symbol(addr));
- else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
- page = vmalloc_to_page(addr);
- else
- return addr;
-
- BUG_ON(!page);
- return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
-}
-
-static void __kprobes patch_unmap(int fixmap)
-{
- clear_fixmap(fixmap);
-}
-/*
- * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
- * little-endian.
- */
-int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
-{
- int ret;
- __le32 val;
-
- ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
- if (!ret)
- *insnp = le32_to_cpu(val);
-
- return ret;
-}
-
-static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
-{
- void *waddr = addr;
- unsigned long flags = 0;
- int ret;
-
- raw_spin_lock_irqsave(&patch_lock, flags);
- waddr = patch_map(addr, FIX_TEXT_POKE0);
-
- ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
-
- patch_unmap(FIX_TEXT_POKE0);
- raw_spin_unlock_irqrestore(&patch_lock, flags);
-
- return ret;
-}
-
-int __kprobes aarch64_insn_write(void *addr, u32 insn)
-{
- return __aarch64_insn_write(addr, cpu_to_le32(insn));
-}
-
-bool __kprobes aarch64_insn_uses_literal(u32 insn)
-{
- /* ldr/ldrsw (literal), prfm */
-
- return aarch64_insn_is_ldr_lit(insn) ||
- aarch64_insn_is_ldrsw_lit(insn) ||
- aarch64_insn_is_adr_adrp(insn) ||
- aarch64_insn_is_prfm_lit(insn);
-}
-
-bool __kprobes aarch64_insn_is_branch(u32 insn)
-{
- /* b, bl, cb*, tb*, b.cond, br, blr */
-
- return aarch64_insn_is_b(insn) ||
- aarch64_insn_is_bl(insn) ||
- aarch64_insn_is_cbz(insn) ||
- aarch64_insn_is_cbnz(insn) ||
- aarch64_insn_is_tbz(insn) ||
- aarch64_insn_is_tbnz(insn) ||
- aarch64_insn_is_ret(insn) ||
- aarch64_insn_is_br(insn) ||
- aarch64_insn_is_blr(insn) ||
- aarch64_insn_is_bcond(insn);
-}
-
-int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
-{
- u32 *tp = addr;
- int ret;
-
- /* A64 instructions must be word aligned */
- if ((uintptr_t)tp & 0x3)
- return -EINVAL;
-
- ret = aarch64_insn_write(tp, insn);
- if (ret == 0)
- __flush_icache_range((uintptr_t)tp,
- (uintptr_t)tp + AARCH64_INSN_SIZE);
-
- return ret;
-}
-
-struct aarch64_insn_patch {
- void **text_addrs;
- u32 *new_insns;
- int insn_cnt;
- atomic_t cpu_count;
-};
-
-static int __kprobes aarch64_insn_patch_text_cb(void *arg)
-{
- int i, ret = 0;
- struct aarch64_insn_patch *pp = arg;
-
- /* The first CPU becomes master */
- if (atomic_inc_return(&pp->cpu_count) == 1) {
- for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
- ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
- pp->new_insns[i]);
- /* Notify other processors with an additional increment. */
- atomic_inc(&pp->cpu_count);
- } else {
- while (atomic_read(&pp->cpu_count) <= num_online_cpus())
- cpu_relax();
- isb();
- }
-
- return ret;
-}
-
-int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
-{
- struct aarch64_insn_patch patch = {
- .text_addrs = addrs,
- .new_insns = insns,
- .insn_cnt = cnt,
- .cpu_count = ATOMIC_INIT(0),
- };
-
- if (cnt <= 0)
- return -EINVAL;
-
- return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
- cpu_online_mask);
-}
-
-static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
- u32 *maskp, int *shiftp)
-{
- u32 mask;
- int shift;
-
- switch (type) {
- case AARCH64_INSN_IMM_26:
- mask = BIT(26) - 1;
- shift = 0;
- break;
- case AARCH64_INSN_IMM_19:
- mask = BIT(19) - 1;
- shift = 5;
- break;
- case AARCH64_INSN_IMM_16:
- mask = BIT(16) - 1;
- shift = 5;
- break;
- case AARCH64_INSN_IMM_14:
- mask = BIT(14) - 1;
- shift = 5;
- break;
- case AARCH64_INSN_IMM_12:
- mask = BIT(12) - 1;
- shift = 10;
- break;
- case AARCH64_INSN_IMM_9:
- mask = BIT(9) - 1;
- shift = 12;
- break;
- case AARCH64_INSN_IMM_7:
- mask = BIT(7) - 1;
- shift = 15;
- break;
- case AARCH64_INSN_IMM_6:
- case AARCH64_INSN_IMM_S:
- mask = BIT(6) - 1;
- shift = 10;
- break;
- case AARCH64_INSN_IMM_R:
- mask = BIT(6) - 1;
- shift = 16;
- break;
- case AARCH64_INSN_IMM_N:
- mask = 1;
- shift = 22;
- break;
- default:
- return -EINVAL;
- }
-
- *maskp = mask;
- *shiftp = shift;
-
- return 0;
-}
-
-#define ADR_IMM_HILOSPLIT 2
-#define ADR_IMM_SIZE SZ_2M
-#define ADR_IMM_LOMASK ((1 << ADR_IMM_HILOSPLIT) - 1)
-#define ADR_IMM_HIMASK ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
-#define ADR_IMM_LOSHIFT 29
-#define ADR_IMM_HISHIFT 5
-
-u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
-{
- u32 immlo, immhi, mask;
- int shift;
-
- switch (type) {
- case AARCH64_INSN_IMM_ADR:
- shift = 0;
- immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
- immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
- insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
- mask = ADR_IMM_SIZE - 1;
- break;
- default:
- if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
- pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
- type);
- return 0;
- }
- }
-
- return (insn >> shift) & mask;
-}
-
-u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
- u32 insn, u64 imm)
-{
- u32 immlo, immhi, mask;
- int shift;
-
- if (insn == AARCH64_BREAK_FAULT)
- return AARCH64_BREAK_FAULT;
-
- switch (type) {
- case AARCH64_INSN_IMM_ADR:
- shift = 0;
- immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
- imm >>= ADR_IMM_HILOSPLIT;
- immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
- imm = immlo | immhi;
- mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
- (ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
- break;
- default:
- if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
- pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
- type);
- return AARCH64_BREAK_FAULT;
- }
- }
-
- /* Update the immediate field. */
- insn &= ~(mask << shift);
- insn |= (imm & mask) << shift;
-
- return insn;
-}
-
-u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
- u32 insn)
-{
- int shift;
-
- switch (type) {
- case AARCH64_INSN_REGTYPE_RT:
- case AARCH64_INSN_REGTYPE_RD:
- shift = 0;
- break;
- case AARCH64_INSN_REGTYPE_RN:
- shift = 5;
- break;
- case AARCH64_INSN_REGTYPE_RT2:
- case AARCH64_INSN_REGTYPE_RA:
- shift = 10;
- break;
- case AARCH64_INSN_REGTYPE_RM:
- shift = 16;
- break;
- default:
- pr_err("%s: unknown register type encoding %d\n", __func__,
- type);
- return 0;
- }
-
- return (insn >> shift) & GENMASK(4, 0);
-}
-
-static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
- u32 insn,
- enum aarch64_insn_register reg)
-{
- int shift;
-
- if (insn == AARCH64_BREAK_FAULT)
- return AARCH64_BREAK_FAULT;
-
- if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
- pr_err("%s: unknown register encoding %d\n", __func__, reg);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (type) {
- case AARCH64_INSN_REGTYPE_RT:
- case AARCH64_INSN_REGTYPE_RD:
- shift = 0;
- break;
- case AARCH64_INSN_REGTYPE_RN:
- shift = 5;
- break;
- case AARCH64_INSN_REGTYPE_RT2:
- case AARCH64_INSN_REGTYPE_RA:
- shift = 10;
- break;
- case AARCH64_INSN_REGTYPE_RM:
- case AARCH64_INSN_REGTYPE_RS:
- shift = 16;
- break;
- default:
- pr_err("%s: unknown register type encoding %d\n", __func__,
- type);
- return AARCH64_BREAK_FAULT;
- }
-
- insn &= ~(GENMASK(4, 0) << shift);
- insn |= reg << shift;
-
- return insn;
-}
-
-static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
- u32 insn)
-{
- u32 size;
-
- switch (type) {
- case AARCH64_INSN_SIZE_8:
- size = 0;
- break;
- case AARCH64_INSN_SIZE_16:
- size = 1;
- break;
- case AARCH64_INSN_SIZE_32:
- size = 2;
- break;
- case AARCH64_INSN_SIZE_64:
- size = 3;
- break;
- default:
- pr_err("%s: unknown size encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- insn &= ~GENMASK(31, 30);
- insn |= size << 30;
-
- return insn;
-}
-
-static inline long branch_imm_common(unsigned long pc, unsigned long addr,
- long range)
-{
- long offset;
-
- if ((pc & 0x3) || (addr & 0x3)) {
- pr_err("%s: A64 instructions must be word aligned\n", __func__);
- return range;
- }
-
- offset = ((long)addr - (long)pc);
-
- if (offset < -range || offset >= range) {
- pr_err("%s: offset out of range\n", __func__);
- return range;
- }
-
- return offset;
-}
-
-u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
- enum aarch64_insn_branch_type type)
-{
- u32 insn;
- long offset;
-
- /*
- * B/BL support [-128M, 128M) offset
- * ARM64 virtual address arrangement guarantees all kernel and module
- * texts are within +/-128M.
- */
- offset = branch_imm_common(pc, addr, SZ_128M);
- if (offset >= SZ_128M)
- return AARCH64_BREAK_FAULT;
-
- switch (type) {
- case AARCH64_INSN_BRANCH_LINK:
- insn = aarch64_insn_get_bl_value();
- break;
- case AARCH64_INSN_BRANCH_NOLINK:
- insn = aarch64_insn_get_b_value();
- break;
- default:
- pr_err("%s: unknown branch encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
- offset >> 2);
-}
-
-u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
- enum aarch64_insn_register reg,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_branch_type type)
-{
- u32 insn;
- long offset;
-
- offset = branch_imm_common(pc, addr, SZ_1M);
- if (offset >= SZ_1M)
- return AARCH64_BREAK_FAULT;
-
- switch (type) {
- case AARCH64_INSN_BRANCH_COMP_ZERO:
- insn = aarch64_insn_get_cbz_value();
- break;
- case AARCH64_INSN_BRANCH_COMP_NONZERO:
- insn = aarch64_insn_get_cbnz_value();
- break;
- default:
- pr_err("%s: unknown branch encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
- offset >> 2);
-}
-
-u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
- enum aarch64_insn_condition cond)
-{
- u32 insn;
- long offset;
-
- offset = branch_imm_common(pc, addr, SZ_1M);
-
- insn = aarch64_insn_get_bcond_value();
-
- if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) {
- pr_err("%s: unknown condition encoding %d\n", __func__, cond);
- return AARCH64_BREAK_FAULT;
- }
- insn |= cond;
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
- offset >> 2);
-}
-
-u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op)
-{
- return aarch64_insn_get_hint_value() | op;
-}
-
-u32 __kprobes aarch64_insn_gen_nop(void)
-{
- return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
-}
-
-u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
- enum aarch64_insn_branch_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_BRANCH_NOLINK:
- insn = aarch64_insn_get_br_value();
- break;
- case AARCH64_INSN_BRANCH_LINK:
- insn = aarch64_insn_get_blr_value();
- break;
- case AARCH64_INSN_BRANCH_RETURN:
- insn = aarch64_insn_get_ret_value();
- break;
- default:
- pr_err("%s: unknown branch encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, reg);
-}
-
-u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
- enum aarch64_insn_register base,
- enum aarch64_insn_register offset,
- enum aarch64_insn_size_type size,
- enum aarch64_insn_ldst_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_LDST_LOAD_REG_OFFSET:
- insn = aarch64_insn_get_ldr_reg_value();
- break;
- case AARCH64_INSN_LDST_STORE_REG_OFFSET:
- insn = aarch64_insn_get_str_reg_value();
- break;
- default:
- pr_err("%s: unknown load/store encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_ldst_size(size, insn);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- base);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
- offset);
-}
-
-u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
- enum aarch64_insn_register reg2,
- enum aarch64_insn_register base,
- int offset,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_ldst_type type)
-{
- u32 insn;
- int shift;
-
- switch (type) {
- case AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX:
- insn = aarch64_insn_get_ldp_pre_value();
- break;
- case AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX:
- insn = aarch64_insn_get_stp_pre_value();
- break;
- case AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX:
- insn = aarch64_insn_get_ldp_post_value();
- break;
- case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
- insn = aarch64_insn_get_stp_post_value();
- break;
- default:
- pr_err("%s: unknown load/store encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if ((offset & 0x3) || (offset < -256) || (offset > 252)) {
- pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n",
- __func__, offset);
- return AARCH64_BREAK_FAULT;
- }
- shift = 2;
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- if ((offset & 0x7) || (offset < -512) || (offset > 504)) {
- pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n",
- __func__, offset);
- return AARCH64_BREAK_FAULT;
- }
- shift = 3;
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
- reg1);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
- reg2);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- base);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_7, insn,
- offset >> shift);
-}
-
-u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
- enum aarch64_insn_register base,
- enum aarch64_insn_register state,
- enum aarch64_insn_size_type size,
- enum aarch64_insn_ldst_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_LDST_LOAD_EX:
- insn = aarch64_insn_get_load_ex_value();
- break;
- case AARCH64_INSN_LDST_STORE_EX:
- insn = aarch64_insn_get_store_ex_value();
- break;
- default:
- pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_ldst_size(size, insn);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
- reg);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- base);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
- AARCH64_INSN_REG_ZR);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
- state);
-}
-
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
- enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size)
-{
- u32 insn = aarch64_insn_get_ldadd_value();
-
- switch (size) {
- case AARCH64_INSN_SIZE_32:
- case AARCH64_INSN_SIZE_64:
- break;
- default:
- pr_err("%s: unimplemented size encoding %d\n", __func__, size);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_ldst_size(size, insn);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
- result);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- address);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
- value);
-}
-
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size)
-{
- /*
- * STADD is simply encoded as an alias for LDADD with XZR as
- * the destination register.
- */
- return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
- value, size);
-}
-
-static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
- enum aarch64_insn_prfm_target target,
- enum aarch64_insn_prfm_policy policy,
- u32 insn)
-{
- u32 imm_type = 0, imm_target = 0, imm_policy = 0;
-
- switch (type) {
- case AARCH64_INSN_PRFM_TYPE_PLD:
- break;
- case AARCH64_INSN_PRFM_TYPE_PLI:
- imm_type = BIT(0);
- break;
- case AARCH64_INSN_PRFM_TYPE_PST:
- imm_type = BIT(1);
- break;
- default:
- pr_err("%s: unknown prfm type encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (target) {
- case AARCH64_INSN_PRFM_TARGET_L1:
- break;
- case AARCH64_INSN_PRFM_TARGET_L2:
- imm_target = BIT(0);
- break;
- case AARCH64_INSN_PRFM_TARGET_L3:
- imm_target = BIT(1);
- break;
- default:
- pr_err("%s: unknown prfm target encoding %d\n", __func__, target);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (policy) {
- case AARCH64_INSN_PRFM_POLICY_KEEP:
- break;
- case AARCH64_INSN_PRFM_POLICY_STRM:
- imm_policy = BIT(0);
- break;
- default:
- pr_err("%s: unknown prfm policy encoding %d\n", __func__, policy);
- return AARCH64_BREAK_FAULT;
- }
-
- /* In this case, imm5 is encoded into Rt field. */
- insn &= ~GENMASK(4, 0);
- insn |= imm_policy | (imm_target << 1) | (imm_type << 3);
-
- return insn;
-}
-
-u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
- enum aarch64_insn_prfm_type type,
- enum aarch64_insn_prfm_target target,
- enum aarch64_insn_prfm_policy policy)
-{
- u32 insn = aarch64_insn_get_prfm_value();
-
- insn = aarch64_insn_encode_ldst_size(AARCH64_INSN_SIZE_64, insn);
-
- insn = aarch64_insn_encode_prfm_imm(type, target, policy, insn);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- base);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, 0);
-}
-
-u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- int imm, enum aarch64_insn_variant variant,
- enum aarch64_insn_adsb_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_ADSB_ADD:
- insn = aarch64_insn_get_add_imm_value();
- break;
- case AARCH64_INSN_ADSB_SUB:
- insn = aarch64_insn_get_sub_imm_value();
- break;
- case AARCH64_INSN_ADSB_ADD_SETFLAGS:
- insn = aarch64_insn_get_adds_imm_value();
- break;
- case AARCH64_INSN_ADSB_SUB_SETFLAGS:
- insn = aarch64_insn_get_subs_imm_value();
- break;
- default:
- pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- /* We can't encode more than a 24bit value (12bit + 12bit shift) */
- if (imm & ~(BIT(24) - 1))
- goto out;
-
- /* If we have something in the top 12 bits... */
- if (imm & ~(SZ_4K - 1)) {
- /* ... and in the low 12 bits -> error */
- if (imm & (SZ_4K - 1))
- goto out;
-
- imm >>= 12;
- insn |= AARCH64_INSN_LSL_12;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
-
-out:
- pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
- return AARCH64_BREAK_FAULT;
-}
-
-u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- int immr, int imms,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_bitfield_type type)
-{
- u32 insn;
- u32 mask;
-
- switch (type) {
- case AARCH64_INSN_BITFIELD_MOVE:
- insn = aarch64_insn_get_bfm_value();
- break;
- case AARCH64_INSN_BITFIELD_MOVE_UNSIGNED:
- insn = aarch64_insn_get_ubfm_value();
- break;
- case AARCH64_INSN_BITFIELD_MOVE_SIGNED:
- insn = aarch64_insn_get_sbfm_value();
- break;
- default:
- pr_err("%s: unknown bitfield encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- mask = GENMASK(4, 0);
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT | AARCH64_INSN_N_BIT;
- mask = GENMASK(5, 0);
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- if (immr & ~mask) {
- pr_err("%s: invalid immr encoding %d\n", __func__, immr);
- return AARCH64_BREAK_FAULT;
- }
- if (imms & ~mask) {
- pr_err("%s: invalid imms encoding %d\n", __func__, imms);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
- insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
-}
-
-u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
- int imm, int shift,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_movewide_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_MOVEWIDE_ZERO:
- insn = aarch64_insn_get_movz_value();
- break;
- case AARCH64_INSN_MOVEWIDE_KEEP:
- insn = aarch64_insn_get_movk_value();
- break;
- case AARCH64_INSN_MOVEWIDE_INVERSE:
- insn = aarch64_insn_get_movn_value();
- break;
- default:
- pr_err("%s: unknown movewide encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- if (imm & ~(SZ_64K - 1)) {
- pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if (shift != 0 && shift != 16) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- if (shift != 0 && shift != 16 && shift != 32 && shift != 48) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn |= (shift >> 4) << 21;
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
-}
-
-u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_register reg,
- int shift,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_adsb_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_ADSB_ADD:
- insn = aarch64_insn_get_add_value();
- break;
- case AARCH64_INSN_ADSB_SUB:
- insn = aarch64_insn_get_sub_value();
- break;
- case AARCH64_INSN_ADSB_ADD_SETFLAGS:
- insn = aarch64_insn_get_adds_value();
- break;
- case AARCH64_INSN_ADSB_SUB_SETFLAGS:
- insn = aarch64_insn_get_subs_value();
- break;
- default:
- pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if (shift & ~(SZ_32 - 1)) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- if (shift & ~(SZ_64 - 1)) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
-}
-
-u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_data1_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_DATA1_REVERSE_16:
- insn = aarch64_insn_get_rev16_value();
- break;
- case AARCH64_INSN_DATA1_REVERSE_32:
- insn = aarch64_insn_get_rev32_value();
- break;
- case AARCH64_INSN_DATA1_REVERSE_64:
- if (variant != AARCH64_INSN_VARIANT_64BIT) {
- pr_err("%s: invalid variant for reverse64 %d\n",
- __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
- insn = aarch64_insn_get_rev64_value();
- break;
- default:
- pr_err("%s: unknown data1 encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-}
-
-u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_register reg,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_data2_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_DATA2_UDIV:
- insn = aarch64_insn_get_udiv_value();
- break;
- case AARCH64_INSN_DATA2_SDIV:
- insn = aarch64_insn_get_sdiv_value();
- break;
- case AARCH64_INSN_DATA2_LSLV:
- insn = aarch64_insn_get_lslv_value();
- break;
- case AARCH64_INSN_DATA2_LSRV:
- insn = aarch64_insn_get_lsrv_value();
- break;
- case AARCH64_INSN_DATA2_ASRV:
- insn = aarch64_insn_get_asrv_value();
- break;
- case AARCH64_INSN_DATA2_RORV:
- insn = aarch64_insn_get_rorv_value();
- break;
- default:
- pr_err("%s: unknown data2 encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-}
-
-u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_register reg1,
- enum aarch64_insn_register reg2,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_data3_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_DATA3_MADD:
- insn = aarch64_insn_get_madd_value();
- break;
- case AARCH64_INSN_DATA3_MSUB:
- insn = aarch64_insn_get_msub_value();
- break;
- default:
- pr_err("%s: unknown data3 encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RA, insn, src);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
- reg1);
-
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
- reg2);
-}
-
-u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_register reg,
- int shift,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_logic_type type)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_LOGIC_AND:
- insn = aarch64_insn_get_and_value();
- break;
- case AARCH64_INSN_LOGIC_BIC:
- insn = aarch64_insn_get_bic_value();
- break;
- case AARCH64_INSN_LOGIC_ORR:
- insn = aarch64_insn_get_orr_value();
- break;
- case AARCH64_INSN_LOGIC_ORN:
- insn = aarch64_insn_get_orn_value();
- break;
- case AARCH64_INSN_LOGIC_EOR:
- insn = aarch64_insn_get_eor_value();
- break;
- case AARCH64_INSN_LOGIC_EON:
- insn = aarch64_insn_get_eon_value();
- break;
- case AARCH64_INSN_LOGIC_AND_SETFLAGS:
- insn = aarch64_insn_get_ands_value();
- break;
- case AARCH64_INSN_LOGIC_BIC_SETFLAGS:
- insn = aarch64_insn_get_bics_value();
- break;
- default:
- pr_err("%s: unknown logical encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if (shift & ~(SZ_32 - 1)) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- if (shift & ~(SZ_64 - 1)) {
- pr_err("%s: invalid shift encoding %d\n", __func__,
- shift);
- return AARCH64_BREAK_FAULT;
- }
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
-}
-
-/*
- * MOV (register) is architecturally an alias of ORR (shifted register) where
- * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
- */
-u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
- enum aarch64_insn_register src,
- enum aarch64_insn_variant variant)
-{
- return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
- src, 0, variant,
- AARCH64_INSN_LOGIC_ORR);
-}
-
-u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
- enum aarch64_insn_register reg,
- enum aarch64_insn_adr_type type)
-{
- u32 insn;
- s32 offset;
-
- switch (type) {
- case AARCH64_INSN_ADR_TYPE_ADR:
- insn = aarch64_insn_get_adr_value();
- offset = addr - pc;
- break;
- case AARCH64_INSN_ADR_TYPE_ADRP:
- insn = aarch64_insn_get_adrp_value();
- offset = (addr - ALIGN_DOWN(pc, SZ_4K)) >> 12;
- break;
- default:
- pr_err("%s: unknown adr encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- if (offset < -SZ_1M || offset >= SZ_1M)
- return AARCH64_BREAK_FAULT;
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, reg);
-
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, offset);
-}
-
-/*
- * Decode the imm field of a branch, and return the byte offset as a
- * signed value (so it can be used when computing a new branch
- * target).
- */
-s32 aarch64_get_branch_offset(u32 insn)
-{
- s32 imm;
-
- if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
- imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
- return (imm << 6) >> 4;
- }
-
- if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
- aarch64_insn_is_bcond(insn)) {
- imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_19, insn);
- return (imm << 13) >> 11;
- }
-
- if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) {
- imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_14, insn);
- return (imm << 18) >> 16;
- }
-
- /* Unhandled instruction */
- BUG();
-}
-
-/*
- * Encode the displacement of a branch in the imm field and return the
- * updated instruction.
- */
-u32 aarch64_set_branch_offset(u32 insn, s32 offset)
-{
- if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn))
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
- offset >> 2);
-
- if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
- aarch64_insn_is_bcond(insn))
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
- offset >> 2);
-
- if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn))
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_14, insn,
- offset >> 2);
-
- /* Unhandled instruction */
- BUG();
-}
-
-s32 aarch64_insn_adrp_get_offset(u32 insn)
-{
- BUG_ON(!aarch64_insn_is_adrp(insn));
- return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12;
-}
-
-u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset)
-{
- BUG_ON(!aarch64_insn_is_adrp(insn));
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn,
- offset >> 12);
-}
-
-/*
- * Extract the Op/CR data from a msr/mrs instruction.
- */
-u32 aarch64_insn_extract_system_reg(u32 insn)
-{
- return (insn & 0x1FFFE0) >> 5;
-}
-
-bool aarch32_insn_is_wide(u32 insn)
-{
- return insn >= 0xe800;
-}
-
-/*
- * Macros/defines for extracting register numbers from instruction.
- */
-u32 aarch32_insn_extract_reg_num(u32 insn, int offset)
-{
- return (insn & (0xf << offset)) >> offset;
-}
-
-#define OPC2_MASK 0x7
-#define OPC2_OFFSET 5
-u32 aarch32_insn_mcr_extract_opc2(u32 insn)
-{
- return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET;
-}
-
-#define CRM_MASK 0xf
-u32 aarch32_insn_mcr_extract_crm(u32 insn)
-{
- return insn & CRM_MASK;
-}
-
-static bool __kprobes __check_eq(unsigned long pstate)
-{
- return (pstate & PSR_Z_BIT) != 0;
-}
-
-static bool __kprobes __check_ne(unsigned long pstate)
-{
- return (pstate & PSR_Z_BIT) == 0;
-}
-
-static bool __kprobes __check_cs(unsigned long pstate)
-{
- return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_cc(unsigned long pstate)
-{
- return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_mi(unsigned long pstate)
-{
- return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_pl(unsigned long pstate)
-{
- return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_vs(unsigned long pstate)
-{
- return (pstate & PSR_V_BIT) != 0;
-}
-
-static bool __kprobes __check_vc(unsigned long pstate)
-{
- return (pstate & PSR_V_BIT) == 0;
-}
-
-static bool __kprobes __check_hi(unsigned long pstate)
-{
- pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
- return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_ls(unsigned long pstate)
-{
- pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
- return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_ge(unsigned long pstate)
-{
- pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
- return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_lt(unsigned long pstate)
-{
- pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
- return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_gt(unsigned long pstate)
-{
- /*PSR_N_BIT ^= PSR_V_BIT */
- unsigned long temp = pstate ^ (pstate << 3);
-
- temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
- return (temp & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_le(unsigned long pstate)
-{
- /*PSR_N_BIT ^= PSR_V_BIT */
- unsigned long temp = pstate ^ (pstate << 3);
-
- temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
- return (temp & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_al(unsigned long pstate)
-{
- return true;
-}
-
-/*
- * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
- * it behaves identically to 0b1110 ("al").
- */
-pstate_check_t * const aarch32_opcode_cond_checks[16] = {
- __check_eq, __check_ne, __check_cs, __check_cc,
- __check_mi, __check_pl, __check_vs, __check_vc,
- __check_hi, __check_ls, __check_ge, __check_lt,
- __check_gt, __check_le, __check_al, __check_al
-};
-
-static bool range_of_ones(u64 val)
-{
- /* Doesn't handle full ones or full zeroes */
- u64 sval = val >> __ffs64(val);
-
- /* One of Sean Eron Anderson's bithack tricks */
- return ((sval + 1) & (sval)) == 0;
-}
-
-static u32 aarch64_encode_immediate(u64 imm,
- enum aarch64_insn_variant variant,
- u32 insn)
-{
- unsigned int immr, imms, n, ones, ror, esz, tmp;
- u64 mask = ~0UL;
-
- /* Can't encode full zeroes or full ones */
- if (!imm || !~imm)
- return AARCH64_BREAK_FAULT;
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if (upper_32_bits(imm))
- return AARCH64_BREAK_FAULT;
- esz = 32;
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- insn |= AARCH64_INSN_SF_BIT;
- esz = 64;
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- /*
- * Inverse of Replicate(). Try to spot a repeating pattern
- * with a pow2 stride.
- */
- for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
- u64 emask = BIT(tmp) - 1;
-
- if ((imm & emask) != ((imm >> tmp) & emask))
- break;
-
- esz = tmp;
- mask = emask;
- }
-
- /* N is only set if we're encoding a 64bit value */
- n = esz == 64;
-
- /* Trim imm to the element size */
- imm &= mask;
-
- /* That's how many ones we need to encode */
- ones = hweight64(imm);
-
- /*
- * imms is set to (ones - 1), prefixed with a string of ones
- * and a zero if they fit. Cap it to 6 bits.
- */
- imms = ones - 1;
- imms |= 0xf << ffs(esz);
- imms &= BIT(6) - 1;
-
- /* Compute the rotation */
- if (range_of_ones(imm)) {
- /*
- * Pattern: 0..01..10..0
- *
- * Compute how many rotate we need to align it right
- */
- ror = __ffs64(imm);
- } else {
- /*
- * Pattern: 0..01..10..01..1
- *
- * Fill the unused top bits with ones, and check if
- * the result is a valid immediate (all ones with a
- * contiguous ranges of zeroes).
- */
- imm |= ~mask;
- if (!range_of_ones(~imm))
- return AARCH64_BREAK_FAULT;
-
- /*
- * Compute the rotation to get a continuous set of
- * ones, with the first bit set at position 0
- */
- ror = fls(~imm);
- }
-
- /*
- * immr is the number of bits we need to rotate back to the
- * original set of ones. Note that this is relative to the
- * element size...
- */
- immr = (esz - ror) % esz;
-
- insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
- insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
- return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
-}
-
-u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
- enum aarch64_insn_variant variant,
- enum aarch64_insn_register Rn,
- enum aarch64_insn_register Rd,
- u64 imm)
-{
- u32 insn;
-
- switch (type) {
- case AARCH64_INSN_LOGIC_AND:
- insn = aarch64_insn_get_and_imm_value();
- break;
- case AARCH64_INSN_LOGIC_ORR:
- insn = aarch64_insn_get_orr_imm_value();
- break;
- case AARCH64_INSN_LOGIC_EOR:
- insn = aarch64_insn_get_eor_imm_value();
- break;
- case AARCH64_INSN_LOGIC_AND_SETFLAGS:
- insn = aarch64_insn_get_ands_imm_value();
- break;
- default:
- pr_err("%s: unknown logical encoding %d\n", __func__, type);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
- return aarch64_encode_immediate(imm, variant, insn);
-}
-
-u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
- enum aarch64_insn_register Rm,
- enum aarch64_insn_register Rn,
- enum aarch64_insn_register Rd,
- u8 lsb)
-{
- u32 insn;
-
- insn = aarch64_insn_get_extr_value();
-
- switch (variant) {
- case AARCH64_INSN_VARIANT_32BIT:
- if (lsb > 31)
- return AARCH64_BREAK_FAULT;
- break;
- case AARCH64_INSN_VARIANT_64BIT:
- if (lsb > 63)
- return AARCH64_BREAK_FAULT;
- insn |= AARCH64_INSN_SF_BIT;
- insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
- break;
- default:
- pr_err("%s: unknown variant encoding %d\n", __func__, variant);
- return AARCH64_BREAK_FAULT;
- }
-
- insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
- insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
- return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
-}
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..85087e2df564 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -10,40 +10,55 @@
* Copyright (C) 2012 ARM Ltd.
*/
-#include <linux/kernel_stat.h>
-#include <linux/irq.h>
-#include <linux/memory.h>
-#include <linux/smp.h>
+#include <linux/hardirq.h>
#include <linux/init.h>
+#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/kprobes.h>
+#include <linux/memory.h>
+#include <linux/scs.h>
#include <linux/seq_file.h>
+#include <linux/smp.h>
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
+#include <asm/exception.h>
+#include <asm/numa.h>
+#include <asm/softirq_stack.h>
+#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>
-unsigned long irq_err_count;
-
/* Only access this in an NMI enter/exit */
DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
-int arch_show_interrupts(struct seq_file *p, int prec)
+
+DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#endif
+
+static void init_irq_scs(void)
{
- show_ipi_list(p, prec);
- seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
- return 0;
+ int cpu;
+
+ if (!scs_is_enabled())
+ return;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ scs_alloc(early_cpu_to_node(cpu));
}
#ifdef CONFIG_VMAP_STACK
-static void init_irq_stacks(void)
+static void __init init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
- p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
@@ -60,12 +75,56 @@ static void init_irq_stacks(void)
}
#endif
+#ifndef CONFIG_PREEMPT_RT
+static void ____do_softirq(struct pt_regs *regs)
+{
+ __do_softirq();
+}
+
+void do_softirq_own_stack(void)
+{
+ call_on_irq_stack(NULL, ____do_softirq);
+}
+#endif
+
+static void default_handle_irq(struct pt_regs *regs)
+{
+ panic("IRQ taken without a root IRQ handler\n");
+}
+
+static void default_handle_fiq(struct pt_regs *regs)
+{
+ panic("FIQ taken without a root FIQ handler\n");
+}
+
+void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
+void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
+
+int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+ if (handle_arch_irq != default_handle_irq)
+ return -EBUSY;
+
+ handle_arch_irq = handle_irq;
+ pr_info("Root IRQ handler: %ps\n", handle_irq);
+ return 0;
+}
+
+int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *))
+{
+ if (handle_arch_fiq != default_handle_fiq)
+ return -EBUSY;
+
+ handle_arch_fiq = handle_fiq;
+ pr_info("Root FIQ handler: %ps\n", handle_fiq);
+ return 0;
+}
+
void __init init_IRQ(void)
{
init_irq_stacks();
+ init_irq_scs();
irqchip_init();
- if (!handle_arch_irq)
- panic("No interrupt controller found.");
if (system_uses_irq_prio_masking()) {
/*
@@ -76,18 +135,3 @@ void __init init_IRQ(void)
local_daif_restore(DAIF_PROCCTX_NOIRQ);
}
}
-
-/*
- * Stubs to make nmi_enter/exit() code callable from ASM
- */
-asmlinkage void notrace asm_nmi_enter(void)
-{
- nmi_enter();
-}
-NOKPROBE_SYMBOL(asm_nmi_enter);
-
-asmlinkage void notrace asm_nmi_exit(void)
-{
- nmi_exit();
-}
-NOKPROBE_SYMBOL(asm_nmi_exit);
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 9a8a0ae1e75f..faf88ec9c48e 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/insn.h>
+#include <asm/patching.h>
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
@@ -25,14 +26,3 @@ void arch_jump_label_transform(struct jump_entry *entry,
aarch64_insn_patch_text_nosync(addr, insn);
}
-
-void arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- /*
- * We use the architected A64 NOP in arch_static_branch, so there's no
- * need to patch an identical A64 NOP over the top of it here. The core
- * will call arch_jump_label_transform from a module notifier if the
- * NOP needs to be replaced by a branch.
- */
-}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 2a11a962e571..12c7f3c8ba76 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -4,205 +4,42 @@
*/
#include <linux/cache.h>
-#include <linux/crc32.h>
#include <linux/init.h>
-#include <linux/libfdt.h>
-#include <linux/mm_types.h>
-#include <linux/sched.h>
-#include <linux/types.h>
+#include <linux/printk.h>
-#include <asm/cacheflush.h>
-#include <asm/fixmap.h>
-#include <asm/kernel-pgtable.h>
+#include <asm/cpufeature.h>
#include <asm/memory.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/sections.h>
-enum kaslr_status {
- KASLR_ENABLED,
- KASLR_DISABLED_CMDLINE,
- KASLR_DISABLED_NO_SEED,
- KASLR_DISABLED_FDT_REMAP,
-};
-
-static enum kaslr_status __initdata kaslr_status;
-u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
-static __init u64 get_kaslr_seed(void *fdt)
-{
- int node, len;
- fdt64_t *prop;
- u64 ret;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- return 0;
-
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
- if (!prop || len != sizeof(u64))
- return 0;
-
- ret = fdt64_to_cpu(*prop);
- *prop = 0;
- return ret;
-}
+bool __ro_after_init __kaslr_is_enabled = false;
-static __init const u8 *kaslr_get_cmdline(void *fdt)
+void __init kaslr_init(void)
{
- static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
-
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
- return prop;
- }
-out:
- return default_cmdline;
-}
-
-/*
- * This routine will be executed with the kernel mapped at its default virtual
- * address, and if it returns successfully, the kernel will be remapped, and
- * start_kernel() will be executed from a randomized virtual offset. The
- * relocation will result in all absolute references (e.g., static variables
- * containing function pointers) to be reinitialized, and zero-initialized
- * .bss variables will be reset to 0.
- */
-u64 __init kaslr_early_init(u64 dt_phys)
-{
- void *fdt;
- u64 seed, offset, mask, module_range;
- const u8 *cmdline, *str;
- int size;
-
- /*
- * Set a reasonable default for module_alloc_base in case
- * we end up running with module randomization disabled.
- */
- module_alloc_base = (u64)_etext - MODULES_VSIZE;
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
-
- /*
- * Try to map the FDT early. If this fails, we simply bail,
- * and proceed with KASLR disabled. We will make another
- * attempt at mapping the FDT in setup_machine()
- */
- early_fixmap_init();
- fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
- if (!fdt) {
- kaslr_status = KASLR_DISABLED_FDT_REMAP;
- return 0;
- }
-
- /*
- * Retrieve (and wipe) the seed from the FDT
- */
- seed = get_kaslr_seed(fdt);
-
- /*
- * Check if 'nokaslr' appears on the command line, and
- * return 0 if that is the case.
- */
- cmdline = kaslr_get_cmdline(fdt);
- str = strstr(cmdline, "nokaslr");
- if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
- kaslr_status = KASLR_DISABLED_CMDLINE;
- return 0;
- }
-
- if (!seed) {
- kaslr_status = KASLR_DISABLED_NO_SEED;
- return 0;
+ if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
+ arm64_sw_feature_override.mask,
+ ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
+ pr_info("KASLR disabled on command line\n");
+ return;
}
/*
- * OK, so we are proceeding with KASLR enabled. Calculate a suitable
- * kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
- * Even if we could randomize at page granularity for 16k and 64k pages,
- * let's always round to 2 MB so we don't interfere with the ability to
- * map using contiguous PTEs
+ * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
+ * placement of the image rather than from the seed, so a displacement
+ * of less than MIN_KIMG_ALIGN means that no seed was provided.
*/
- mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
- offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
-
- /* use the top 16 bits to randomize the linear region */
- memstart_offset_seed = seed >> 48;
-
- if (IS_ENABLED(CONFIG_KASAN))
- /*
- * KASAN does not expect the module region to intersect the
- * vmalloc region, since shadow memory is allocated for each
- * module at load time, whereas the vmalloc region is shadowed
- * by KASAN zero pages. So keep modules out of the vmalloc
- * region if KASAN is enabled, and put the kernel well within
- * 4 GB of the module region.
- */
- return offset % SZ_2G;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- /*
- * Randomize the module region over a 2 GB window covering the
- * kernel. This reduces the risk of modules leaking information
- * about the address of the kernel itself, but results in
- * branches between modules and the core kernel that are
- * resolved via PLTs. (Branches between modules will be
- * resolved normally.)
- */
- module_range = SZ_2G - (u64)(_end - _stext);
- module_alloc_base = max((u64)_end + offset - SZ_2G,
- (u64)MODULES_VADDR);
- } else {
- /*
- * Randomize the module region by setting module_alloc_base to
- * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
- * _stext) . This guarantees that the resulting region still
- * covers [_stext, _etext], and that all relative branches can
- * be resolved without veneers.
- */
- module_range = MODULES_VSIZE - (u64)(_etext - _stext);
- module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+ if (kaslr_offset() < MIN_KIMG_ALIGN) {
+ pr_warn("KASLR disabled due to lack of seed\n");
+ return;
}
- /* use the lower 21 bits to randomize the base of the module region */
- module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
- module_alloc_base &= PAGE_MASK;
-
- __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
- __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
-
- return offset;
+ pr_info("KASLR enabled\n");
+ __kaslr_is_enabled = true;
}
-static int __init kaslr_init(void)
+static int __init parse_nokaslr(char *unused)
{
- switch (kaslr_status) {
- case KASLR_ENABLED:
- pr_info("KASLR enabled\n");
- break;
- case KASLR_DISABLED_CMDLINE:
- pr_info("KASLR disabled on command line\n");
- break;
- case KASLR_DISABLED_NO_SEED:
- pr_warn("KASLR disabled due to lack of seed\n");
- break;
- case KASLR_DISABLED_FDT_REMAP:
- pr_warn("KASLR disabled due to FDT remapping failure\n");
- break;
- }
-
+ /* nokaslr param handling is done by early cpufeature code */
return 0;
}
-core_initcall(kaslr_init)
+early_param("nokaslr", parse_nokaslr);
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 29a9428486a5..532d72ea42ee 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -14,7 +14,6 @@
#include <linux/kexec.h>
#include <linux/pe.h>
#include <linux/string.h>
-#include <linux/verification.h>
#include <asm/byteorder.h>
#include <asm/cpufeature.h>
#include <asm/image.h>
@@ -43,17 +42,13 @@ static void *image_load(struct kimage *image,
u64 flags, value;
bool be_image, be_kernel;
struct kexec_buf kbuf;
- unsigned long text_offset;
+ unsigned long text_offset, kernel_segment_number;
struct kexec_segment *kernel_segment;
int ret;
- /* We don't support crash kernels yet. */
- if (image->type == KEXEC_TYPE_CRASH)
- return ERR_PTR(-EOPNOTSUPP);
-
/*
* We require a kernel with an unambiguous Image header. Per
- * Documentation/arm64/booting.rst, this is the case when image_size
+ * Documentation/arch/arm64/booting.rst, this is the case when image_size
* is non-zero (practically speaking, since v3.17).
*/
h = (struct arm64_image_header *)kernel;
@@ -92,39 +87,52 @@ static void *image_load(struct kimage *image,
/* Adjust kernel segment with TEXT_OFFSET */
kbuf.memsz += text_offset;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
+ kernel_segment_number = image->nr_segments;
+
+ /*
+ * The location of the kernel segment may make it impossible to satisfy
+ * the other segment requirements, so we try repeatedly to find a
+ * location that will work.
+ */
+ while ((ret = kexec_add_buffer(&kbuf)) == 0) {
+ /* Try to load additional data */
+ kernel_segment = &image->segment[kernel_segment_number];
+ ret = load_other_segments(image, kernel_segment->mem,
+ kernel_segment->memsz, initrd,
+ initrd_len, cmdline);
+ if (!ret)
+ break;
+
+ /*
+ * We couldn't find space for the other segments; erase the
+ * kernel segment and try the next available hole.
+ */
+ image->nr_segments -= 1;
+ kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ }
+
+ if (ret) {
+ pr_err("Could not find any suitable kernel location!");
return ERR_PTR(ret);
+ }
- kernel_segment = &image->segment[image->nr_segments - 1];
+ kernel_segment = &image->segment[kernel_segment_number];
kernel_segment->mem += text_offset;
kernel_segment->memsz -= text_offset;
image->start = kernel_segment->mem;
- pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kernel_segment->mem, kbuf.bufsz,
- kernel_segment->memsz);
-
- /* Load additional data */
- ret = load_other_segments(image,
- kernel_segment->mem, kernel_segment->memsz,
- initrd, initrd_len, cmdline);
+ kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_segment->mem, kbuf.bufsz,
+ kernel_segment->memsz);
- return ERR_PTR(ret);
+ return NULL;
}
-#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
-static int image_verify_sig(const char *kernel, unsigned long kernel_len)
-{
- return verify_pefile_signature(kernel, kernel_len, NULL,
- VERIFYING_KEXEC_PE_SIGNATURE);
-}
-#endif
-
const struct kexec_file_ops kexec_image_ops = {
.probe = image_probe,
.load = image_load,
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
- .verify_sig = image_verify_sig,
+ .verify_sig = kexec_kernel_verify_pe_sig,
#endif
};
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 43119922341f..4e1f983df3d1 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,6 +17,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
+#include <asm/patching.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -223,6 +224,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
*/
if (!kernel_active_single_step())
kernel_enable_single_step(linux_regs);
+ else
+ kernel_rewind_single_step(linux_regs);
err = 0;
break;
default:
@@ -231,14 +234,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
NOKPROBE_SYMBOL(kgdb_brk_fn)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
@@ -247,12 +250,12 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
}
NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
- kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
NOKPROBE_SYMBOL(kgdb_step_brk_fn);
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 42bd8c0c60e0..af046ceac22d 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -10,11 +10,12 @@
* aarch32_setup_additional_pages() and are provided for compatibility
* reasons with 32 bit (aarch32) applications that need them.
*
- * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
+ * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions.
*/
#include <asm/unistd.h>
+ .section .rodata
.align 5
.globl __kuser_helper_start
__kuser_helper_start:
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 0df8493624e0..b38aae5b488d 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -11,6 +11,8 @@
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
+#include <linux/reboot.h>
+#include <linux/set_memory.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
@@ -20,12 +22,8 @@
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
-
-#include "cpu-reset.h"
-
-/* Global variables for the arm64_relocate_new_kernel routine. */
-extern const unsigned char arm64_relocate_new_kernel[];
-extern const unsigned long arm64_relocate_new_kernel_size;
+#include <asm/sections.h>
+#include <asm/trans_pgd.h>
/**
* kexec_image_info - For debugging output.
@@ -34,23 +32,12 @@ extern const unsigned long arm64_relocate_new_kernel_size;
static void _kexec_image_info(const char *func, int line,
const struct kimage *kimage)
{
- unsigned long i;
-
- pr_debug("%s:%d:\n", func, line);
- pr_debug(" kexec kimage info:\n");
- pr_debug(" type: %d\n", kimage->type);
- pr_debug(" start: %lx\n", kimage->start);
- pr_debug(" head: %lx\n", kimage->head);
- pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
-
- for (i = 0; i < kimage->nr_segments; i++) {
- pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
- i,
- kimage->segment[i].mem,
- kimage->segment[i].mem + kimage->segment[i].memsz,
- kimage->segment[i].memsz,
- kimage->segment[i].memsz / PAGE_SIZE);
- }
+ kexec_dprintk("%s:%d:\n", func, line);
+ kexec_dprintk(" kexec kimage info:\n");
+ kexec_dprintk(" type: %d\n", kimage->type);
+ kexec_dprintk(" head: %lx\n", kimage->head);
+ kexec_dprintk(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
+ kexec_dprintk(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
}
void machine_kexec_cleanup(struct kimage *kimage)
@@ -67,8 +54,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
*/
int machine_kexec_prepare(struct kimage *kimage)
{
- kexec_image_info(kimage);
-
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
return -EBUSY;
@@ -78,43 +63,6 @@ int machine_kexec_prepare(struct kimage *kimage)
}
/**
- * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
- */
-static void kexec_list_flush(struct kimage *kimage)
-{
- kimage_entry_t *entry;
-
- for (entry = &kimage->head; ; entry++) {
- unsigned int flag;
- void *addr;
-
- /* flush the list entries. */
- __flush_dcache_area(entry, sizeof(kimage_entry_t));
-
- flag = *entry & IND_FLAGS;
- if (flag == IND_DONE)
- break;
-
- addr = phys_to_virt(*entry & PAGE_MASK);
-
- switch (flag) {
- case IND_INDIRECTION:
- /* Set entry point just before the new list page. */
- entry = (kimage_entry_t *)addr - 1;
- break;
- case IND_SOURCE:
- /* flush the source pages. */
- __flush_dcache_area(addr, PAGE_SIZE);
- break;
- case IND_DESTINATION:
- break;
- default:
- BUG();
- }
- }
-}
-
-/**
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
*/
static void kexec_segment_flush(const struct kimage *kimage)
@@ -131,11 +79,84 @@ static void kexec_segment_flush(const struct kimage *kimage)
kimage->segment[i].memsz,
kimage->segment[i].memsz / PAGE_SIZE);
- __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
- kimage->segment[i].memsz);
+ dcache_clean_inval_poc(
+ (unsigned long)phys_to_virt(kimage->segment[i].mem),
+ (unsigned long)phys_to_virt(kimage->segment[i].mem) +
+ kimage->segment[i].memsz);
}
}
+/* Allocates pages for kexec page table */
+static void *kexec_page_alloc(void *arg)
+{
+ struct kimage *kimage = arg;
+ struct page *page = kimage_alloc_control_pages(kimage, 0);
+ void *vaddr = NULL;
+
+ if (!page)
+ return NULL;
+
+ vaddr = page_address(page);
+ memset(vaddr, 0, PAGE_SIZE);
+
+ return vaddr;
+}
+
+int machine_kexec_post_load(struct kimage *kimage)
+{
+ int rc;
+ pgd_t *trans_pgd;
+ void *reloc_code = page_to_virt(kimage->control_code_page);
+ long reloc_size;
+ struct trans_pgd_info info = {
+ .trans_alloc_page = kexec_page_alloc,
+ .trans_alloc_arg = kimage,
+ };
+
+ /* If in place, relocation is not used, only flush next kernel */
+ if (kimage->head & IND_DONE) {
+ kexec_segment_flush(kimage);
+ kexec_image_info(kimage);
+ return 0;
+ }
+
+ kimage->arch.el2_vectors = 0;
+ if (is_hyp_nvhe()) {
+ rc = trans_pgd_copy_el2_vectors(&info,
+ &kimage->arch.el2_vectors);
+ if (rc)
+ return rc;
+ }
+
+ /* Create a copy of the linear map */
+ trans_pgd = kexec_page_alloc(kimage);
+ if (!trans_pgd)
+ return -ENOMEM;
+ rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
+ if (rc)
+ return rc;
+ kimage->arch.ttbr1 = __pa(trans_pgd);
+ kimage->arch.zero_page = __pa_symbol(empty_zero_page);
+
+ reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
+ memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
+ kimage->arch.kern_reloc = __pa(reloc_code);
+ rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
+ &kimage->arch.t0sz, reloc_code);
+ if (rc)
+ return rc;
+ kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;
+
+ /* Flush the reloc_code in preparation for its execution. */
+ dcache_clean_inval_poc((unsigned long)reloc_code,
+ (unsigned long)reloc_code + reloc_size);
+ icache_inval_pou((uintptr_t)reloc_code,
+ (uintptr_t)reloc_code + reloc_size);
+ kexec_image_info(kimage);
+
+ return 0;
+}
+
/**
* machine_kexec - Do the kexec reboot.
*
@@ -143,8 +164,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
*/
void machine_kexec(struct kimage *kimage)
{
- phys_addr_t reboot_code_buffer_phys;
- void *reboot_code_buffer;
bool in_kexec_crash = (kimage == kexec_crash_image);
bool stuck_cpus = cpus_are_stuck_in_kernel();
@@ -155,71 +174,35 @@ void machine_kexec(struct kimage *kimage)
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
"Some CPUs may be stale, kdump will be unreliable.\n");
- reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
- reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
-
- kexec_image_info(kimage);
-
- pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
- kimage->control_code_page);
- pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
- &reboot_code_buffer_phys);
- pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
- reboot_code_buffer);
- pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
- arm64_relocate_new_kernel);
- pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
- __func__, __LINE__, arm64_relocate_new_kernel_size,
- arm64_relocate_new_kernel_size);
-
- /*
- * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
- * after the kernel is shut down.
- */
- memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
- arm64_relocate_new_kernel_size);
-
- /* Flush the reboot_code_buffer in preparation for its execution. */
- __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
-
- /*
- * Although we've killed off the secondary CPUs, we don't update
- * the online mask if we're handling a crash kernel and consequently
- * need to avoid flush_icache_range(), which will attempt to IPI
- * the offline CPUs. Therefore, we must use the __* variant here.
- */
- __flush_icache_range((uintptr_t)reboot_code_buffer,
- arm64_relocate_new_kernel_size);
-
- /* Flush the kimage list and its buffers. */
- kexec_list_flush(kimage);
-
- /* Flush the new image if already in place. */
- if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
- kexec_segment_flush(kimage);
-
pr_info("Bye!\n");
local_daif_mask();
/*
- * cpu_soft_restart will shutdown the MMU, disable data caches, then
- * transfer control to the reboot_code_buffer which contains a copy of
- * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
- * uses physical addressing to relocate the new image to its final
- * position and transfers control to the image entry point when the
- * relocation is complete.
+ * Both restart and kernel_reloc will shutdown the MMU, disable data
+ * caches. However, restart will start new kernel or purgatory directly,
+ * kernel_reloc contains the body of arm64_relocate_new_kernel
* In kexec case, kimage->start points to purgatory assuming that
* kernel entry and dtb address are embedded in purgatory by
* userspace (kexec-tools).
* In kexec_file case, the kernel starts directly without purgatory.
*/
- cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
-#ifdef CONFIG_KEXEC_FILE
- kimage->arch.dtb_mem);
-#else
- 0);
-#endif
+ if (kimage->head & IND_DONE) {
+ typeof(cpu_soft_restart) *restart;
+
+ cpu_install_idmap();
+ restart = (void *)__pa_symbol(cpu_soft_restart);
+ restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
+ 0, 0);
+ } else {
+ void (*kernel_reloc)(struct kimage *kimage);
+
+ if (is_hyp_nvhe())
+ __hyp_set_vectors(kimage->arch.el2_vectors);
+ cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
+ kernel_reloc = (void *)kimage->arch.kern_reloc;
+ kernel_reloc(kimage);
+ }
BUG(); /* Should never get here. */
}
@@ -272,28 +255,6 @@ void machine_crash_shutdown(struct pt_regs *regs)
pr_info("Starting crashdump kernel...\n");
}
-void arch_kexec_protect_crashkres(void)
-{
- int i;
-
- kexec_segment_flush(kexec_crash_image);
-
- for (i = 0; i < kexec_crash_image->nr_segments; i++)
- set_memory_valid(
- __phys_to_virt(kexec_crash_image->segment[i].mem),
- kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
-}
-
-void arch_kexec_unprotect_crashkres(void)
-{
- int i;
-
- for (i = 0; i < kexec_crash_image->nr_segments; i++)
- set_memory_valid(
- __phys_to_virt(kexec_crash_image->segment[i].mem),
- kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
-}
-
#ifdef CONFIG_HIBERNATION
/*
* To preserve the crash dump kernel image, the relevant memory segments
@@ -335,8 +296,13 @@ bool crash_is_nosave(unsigned long pfn)
/* in reserved memory? */
addr = __pfn_to_phys(pfn);
- if ((addr < crashk_res.start) || (crashk_res.end < addr))
- return false;
+ if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
+ if (!crashk_low_res.end)
+ return false;
+
+ if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
+ return false;
+ }
if (!kexec_crash_image)
return true;
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 7b08bf9499b6..0e017358f4ba 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -15,20 +15,12 @@
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
-#include <linux/random.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
-#include <asm/byteorder.h>
-
-/* relevant device tree properties */
-#define FDT_PROP_INITRD_START "linux,initrd-start"
-#define FDT_PROP_INITRD_END "linux,initrd-end"
-#define FDT_PROP_BOOTARGS "bootargs"
-#define FDT_PROP_KASLR_SEED "kaslr-seed"
-#define FDT_PROP_RNG_SEED "rng-seed"
-#define RNG_SEED_SIZE 128
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_image_ops,
@@ -37,143 +29,63 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
int arch_kimage_file_post_load_cleanup(struct kimage *image)
{
- vfree(image->arch.dtb);
+ kvfree(image->arch.dtb);
image->arch.dtb = NULL;
+ vfree(image->elf_headers);
+ image->elf_headers = NULL;
+ image->elf_headers_sz = 0;
+
return kexec_image_post_load_cleanup_default(image);
}
-static int setup_dtb(struct kimage *image,
- unsigned long initrd_load_addr, unsigned long initrd_len,
- char *cmdline, void *dtb)
+static int prepare_elf_headers(void **addr, unsigned long *sz)
{
- int off, ret;
-
- ret = fdt_path_offset(dtb, "/chosen");
- if (ret < 0)
- goto out;
-
- off = ret;
-
- /* add bootargs */
- if (cmdline) {
- ret = fdt_setprop_string(dtb, off, FDT_PROP_BOOTARGS, cmdline);
- if (ret)
- goto out;
- } else {
- ret = fdt_delprop(dtb, off, FDT_PROP_BOOTARGS);
- if (ret && (ret != -FDT_ERR_NOTFOUND))
- goto out;
- }
-
- /* add initrd-* */
- if (initrd_load_addr) {
- ret = fdt_setprop_u64(dtb, off, FDT_PROP_INITRD_START,
- initrd_load_addr);
- if (ret)
- goto out;
-
- ret = fdt_setprop_u64(dtb, off, FDT_PROP_INITRD_END,
- initrd_load_addr + initrd_len);
- if (ret)
- goto out;
- } else {
- ret = fdt_delprop(dtb, off, FDT_PROP_INITRD_START);
- if (ret && (ret != -FDT_ERR_NOTFOUND))
- goto out;
-
- ret = fdt_delprop(dtb, off, FDT_PROP_INITRD_END);
- if (ret && (ret != -FDT_ERR_NOTFOUND))
- goto out;
+ struct crash_mem *cmem;
+ unsigned int nr_ranges;
+ int ret;
+ u64 i;
+ phys_addr_t start, end;
+
+ nr_ranges = 2; /* for exclusion of crashkernel region */
+ for_each_mem_range(i, &start, &end)
+ nr_ranges++;
+
+ cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
+ if (!cmem)
+ return -ENOMEM;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ for_each_mem_range(i, &start, &end) {
+ cmem->ranges[cmem->nr_ranges].start = start;
+ cmem->ranges[cmem->nr_ranges].end = end - 1;
+ cmem->nr_ranges++;
}
- /* add kaslr-seed */
- ret = fdt_delprop(dtb, off, FDT_PROP_KASLR_SEED);
- if (ret == -FDT_ERR_NOTFOUND)
- ret = 0;
- else if (ret)
+ /* Exclude crashkernel region */
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret)
goto out;
- if (rng_is_initialized()) {
- u64 seed = get_random_u64();
- ret = fdt_setprop_u64(dtb, off, FDT_PROP_KASLR_SEED, seed);
+ if (crashk_low_res.end) {
+ ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
if (ret)
goto out;
- } else {
- pr_notice("RNG is not initialised: omitting \"%s\" property\n",
- FDT_PROP_KASLR_SEED);
}
- /* add rng-seed */
- if (rng_is_initialized()) {
- u8 rng_seed[RNG_SEED_SIZE];
- get_random_bytes(rng_seed, RNG_SEED_SIZE);
- ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed,
- RNG_SEED_SIZE);
- if (ret)
- goto out;
- } else {
- pr_notice("RNG is not initialised: omitting \"%s\" property\n",
- FDT_PROP_RNG_SEED);
- }
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
out:
- if (ret)
- return (ret == -FDT_ERR_NOSPACE) ? -ENOMEM : -EINVAL;
-
- return 0;
+ kfree(cmem);
+ return ret;
}
/*
- * More space needed so that we can add initrd, bootargs, kaslr-seed, and
- * rng-seed.
+ * Tries to add the initrd and DTB to the image. If it is not possible to find
+ * valid locations, this function will undo changes to the image and return non
+ * zero.
*/
-#define DTB_EXTRA_SPACE 0x1000
-
-static int create_dtb(struct kimage *image,
- unsigned long initrd_load_addr, unsigned long initrd_len,
- char *cmdline, void **dtb)
-{
- void *buf;
- size_t buf_size;
- size_t cmdline_len;
- int ret;
-
- cmdline_len = cmdline ? strlen(cmdline) : 0;
- buf_size = fdt_totalsize(initial_boot_params)
- + cmdline_len + DTB_EXTRA_SPACE;
-
- for (;;) {
- buf = vmalloc(buf_size);
- if (!buf)
- return -ENOMEM;
-
- /* duplicate a device tree blob */
- ret = fdt_open_into(initial_boot_params, buf, buf_size);
- if (ret)
- return -EINVAL;
-
- ret = setup_dtb(image, initrd_load_addr, initrd_len,
- cmdline, buf);
- if (ret) {
- vfree(buf);
- if (ret == -ENOMEM) {
- /* unlikely, but just in case */
- buf_size += DTB_EXTRA_SPACE;
- continue;
- } else {
- return ret;
- }
- }
-
- /* trim it */
- fdt_pack(buf);
- *dtb = buf;
-
- return 0;
- }
-}
-
int load_other_segments(struct kimage *image,
unsigned long kernel_load_addr,
unsigned long kernel_size,
@@ -181,14 +93,44 @@ int load_other_segments(struct kimage *image,
char *cmdline)
{
struct kexec_buf kbuf;
- void *dtb = NULL;
- unsigned long initrd_load_addr = 0, dtb_len;
+ void *headers, *dtb = NULL;
+ unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+ orig_segments = image->nr_segments;
int ret = 0;
kbuf.image = image;
/* not allocate anything below the kernel */
kbuf.buf_min = kernel_load_addr + kernel_size;
+ /* load elf core header */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = prepare_elf_headers(&headers, &headers_sz);
+ if (ret) {
+ pr_err("Preparing elf core header failed\n");
+ goto out_err;
+ }
+
+ kbuf.buffer = headers;
+ kbuf.bufsz = headers_sz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ kbuf.memsz = headers_sz;
+ kbuf.buf_align = SZ_64K; /* largest supported page size */
+ kbuf.buf_max = ULONG_MAX;
+ kbuf.top_down = true;
+
+ ret = kexec_add_buffer(&kbuf);
+ if (ret) {
+ vfree(headers);
+ goto out_err;
+ }
+ image->elf_headers = headers;
+ image->elf_load_addr = kbuf.mem;
+ image->elf_headers_sz = headers_sz;
+
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+ }
+
/* load initrd */
if (initrd) {
kbuf.buffer = initrd;
@@ -206,17 +148,21 @@ int load_other_segments(struct kimage *image,
goto out_err;
initrd_load_addr = kbuf.mem;
- pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- initrd_load_addr, initrd_len, initrd_len);
+ kexec_dprintk("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load dtb */
- ret = create_dtb(image, initrd_load_addr, initrd_len, cmdline, &dtb);
- if (ret) {
+ dtb = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
+ initrd_len, cmdline, 0);
+ if (!dtb) {
pr_err("Preparing for new dtb failed\n");
+ ret = -EINVAL;
goto out_err;
}
+ /* trim it */
+ fdt_pack(dtb);
dtb_len = fdt_totalsize(dtb);
kbuf.buffer = dtb;
kbuf.bufsz = dtb_len;
@@ -233,12 +179,13 @@ int load_other_segments(struct kimage *image,
image->arch.dtb = dtb;
image->arch.dtb_mem = kbuf.mem;
- pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kbuf.mem, dtb_len, dtb_len);
+ kexec_dprintk("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
return 0;
out_err:
- vfree(dtb);
+ image->nr_segments = orig_segments;
+ kvfree(dtb);
return ret;
}
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 65b08a74aec6..bde32979c06a 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -7,6 +7,7 @@
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/moduleloader.h>
#include <linux/sort.h>
static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
@@ -37,7 +38,8 @@ struct plt_entry get_plt_entry(u64 dst, void *pc)
return plt;
}
-bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+static bool plt_entries_equal(const struct plt_entry *a,
+ const struct plt_entry *b)
{
u64 p, q;
@@ -64,17 +66,12 @@ bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
(q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
}
-static bool in_init(const struct module *mod, void *loc)
-{
- return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
-}
-
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym)
{
- struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
- &mod->arch.init;
+ struct mod_plt_sec *pltsec = !within_module_init((unsigned long)loc, mod) ?
+ &mod->arch.core : &mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries;
int j = i - 1;
@@ -104,8 +101,8 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, u64 val)
{
- struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
- &mod->arch.init;
+ struct mod_plt_sec *pltsec = !within_module_init((unsigned long)loc, mod) ?
+ &mod->arch.core : &mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries++;
u32 br;
@@ -131,7 +128,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
}
#endif
-#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
static int cmp_rela(const void *a, const void *b)
{
@@ -170,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- break;
-
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
@@ -206,8 +200,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
case R_AARCH64_ADR_PREL_PG_HI21:
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
- !cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_843419))
break;
/*
@@ -220,7 +213,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
* increasing the section's alignment so that the
* resulting address of this instruction is guaranteed
* to equal the offset in that particular bit (as well
- * as all less signficant bits). This ensures that the
+ * as all less significant bits). This ensures that the
* address modulo 4 KB != 0xfff8 or 0xfffc (which would
* have all ones in bits [11:3])
*/
@@ -242,17 +235,48 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
}
}
- if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
- cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ if (cpus_have_final_cap(ARM64_WORKAROUND_843419)) {
/*
* Add some slack so we can skip PLT slots that may trigger
* the erratum due to the placement of the ADRP instruction.
*/
ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+ }
return ret;
}
+static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
+ Elf64_Word dstidx)
+{
+
+ Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info);
+
+ if (s->st_shndx == dstidx)
+ return false;
+
+ return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 ||
+ ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group branch PLT relas at the front end of the array. */
+static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
+ int numrels, Elf64_Word dstidx)
+{
+ int i = 0, j = numrels - 1;
+
+ while (i < j) {
+ if (branch_rela_needs_plt(syms, &rela[i], dstidx))
+ i++;
+ else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
+ swap(rela[i], rela[j]);
+ else
+ j--;
+ }
+
+ return i;
+}
+
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
@@ -271,8 +295,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.core.plt_shndx = i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
mod->arch.init.plt_shndx = i;
- else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
- !strcmp(secstrings + sechdrs[i].sh_name,
+ else if (!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
@@ -290,7 +313,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
for (i = 0; i < ehdr->e_shnum; i++) {
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
- int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+ int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,10 +323,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
if (!(dstsec->sh_flags & SHF_EXECINSTR))
continue;
- /* sort by type, symbol index and addend */
- sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+ /*
+ * sort branch relocations requiring a PLT by type, symbol index
+ * and addend
+ */
+ nents = partition_branch_plt_relas(syms, rels, numrels,
+ sechdrs[i].sh_info);
+ if (nents)
+ sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
- if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
+ if (!module_init_layout_section(secstrings + dstsec->sh_name))
core_plts += count_plts(syms, rels, numrels,
sechdrs[i].sh_info, dstsec);
else
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 1cd1a4d0ed30..dd851297596e 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -7,6 +7,8 @@
* Author: Will Deacon <will.deacon@arm.com>
*/
+#define pr_fmt(fmt) "Modules: " fmt
+
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
@@ -15,51 +17,137 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
+#include <linux/random.h>
+#include <linux/scs.h>
#include <linux/vmalloc.h>
+
#include <asm/alternative.h>
#include <asm/insn.h>
+#include <asm/scs.h>
#include <asm/sections.h>
-void *module_alloc(unsigned long size)
+static u64 module_direct_base __ro_after_init = 0;
+static u64 module_plt_base __ro_after_init = 0;
+
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
{
- u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
+ u64 max_pgoff, pgoff;
- /* Silence the initial allocation */
- if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- gfp_mask |= __GFP_NOWARN;
+ if ((end - start) >= size)
+ return 0;
- if (IS_ENABLED(CONFIG_KASAN))
- /* don't exceed the static module region - see below */
- module_alloc_end = MODULES_END;
+ max_pgoff = (size - (end - start)) / PAGE_SIZE;
+ pgoff = get_random_u32_inclusive(0, max_pgoff);
- p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
- NUMA_NO_NODE, __builtin_return_address(0));
+ return start - pgoff * PAGE_SIZE;
+}
- if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
- !IS_ENABLED(CONFIG_KASAN))
- /*
- * KASAN can only deal with module allocations being served
- * from the reserved module region, since the remainder of
- * the vmalloc region is already backed by zero shadow pages,
- * and punching holes into it is non-trivial. Since the module
- * region is not randomized when KASAN is enabled, it is even
- * less likely that the module region gets exhausted, so we
- * can simply omit this fallback in that case.
- */
- p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_base + SZ_2G, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+static int __init module_init_limits(void)
+{
+ u64 kernel_end = (u64)_end;
+ u64 kernel_start = (u64)_text;
+ u64 kernel_size = kernel_end - kernel_start;
+
+ /*
+ * The default modules region is placed immediately below the kernel
+ * image, and is large enough to use the full 2G relocation range.
+ */
+ BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+ BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+ if (!kaslr_enabled()) {
+ if (kernel_size < SZ_128M)
+ module_direct_base = kernel_end - SZ_128M;
+ if (kernel_size < SZ_2G)
+ module_plt_base = kernel_end - SZ_2G;
+ } else {
+ u64 min = kernel_start;
+ u64 max = kernel_end;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+ } else {
+ module_direct_base = random_bounding_box(SZ_128M, min, max);
+ if (module_direct_base) {
+ min = module_direct_base;
+ max = module_direct_base + SZ_128M;
+ }
+ }
+
+ module_plt_base = random_bounding_box(SZ_2G, min, max);
+ }
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ pr_info("%llu pages in range for non-PLT usage",
+ module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+ pr_info("%llu pages in range for PLT usage",
+ module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+ return 0;
+}
+subsys_initcall(module_init_limits);
+
+void *module_alloc(unsigned long size)
+{
+ void *p = NULL;
+
+ /*
+ * Where possible, prefer to allocate within direct branch range of the
+ * kernel such that no PLTs are necessary.
+ */
+ if (module_direct_base) {
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ module_direct_base,
+ module_direct_base + SZ_128M,
+ GFP_KERNEL | __GFP_NOWARN,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
+
+ if (!p && module_plt_base) {
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ module_plt_base,
+ module_plt_base + SZ_2G,
+ GFP_KERNEL | __GFP_NOWARN,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
+
+ if (!p) {
+ pr_warn_ratelimited("%s: unable to allocate memory\n",
+ __func__);
+ }
+
+ if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
vfree(p);
return NULL;
}
- return p;
+ /* Memory is intended to be executable, reset the pointer tag. */
+ return kasan_reset_tag(p);
}
enum aarch64_reloc_op {
@@ -315,21 +403,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* MOVW instruction relocations. */
case R_AARCH64_MOVW_UABS_G0_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
AARCH64_INSN_IMM_MOVKZ);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
AARCH64_INSN_IMM_MOVKZ);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
AARCH64_INSN_IMM_MOVKZ);
@@ -397,7 +485,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
- /* Fall through */
+ fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_adrp(me, sechdrs, loc, val);
if (ovf && ovf != -ERANGE)
@@ -441,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
AARCH64_INSN_IMM_26);
-
- if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
- ovf == -ERANGE) {
+ if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
@@ -471,21 +557,6 @@ overflow:
return -ENOEXEC;
}
-static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- const char *name)
-{
- const Elf_Shdr *s, *se;
- const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
- for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
- if (strcmp(name, secstrs + s->sh_name) == 0)
- return s;
- }
-
- return NULL;
-}
-
static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
{
*plt = get_plt_entry(addr, plt);
@@ -495,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod)
{
-#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+#if defined(CONFIG_DYNAMIC_FTRACE)
const Elf_Shdr *s;
struct plt_entry *plts;
@@ -507,9 +578,6 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR);
-
mod->arch.ftrace_trampolines = plts;
#endif
return 0;
@@ -524,5 +592,11 @@ int module_finalize(const Elf_Ehdr *hdr,
if (s)
apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (scs_is_dynamic()) {
+ s = find_section(hdr, sechdrs, ".init.eh_frame");
+ if (s)
+ scs_patch((void *)s->sh_addr, s->sh_size);
+ }
+
return module_init_ftrace_plt(hdr, sechdrs, me);
}
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
deleted file mode 100644
index 22e36a21c113..000000000000
--- a/arch/arm64/kernel/module.lds
+++ /dev/null
@@ -1,5 +0,0 @@
-SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
- .init.plt (NOLOAD) : { BYTE(0) }
- .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
-}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
new file mode 100644
index 000000000000..a41ef3213e1e
--- /dev/null
+++ b/arch/arm64/kernel/mte.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+
+#include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/string.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/thread_info.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * The asynchronous and asymmetric MTE modes have the same behavior for
+ * store operations. This flag is set when either of these modes is enabled.
+ */
+DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
+#endif
+
+void mte_sync_tags(pte_t pte, unsigned int nr_pages)
+{
+ struct page *page = pte_page(pte);
+ unsigned int i;
+
+ /* if PG_mte_tagged is set, tags have already been initialised */
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (try_page_mte_tagging(page)) {
+ mte_clear_page_tags(page_address(page));
+ set_page_mte_tagged(page);
+ }
+ }
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
+}
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+ char *addr1, *addr2;
+ int ret;
+
+ addr1 = page_address(page1);
+ addr2 = page_address(page2);
+ ret = memcmp(addr1, addr2, PAGE_SIZE);
+
+ if (!system_supports_mte() || ret)
+ return ret;
+
+ /*
+ * If the page content is identical but at least one of the pages is
+ * tagged, return non-zero to avoid KSM merging. If only one of the
+ * pages is tagged, set_pte_at() may zero or change the tags of the
+ * other page via mte_sync_tags().
+ */
+ if (page_mte_tagged(page1) || page_mte_tagged(page2))
+ return addr1 != addr2;
+
+ return ret;
+}
+
+static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
+{
+ /* Enable MTE Sync Mode for EL1. */
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
+ SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
+ isb();
+
+ pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_enable_kernel_sync(void)
+{
+ /*
+ * Make sure we enter this function when no PE has set
+ * async mode previously.
+ */
+ WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
+ "MTE async mode enabled system wide!");
+
+ __mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC);
+}
+
+void mte_enable_kernel_async(void)
+{
+ __mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC);
+
+ /*
+ * MTE async mode is set system wide by the first PE that
+ * executes this function.
+ *
+ * Note: If in future KASAN acquires a runtime switching
+ * mode in between sync and async, this strategy needs
+ * to be reviewed.
+ */
+ if (!system_uses_mte_async_or_asymm_mode())
+ static_branch_enable(&mte_async_or_asymm_mode);
+}
+
+void mte_enable_kernel_asymm(void)
+{
+ if (cpus_have_cap(ARM64_MTE_ASYMM)) {
+ __mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM);
+
+ /*
+ * MTE asymm mode behaves as async mode for store
+ * operations. The mode is set system wide by the
+ * first PE that executes this function.
+ *
+ * Note: If in future KASAN acquires a runtime switching
+ * mode in between sync and async, this strategy needs
+ * to be reviewed.
+ */
+ if (!system_uses_mte_async_or_asymm_mode())
+ static_branch_enable(&mte_async_or_asymm_mode);
+ } else {
+ /*
+ * If the CPU does not support MTE asymmetric mode the
+ * kernel falls back on synchronous mode which is the
+ * default for kasan=on.
+ */
+ mte_enable_kernel_sync();
+ }
+}
+#endif
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_check_tfsr_el1(void)
+{
+ u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
+
+ if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
+ /*
+ * Note: isb() is not required after this direct write
+ * because there is no indirect read subsequent to it
+ * (per ARM DDI 0487F.c table D13-1).
+ */
+ write_sysreg_s(0, SYS_TFSR_EL1);
+
+ kasan_report_async();
+ }
+}
+#endif
+
+/*
+ * This is where we actually resolve the system and process MTE mode
+ * configuration into an actual value in SCTLR_EL1 that affects
+ * userspace.
+ */
+static void mte_update_sctlr_user(struct task_struct *task)
+{
+ /*
+ * This must be called with preemption disabled and can only be called
+ * on the current or next task since the CPU must match where the thread
+ * is going to run. The caller is responsible for calling
+ * update_sctlr_el1() later in the same preemption disabled block.
+ */
+ unsigned long sctlr = task->thread.sctlr_user;
+ unsigned long mte_ctrl = task->thread.mte_ctrl;
+ unsigned long pref, resolved_mte_tcf;
+
+ pref = __this_cpu_read(mte_tcf_preferred);
+ /*
+ * If there is no overlap between the system preferred and
+ * program requested values go with what was requested.
+ */
+ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
+ sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ /*
+ * Pick an actual setting. The order in which we check for
+ * set bits and map into register values determines our
+ * default order.
+ */
+ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM);
+ else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
+ else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
+ sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
+ task->thread.sctlr_user = sctlr;
+}
+
+static void mte_update_gcr_excl(struct task_struct *task)
+{
+ /*
+ * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
+ * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
+ */
+ if (kasan_hw_tags_enabled())
+ return;
+
+ write_sysreg_s(
+ ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+ SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
+ SYS_GCR_EL1);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Only called from assembly, silence sparse */
+void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+
+void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1); /* Branch -> NOP */
+
+ if (kasan_hw_tags_enabled())
+ *updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+#endif
+
+void mte_thread_init_user(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ /* clear any pending asynchronous tag fault */
+ dsb(ish);
+ write_sysreg_s(0, SYS_TFSRE0_EL1);
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ /* disable tag checking and reset tag generation mask */
+ set_mte_ctrl(current, 0);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+ if (!system_supports_mte())
+ return;
+
+ mte_update_sctlr_user(next);
+ mte_update_gcr_excl(next);
+
+ /* TCO may not have been disabled on exception entry for the current task. */
+ mte_disable_tco_entry(next);
+
+ /*
+ * Check if an async tag exception occurred at EL1.
+ *
+ * Note: On the context switch path we rely on the dsb() present
+ * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
+ * are synchronized before this point.
+ */
+ isb();
+ mte_check_tfsr_el1();
+}
+
+void mte_cpu_setup(void)
+{
+ u64 rgsr;
+
+ /*
+ * CnP must be enabled only after the MAIR_EL1 register has been set
+ * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may
+ * lead to the wrong memory type being used for a brief window during
+ * CPU power-up.
+ *
+ * CnP is not a boot feature so MTE gets enabled before CnP, but let's
+ * make sure that is the case.
+ */
+ BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
+ BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);
+
+ /* Normal Tagged memory type at the corresponding MAIR index */
+ sysreg_clear_set(mair_el1,
+ MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED),
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED,
+ MT_NORMAL_TAGGED));
+
+ write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1);
+
+ /*
+ * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+ * RGSR_EL1.SEED must be non-zero for IRG to produce
+ * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+ * must initialize it.
+ */
+ rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) <<
+ SYS_RGSR_EL1_SEED_SHIFT;
+ if (rgsr == 0)
+ rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT;
+ write_sysreg_s(rgsr, SYS_RGSR_EL1);
+
+ /* clear any pending tag check faults in TFSR*_EL1 */
+ write_sysreg_s(0, SYS_TFSR_EL1);
+ write_sysreg_s(0, SYS_TFSRE0_EL1);
+
+ local_flush_tlb_all();
+}
+
+void mte_suspend_enter(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ /*
+ * The barriers are required to guarantee that the indirect writes
+ * to TFSR_EL1 are synchronized before we report the state.
+ */
+ dsb(nsh);
+ isb();
+
+ /* Report SYS_TFSR_EL1 before suspend entry */
+ mte_check_tfsr_el1();
+}
+
+void mte_suspend_exit(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ mte_cpu_setup();
+}
+
+long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+{
+ u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+ SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
+
+ if (!system_supports_mte())
+ return 0;
+
+ if (arg & PR_MTE_TCF_ASYNC)
+ mte_ctrl |= MTE_CTRL_TCF_ASYNC;
+ if (arg & PR_MTE_TCF_SYNC)
+ mte_ctrl |= MTE_CTRL_TCF_SYNC;
+
+ /*
+ * If the system supports it and both sync and async modes are
+ * specified then implicitly enable asymmetric mode.
+ * Userspace could see a mix of both sync and async anyway due
+ * to differing or changing defaults on CPUs.
+ */
+ if (cpus_have_cap(ARM64_MTE_ASYMM) &&
+ (arg & PR_MTE_TCF_ASYNC) &&
+ (arg & PR_MTE_TCF_SYNC))
+ mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+
+ task->thread.mte_ctrl = mte_ctrl;
+ if (task == current) {
+ preempt_disable();
+ mte_update_sctlr_user(task);
+ mte_update_gcr_excl(task);
+ update_sctlr_el1(task->thread.sctlr_user);
+ preempt_enable();
+ }
+
+ return 0;
+}
+
+long get_mte_ctrl(struct task_struct *task)
+{
+ unsigned long ret;
+ u64 mte_ctrl = task->thread.mte_ctrl;
+ u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+ SYS_GCR_EL1_EXCL_MASK;
+
+ if (!system_supports_mte())
+ return 0;
+
+ ret = incl << PR_MTE_TAG_SHIFT;
+ if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
+ ret |= PR_MTE_TCF_ASYNC;
+ if (mte_ctrl & MTE_CTRL_TCF_SYNC)
+ ret |= PR_MTE_TCF_SYNC;
+
+ return ret;
+}
+
+/*
+ * Access MTE tags in another process' address space as given in mm. Update
+ * the number of tags copied. Return 0 if any tags copied, error otherwise.
+ * Inspired by __access_remote_vm().
+ */
+static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
+ struct iovec *kiov, unsigned int gup_flags)
+{
+ void __user *buf = kiov->iov_base;
+ size_t len = kiov->iov_len;
+ int err = 0;
+ int write = gup_flags & FOLL_WRITE;
+
+ if (!access_ok(buf, len))
+ return -EFAULT;
+
+ if (mmap_read_lock_killable(mm))
+ return -EIO;
+
+ while (len) {
+ struct vm_area_struct *vma;
+ unsigned long tags, offset;
+ void *maddr;
+ struct page *page = get_user_page_vma_remote(mm, addr,
+ gup_flags, &vma);
+
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
+
+ /*
+ * Only copy tags if the page has been mapped as PROT_MTE
+ * (PG_mte_tagged set). Otherwise the tags are not valid and
+ * not accessible to user. Moreover, an mprotect(PROT_MTE)
+ * would cause the existing tags to be cleared if the page
+ * was never mapped with PROT_MTE.
+ */
+ if (!(vma->vm_flags & VM_MTE)) {
+ err = -EOPNOTSUPP;
+ put_page(page);
+ break;
+ }
+ WARN_ON_ONCE(!page_mte_tagged(page));
+
+ /* limit access to the end of the page */
+ offset = offset_in_page(addr);
+ tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
+
+ maddr = page_address(page);
+ if (write) {
+ tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
+ set_page_dirty_lock(page);
+ } else {
+ tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
+ }
+ put_page(page);
+
+ /* error accessing the tracer's buffer */
+ if (!tags)
+ break;
+
+ len -= tags;
+ buf += tags;
+ addr += tags * MTE_GRANULE_SIZE;
+ }
+ mmap_read_unlock(mm);
+
+ /* return an error if no tags copied */
+ kiov->iov_len = buf - kiov->iov_base;
+ if (!kiov->iov_len) {
+ /* check for error accessing the tracee's address space */
+ if (err)
+ return -EIO;
+ else
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy MTE tags in another process' address space at 'addr' to/from tracer's
+ * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
+ */
+static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
+ struct iovec *kiov, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return -EPERM;
+
+ if (!tsk->ptrace || (current != tsk->parent) ||
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptracer_capable(tsk, mm->user_ns))) {
+ mmput(mm);
+ return -EPERM;
+ }
+
+ ret = __access_remote_tags(mm, addr, kiov, gup_flags);
+ mmput(mm);
+
+ return ret;
+}
+
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret;
+ struct iovec kiov;
+ struct iovec __user *uiov = (void __user *)data;
+ unsigned int gup_flags = FOLL_FORCE;
+
+ if (!system_supports_mte())
+ return -EIO;
+
+ if (get_user(kiov.iov_base, &uiov->iov_base) ||
+ get_user(kiov.iov_len, &uiov->iov_len))
+ return -EFAULT;
+
+ if (request == PTRACE_POKEMTETAGS)
+ gup_flags |= FOLL_WRITE;
+
+ /* align addr to the MTE tag granule */
+ addr &= MTE_GRANULE_MASK;
+
+ ret = access_remote_tags(child, addr, &kiov, gup_flags);
+ if (!ret)
+ ret = put_user(kiov.iov_len, &uiov->iov_len);
+
+ return ret;
+}
+
+static ssize_t mte_tcf_preferred_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ switch (per_cpu(mte_tcf_preferred, dev->id)) {
+ case MTE_CTRL_TCF_ASYNC:
+ return sysfs_emit(buf, "async\n");
+ case MTE_CTRL_TCF_SYNC:
+ return sysfs_emit(buf, "sync\n");
+ case MTE_CTRL_TCF_ASYMM:
+ return sysfs_emit(buf, "asymm\n");
+ default:
+ return sysfs_emit(buf, "???\n");
+ }
+}
+
+static ssize_t mte_tcf_preferred_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u64 tcf;
+
+ if (sysfs_streq(buf, "async"))
+ tcf = MTE_CTRL_TCF_ASYNC;
+ else if (sysfs_streq(buf, "sync"))
+ tcf = MTE_CTRL_TCF_SYNC;
+ else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
+ tcf = MTE_CTRL_TCF_ASYMM;
+ else
+ return -EINVAL;
+
+ device_lock(dev);
+ per_cpu(mte_tcf_preferred, dev->id) = tcf;
+ device_unlock(dev);
+
+ return count;
+}
+static DEVICE_ATTR_RW(mte_tcf_preferred);
+
+static int register_mte_tcf_preferred_sysctl(void)
+{
+ unsigned int cpu;
+
+ if (!system_supports_mte())
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
+ device_create_file(get_cpu_device(cpu),
+ &dev_attr_mte_tcf_preferred);
+ }
+
+ return 0;
+}
+subsys_initcall(register_mte_tcf_preferred_sysctl);
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+size_t mte_probe_user_range(const char __user *uaddr, size_t size)
+{
+ const char __user *end = uaddr + size;
+ int err = 0;
+ char val;
+
+ __raw_get_user(val, uaddr, err);
+ if (err)
+ return size;
+
+ uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
+ while (uaddr < end) {
+ /*
+ * A read is sufficient for mte, the caller should have probed
+ * for the pte write permission if required.
+ */
+ __raw_get_user(val, uaddr, err);
+ if (err)
+ return end - uaddr;
+ uaddr += MTE_GRANULE_SIZE;
+ }
+ (void)val;
+
+ return 0;
+}
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 1ef702b0be2d..aa718d6a9274 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/static_call.h>
#include <asm/paravirt.h>
#include <asm/pvclock-abi.h>
@@ -26,11 +27,15 @@
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
struct pv_time_stolen_time_region {
- struct pvclock_vcpu_stolen_time *kaddr;
+ struct pvclock_vcpu_stolen_time __rcu *kaddr;
};
static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -45,36 +50,50 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
/* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
+ u64 ret = 0;
reg = per_cpu_ptr(&stolen_time_region, cpu);
- if (!reg->kaddr) {
- pr_warn_once("stolen time enabled but not configured for cpu %d\n",
- cpu);
+
+ /*
+ * paravirt_steal_clock() may be called before the CPU
+ * online notification callback runs. Until the callback
+ * has run we just return zero.
+ */
+ rcu_read_lock();
+ kaddr = rcu_dereference(reg->kaddr);
+ if (!kaddr) {
+ rcu_read_unlock();
return 0;
}
- return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+ ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+ rcu_read_unlock();
+ return ret;
}
-static int stolen_time_dying_cpu(unsigned int cpu)
+static int stolen_time_cpu_down_prepare(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
reg = this_cpu_ptr(&stolen_time_region);
if (!reg->kaddr)
return 0;
- memunmap(reg->kaddr);
- memset(reg, 0, sizeof(*reg));
+ kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
+ synchronize_rcu();
+ memunmap(kaddr);
return 0;
}
-static int init_stolen_time_cpu(unsigned int cpu)
+static int stolen_time_cpu_online(unsigned int cpu)
{
+ struct pvclock_vcpu_stolen_time *kaddr = NULL;
struct pv_time_stolen_time_region *reg;
struct arm_smccc_res res;
@@ -85,17 +104,19 @@ static int init_stolen_time_cpu(unsigned int cpu)
if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
return -EINVAL;
- reg->kaddr = memremap(res.a0,
+ kaddr = memremap(res.a0,
sizeof(struct pvclock_vcpu_stolen_time),
MEMREMAP_WB);
+ rcu_assign_pointer(reg->kaddr, kaddr);
+
if (!reg->kaddr) {
pr_warn("Failed to map stolen time data structure\n");
return -ENOMEM;
}
- if (le32_to_cpu(reg->kaddr->revision) != 0 ||
- le32_to_cpu(reg->kaddr->attributes) != 0) {
+ if (le32_to_cpu(kaddr->revision) != 0 ||
+ le32_to_cpu(kaddr->attributes) != 0) {
pr_warn_once("Unexpected revision or attributes in stolen time data\n");
return -ENXIO;
}
@@ -103,26 +124,23 @@ static int init_stolen_time_cpu(unsigned int cpu)
return 0;
}
-static int pv_time_init_stolen_time(void)
+static int __init pv_time_init_stolen_time(void)
{
int ret;
- ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
- "hypervisor/arm/pvtime:starting",
- init_stolen_time_cpu, stolen_time_dying_cpu);
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "hypervisor/arm/pvtime:online",
+ stolen_time_cpu_online,
+ stolen_time_cpu_down_prepare);
if (ret < 0)
return ret;
return 0;
}
-static bool has_pv_steal_clock(void)
+static bool __init has_pv_steal_clock(void)
{
struct arm_smccc_res res;
- /* To detect the presence of PV time support we require SMCCC 1.1+ */
- if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
- return false;
-
arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
@@ -146,7 +164,7 @@ int __init pv_time_init(void)
if (ret)
return ret;
- pv_ops.time.steal_clock = pv_steal_clock;
+ static_call_update(pv_steal_clock, para_steal_clock);
static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c
new file mode 100644
index 000000000000..a1fe4b4ff591
--- /dev/null
+++ b/arch/arm64/kernel/patch-scs.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 - Google LLC
+ * Author: Ard Biesheuvel <ardb@google.com>
+ */
+
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/scs.h>
+
+//
+// This minimal DWARF CFI parser is partially based on the code in
+// arch/arc/kernel/unwind.c, and on the document below:
+// https://refspecs.linuxbase.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+//
+
+#define DW_CFA_nop 0x00
+#define DW_CFA_set_loc 0x01
+#define DW_CFA_advance_loc1 0x02
+#define DW_CFA_advance_loc2 0x03
+#define DW_CFA_advance_loc4 0x04
+#define DW_CFA_offset_extended 0x05
+#define DW_CFA_restore_extended 0x06
+#define DW_CFA_undefined 0x07
+#define DW_CFA_same_value 0x08
+#define DW_CFA_register 0x09
+#define DW_CFA_remember_state 0x0a
+#define DW_CFA_restore_state 0x0b
+#define DW_CFA_def_cfa 0x0c
+#define DW_CFA_def_cfa_register 0x0d
+#define DW_CFA_def_cfa_offset 0x0e
+#define DW_CFA_def_cfa_expression 0x0f
+#define DW_CFA_expression 0x10
+#define DW_CFA_offset_extended_sf 0x11
+#define DW_CFA_def_cfa_sf 0x12
+#define DW_CFA_def_cfa_offset_sf 0x13
+#define DW_CFA_val_offset 0x14
+#define DW_CFA_val_offset_sf 0x15
+#define DW_CFA_val_expression 0x16
+#define DW_CFA_lo_user 0x1c
+#define DW_CFA_negate_ra_state 0x2d
+#define DW_CFA_GNU_args_size 0x2e
+#define DW_CFA_GNU_negative_offset_extended 0x2f
+#define DW_CFA_hi_user 0x3f
+
+extern const u8 __eh_frame_start[], __eh_frame_end[];
+
+enum {
+ PACIASP = 0xd503233f,
+ AUTIASP = 0xd50323bf,
+ SCS_PUSH = 0xf800865e,
+ SCS_POP = 0xf85f8e5e,
+};
+
+static void __always_inline scs_patch_loc(u64 loc)
+{
+ u32 insn = le32_to_cpup((void *)loc);
+
+ switch (insn) {
+ case PACIASP:
+ *(u32 *)loc = cpu_to_le32(SCS_PUSH);
+ break;
+ case AUTIASP:
+ *(u32 *)loc = cpu_to_le32(SCS_POP);
+ break;
+ default:
+ /*
+ * While the DW_CFA_negate_ra_state directive is guaranteed to
+ * appear right after a PACIASP/AUTIASP instruction, it may
+ * also appear after a DW_CFA_restore_state directive that
+ * restores a state that is only partially accurate, and is
+ * followed by DW_CFA_negate_ra_state directive to toggle the
+ * PAC bit again. So we permit other instructions here, and ignore
+ * them.
+ */
+ return;
+ }
+ dcache_clean_pou(loc, loc + sizeof(u32));
+}
+
+/*
+ * Skip one uleb128/sleb128 encoded quantity from the opcode stream. All bytes
+ * except the last one have bit #7 set.
+ */
+static int __always_inline skip_xleb128(const u8 **opcode, int size)
+{
+ u8 c;
+
+ do {
+ c = *(*opcode)++;
+ size--;
+ } while (c & BIT(7));
+
+ return size;
+}
+
+struct eh_frame {
+ /*
+ * The size of this frame if 0 < size < U32_MAX, 0 terminates the list.
+ */
+ u32 size;
+
+ /*
+ * The first frame is a Common Information Entry (CIE) frame, followed
+ * by one or more Frame Description Entry (FDE) frames. In the former
+ * case, this field is 0, otherwise it is the negated offset relative
+ * to the associated CIE frame.
+ */
+ u32 cie_id_or_pointer;
+
+ union {
+ struct { // CIE
+ u8 version;
+ u8 augmentation_string[];
+ };
+
+ struct { // FDE
+ s32 initial_loc;
+ s32 range;
+ u8 opcodes[];
+ };
+ };
+};
+
+static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
+ bool fde_has_augmentation_data,
+ int code_alignment_factor,
+ bool dry_run)
+{
+ int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
+ u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
+ const u8 *opcode = frame->opcodes;
+
+ if (fde_has_augmentation_data) {
+ int l;
+
+ // assume single byte uleb128_t
+ if (WARN_ON(*opcode & BIT(7)))
+ return -ENOEXEC;
+
+ l = *opcode++;
+ opcode += l;
+ size -= l + 1;
+ }
+
+ /*
+ * Starting from 'loc', apply the CFA opcodes that advance the location
+ * pointer, and identify the locations of the PAC instructions.
+ */
+ while (size-- > 0) {
+ switch (*opcode++) {
+ case DW_CFA_nop:
+ case DW_CFA_remember_state:
+ case DW_CFA_restore_state:
+ break;
+
+ case DW_CFA_advance_loc1:
+ loc += *opcode++ * code_alignment_factor;
+ size--;
+ break;
+
+ case DW_CFA_advance_loc2:
+ loc += *opcode++ * code_alignment_factor;
+ loc += (*opcode++ << 8) * code_alignment_factor;
+ size -= 2;
+ break;
+
+ case DW_CFA_def_cfa:
+ case DW_CFA_offset_extended:
+ size = skip_xleb128(&opcode, size);
+ fallthrough;
+ case DW_CFA_def_cfa_offset:
+ case DW_CFA_def_cfa_offset_sf:
+ case DW_CFA_def_cfa_register:
+ case DW_CFA_same_value:
+ case DW_CFA_restore_extended:
+ case 0x80 ... 0xbf:
+ size = skip_xleb128(&opcode, size);
+ break;
+
+ case DW_CFA_negate_ra_state:
+ if (!dry_run)
+ scs_patch_loc(loc - 4);
+ break;
+
+ case 0x40 ... 0x7f:
+ // advance loc
+ loc += (opcode[-1] & 0x3f) * code_alignment_factor;
+ break;
+
+ case 0xc0 ... 0xff:
+ break;
+
+ default:
+ pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
+
+int noinstr scs_patch(const u8 eh_frame[], int size)
+{
+ const u8 *p = eh_frame;
+
+ while (size > 4) {
+ const struct eh_frame *frame = (const void *)p;
+ bool fde_has_augmentation_data = true;
+ int code_alignment_factor = 1;
+ int ret;
+
+ if (frame->size == 0 ||
+ frame->size == U32_MAX ||
+ frame->size > size)
+ break;
+
+ if (frame->cie_id_or_pointer == 0) {
+ const u8 *p = frame->augmentation_string;
+
+ /* a 'z' in the augmentation string must come first */
+ fde_has_augmentation_data = *p == 'z';
+
+ /*
+ * The code alignment factor is a uleb128 encoded field
+ * but given that the only sensible values are 1 or 4,
+ * there is no point in decoding the whole thing.
+ */
+ p += strlen(p) + 1;
+ if (!WARN_ON(*p & BIT(7)))
+ code_alignment_factor = *p;
+ } else {
+ ret = scs_handle_fde_frame(frame,
+ fde_has_augmentation_data,
+ code_alignment_factor,
+ true);
+ if (ret)
+ return ret;
+ scs_handle_fde_frame(frame, fde_has_augmentation_data,
+ code_alignment_factor, false);
+ }
+
+ p += sizeof(frame->size) + frame->size;
+ size -= sizeof(frame->size) + frame->size;
+ }
+ return 0;
+}
+
+asmlinkage void __init scs_patch_vmlinux(void)
+{
+ if (!should_patch_pac_into_scs())
+ return;
+
+ WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
+ icache_inval_all_pou();
+ isb();
+}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
new file mode 100644
index 000000000000..b4835f6d594b
--- /dev/null
+++ b/arch/arm64/kernel/patching.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/insn.h>
+#include <asm/kprobes.h>
+#include <asm/patching.h>
+#include <asm/sections.h>
+
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static bool is_exit_text(unsigned long addr)
+{
+ /* discarded with init text/data */
+ return system_state < SYSTEM_RUNNING &&
+ addr >= (unsigned long)__exittext_begin &&
+ addr < (unsigned long)__exittext_end;
+}
+
+static bool is_image_text(unsigned long addr)
+{
+ return core_kernel_text(addr) || is_exit_text(addr);
+}
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+ unsigned long uintaddr = (uintptr_t) addr;
+ bool image = is_image_text(uintaddr);
+ struct page *page;
+
+ if (image)
+ page = phys_to_page(__pa_symbol(addr));
+ else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ page = vmalloc_to_page(addr);
+ else
+ return addr;
+
+ BUG_ON(!page);
+ return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
+ (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+ int ret;
+ __le32 val;
+
+ ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
+ if (!ret)
+ *insnp = le32_to_cpu(val);
+
+ return ret;
+}
+
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
+{
+ void *waddr = addr;
+ unsigned long flags = 0;
+ int ret;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
+
+ patch_unmap(FIX_TEXT_POKE0);
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+ return __aarch64_insn_write(addr, cpu_to_le32(insn));
+}
+
+noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
+{
+ u64 *waddr;
+ unsigned long flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = copy_to_kernel_nofault(waddr, &val, sizeof(val));
+
+ patch_unmap(FIX_TEXT_POKE0);
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+ u32 *tp = addr;
+ int ret;
+
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)tp & 0x3)
+ return -EINVAL;
+
+ ret = aarch64_insn_write(tp, insn);
+ if (ret == 0)
+ caches_clean_inval_pou((uintptr_t)tp,
+ (uintptr_t)tp + AARCH64_INSN_SIZE);
+
+ return ret;
+}
+
+struct aarch64_insn_patch {
+ void **text_addrs;
+ u32 *new_insns;
+ int insn_cnt;
+ atomic_t cpu_count;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+ int i, ret = 0;
+ struct aarch64_insn_patch *pp = arg;
+
+ /* The last CPU becomes master */
+ if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) {
+ for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+ ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+ pp->new_insns[i]);
+ /* Notify other processors with an additional increment. */
+ atomic_inc(&pp->cpu_count);
+ } else {
+ while (atomic_read(&pp->cpu_count) <= num_online_cpus())
+ cpu_relax();
+ isb();
+ }
+
+ return ret;
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+ struct aarch64_insn_patch patch = {
+ .text_addrs = addrs,
+ .new_insns = insns,
+ .insn_cnt = cnt,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+ if (cnt <= 0)
+ return -EINVAL;
+
+ return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+ cpu_online_mask);
+}
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 570988c7a7ff..f872c57e9909 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -11,8 +11,6 @@
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/of_pci.h>
-#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>
@@ -82,14 +80,29 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
{
- if (!acpi_disabled) {
- struct pci_config_window *cfg = bridge->bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct device *bus_dev = &bridge->bus->dev;
+ struct pci_config_window *cfg;
+ struct acpi_device *adev;
+ struct device *bus_dev;
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
- }
+ if (acpi_disabled)
+ return 0;
+
+ cfg = bridge->bus->sysdata;
+
+ /*
+ * On Hyper-V there is no corresponding ACPI device for a root bridge,
+ * therefore ->parent is set as NULL by the driver. And set 'adev' as
+ * NULL in this case because there is no proper ACPI device.
+ */
+ if (!cfg->parent)
+ adev = NULL;
+ else
+ adev = to_acpi_device(cfg->parent);
+
+ bus_dev = &bridge->bus->dev;
+
+ ACPI_COMPANION_SET(&bridge->dev, adev);
+ set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
return 0;
}
@@ -117,7 +130,7 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
struct device *dev = &root->device->dev;
struct resource *bus_res = &root->secondary;
u16 seg = root->segment;
- struct pci_ecam_ops *ecam_ops;
+ const struct pci_ecam_ops *ecam_ops;
struct resource cfgres;
struct acpi_device *adev;
struct pci_config_window *cfg;
@@ -185,7 +198,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
root_ops->release_info = pci_acpi_generic_release_info;
root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = &ri->cfg->ops->pci_ops;
+ root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
if (!bus)
return NULL;
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index b0e03e052dd1..6d157f32187b 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -5,10 +5,10 @@
* Copyright (C) 2015 ARM Limited
*/
#include <linux/perf_event.h>
+#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <asm/pointer_auth.h>
-#include <asm/stacktrace.h>
struct frame_tail {
struct frame_tail __user *fp;
@@ -38,7 +38,7 @@ user_backtrace(struct frame_tail __user *tail,
if (err)
return NULL;
- lr = ptrauth_strip_insn_pac(buftail.lr);
+ lr = ptrauth_strip_user_insn_pac(buftail.lr);
perf_callchain_store(entry, lr);
@@ -102,7 +102,7 @@ compat_user_backtrace(struct compat_frame_tail __user *tail,
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_state()) {
/* We don't support guest os callchain now */
return;
}
@@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
tail = (struct frame_tail __user *)regs->regs[29];
while (entry->nr < entry->max_stack &&
- tail && !((unsigned long)tail & 0xf))
+ tail && !((unsigned long)tail & 0x7))
tail = user_backtrace(tail, entry);
} else {
#ifdef CONFIG_COMPAT
@@ -132,46 +132,38 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
}
}
-/*
- * Gets called by walk_stackframe() for every stackframe. This will be called
- * whist unwinding the stackframe and is like a subroutine return so we use
- * the PC.
- */
-static int callchain_trace(struct stackframe *frame, void *data)
+static bool callchain_trace(void *data, unsigned long pc)
{
struct perf_callchain_entry_ctx *entry = data;
- perf_callchain_store(entry, frame->pc);
- return 0;
+ return perf_callchain_store(entry, pc) == 0;
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- struct stackframe frame;
-
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_state()) {
/* We don't support guest os callchain now */
return;
}
- start_backtrace(&frame, regs->regs[29], regs->pc);
- walk_stackframe(current, &frame, callchain_trace, entry);
+ arch_stack_walk(callchain_trace, entry, current, regs);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return perf_guest_cbs->get_guest_ip();
+ if (perf_guest_state())
+ return perf_guest_get_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
+ unsigned int guest_state = perf_guest_state();
int misc = 0;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- if (perf_guest_cbs->is_user_mode())
+ if (guest_state) {
+ if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
deleted file mode 100644
index e40b65645c86..000000000000
--- a/arch/arm64/kernel/perf_event.c
+++ /dev/null
@@ -1,1168 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ARMv8 PMUv3 Performance Events handling code.
- *
- * Copyright (C) 2012 ARM Limited
- * Author: Will Deacon <will.deacon@arm.com>
- *
- * This code is based heavily on the ARMv7 perf event code.
- */
-
-#include <asm/irq_regs.h>
-#include <asm/perf_event.h>
-#include <asm/sysreg.h>
-#include <asm/virt.h>
-
-#include <linux/acpi.h>
-#include <linux/clocksource.h>
-#include <linux/kvm_host.h>
-#include <linux/of.h>
-#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
-#include <linux/smp.h>
-
-/* ARMv8 Cortex-A53 specific event types. */
-#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
-
-/* ARMv8 Cavium ThunderX specific event types. */
-#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9
-#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA
-#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB
-#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC
-#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED
-
-/*
- * ARMv8 Architectural defined events, not all of these may
- * be supported on any given implementation. Unsupported events will
- * be disabled at run-time based on the PMCEID registers.
- */
-static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
-};
-
-static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_TLB,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
- [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
-};
-
-static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
-
- [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
- [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-};
-
-static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
-
- [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
- [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-};
-
-static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-};
-
-static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
-
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
-};
-
-static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
-
- [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
- [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-};
-
-static ssize_t
-armv8pmu_events_sysfs_show(struct device *dev,
- struct device_attribute *attr, char *page)
-{
- struct perf_pmu_events_attr *pmu_attr;
-
- pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-
- return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
-}
-
-#define ARMV8_EVENT_ATTR(name, config) \
- (&((struct perf_pmu_events_attr) { \
- .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
- .id = config, \
- }).attr.attr)
-
-static struct attribute *armv8_pmuv3_event_attrs[] = {
- ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
- ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
- ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
- ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
- ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
- ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
- ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
- ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
- ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
- ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
- ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
- ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
- ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
- ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
- ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
- ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
- ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
- ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
- ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
- ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
- ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
- ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
- ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
- ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
- ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
- ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
- ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
- ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
- ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
- ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
- /* Don't expose the chain event in /sys, since it's useless in isolation */
- ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
- ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
- ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
- ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
- ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
- ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
- ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
- ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
- ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
- ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
- ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
- ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
- ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
- ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
- ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
- ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
- ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
- ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
- ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
- ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
- ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
- ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
- ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
- ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
- ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
- ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
- ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
- ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
- ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
- ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
- NULL,
-};
-
-static umode_t
-armv8pmu_event_attr_is_visible(struct kobject *kobj,
- struct attribute *attr, int unused)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct pmu *pmu = dev_get_drvdata(dev);
- struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
- struct perf_pmu_events_attr *pmu_attr;
-
- pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
-
- if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
- test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
- return attr->mode;
-
- pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
- if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
- test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap))
- return attr->mode;
-
- return 0;
-}
-
-static struct attribute_group armv8_pmuv3_events_attr_group = {
- .name = "events",
- .attrs = armv8_pmuv3_event_attrs,
- .is_visible = armv8pmu_event_attr_is_visible,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-15");
-PMU_FORMAT_ATTR(long, "config1:0");
-
-static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
-{
- return event->attr.config1 & 0x1;
-}
-
-static struct attribute *armv8_pmuv3_format_attrs[] = {
- &format_attr_event.attr,
- &format_attr_long.attr,
- NULL,
-};
-
-static struct attribute_group armv8_pmuv3_format_attr_group = {
- .name = "format",
- .attrs = armv8_pmuv3_format_attrs,
-};
-
-/*
- * Perf Events' indices
- */
-#define ARMV8_IDX_CYCLE_COUNTER 0
-#define ARMV8_IDX_COUNTER0 1
-#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
- (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
-
-/*
- * We must chain two programmable counters for 64 bit events,
- * except when we have allocated the 64bit cycle counter (for CPU
- * cycles event). This must be called only when the event has
- * a counter allocated.
- */
-static inline bool armv8pmu_event_is_chained(struct perf_event *event)
-{
- int idx = event->hw.idx;
-
- return !WARN_ON(idx < 0) &&
- armv8pmu_event_is_64bit(event) &&
- (idx != ARMV8_IDX_CYCLE_COUNTER);
-}
-
-/*
- * ARMv8 low level PMU access
- */
-
-/*
- * Perf Event to low level counters mapping
- */
-#define ARMV8_IDX_TO_COUNTER(x) \
- (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
-
-static inline u32 armv8pmu_pmcr_read(void)
-{
- return read_sysreg(pmcr_el0);
-}
-
-static inline void armv8pmu_pmcr_write(u32 val)
-{
- val &= ARMV8_PMU_PMCR_MASK;
- isb();
- write_sysreg(val, pmcr_el0);
-}
-
-static inline int armv8pmu_has_overflowed(u32 pmovsr)
-{
- return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
-}
-
-static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
-{
- return idx >= ARMV8_IDX_CYCLE_COUNTER &&
- idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
-}
-
-static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
-{
- return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
-}
-
-static inline void armv8pmu_select_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(counter, pmselr_el0);
- isb();
-}
-
-static inline u32 armv8pmu_read_evcntr(int idx)
-{
- armv8pmu_select_counter(idx);
- return read_sysreg(pmxevcntr_el0);
-}
-
-static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
-{
- int idx = event->hw.idx;
- u64 val = 0;
-
- val = armv8pmu_read_evcntr(idx);
- if (armv8pmu_event_is_chained(event))
- val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
- return val;
-}
-
-static u64 armv8pmu_read_counter(struct perf_event *event)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
- u64 value = 0;
-
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u reading wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- value = read_sysreg(pmccntr_el0);
- else
- value = armv8pmu_read_hw_counter(event);
-
- return value;
-}
-
-static inline void armv8pmu_write_evcntr(int idx, u32 value)
-{
- armv8pmu_select_counter(idx);
- write_sysreg(value, pmxevcntr_el0);
-}
-
-static inline void armv8pmu_write_hw_counter(struct perf_event *event,
- u64 value)
-{
- int idx = event->hw.idx;
-
- if (armv8pmu_event_is_chained(event)) {
- armv8pmu_write_evcntr(idx, upper_32_bits(value));
- armv8pmu_write_evcntr(idx - 1, lower_32_bits(value));
- } else {
- armv8pmu_write_evcntr(idx, value);
- }
-}
-
-static void armv8pmu_write_counter(struct perf_event *event, u64 value)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u writing wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
- /*
- * The cycles counter is really a 64-bit counter.
- * When treating it as a 32-bit counter, we only count
- * the lower 32 bits, and set the upper 32-bits so that
- * we get an interrupt upon 32-bit overflow.
- */
- if (!armv8pmu_event_is_64bit(event))
- value |= 0xffffffff00000000ULL;
- write_sysreg(value, pmccntr_el0);
- } else
- armv8pmu_write_hw_counter(event, value);
-}
-
-static inline void armv8pmu_write_evtype(int idx, u32 val)
-{
- armv8pmu_select_counter(idx);
- val &= ARMV8_PMU_EVTYPE_MASK;
- write_sysreg(val, pmxevtyper_el0);
-}
-
-static inline void armv8pmu_write_event_type(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- /*
- * For chained events, the low counter is programmed to count
- * the event of interest and the high counter is programmed
- * with CHAIN event code with filters set to count at all ELs.
- */
- if (armv8pmu_event_is_chained(event)) {
- u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN |
- ARMV8_PMU_INCLUDE_EL2;
-
- armv8pmu_write_evtype(idx - 1, hwc->config_base);
- armv8pmu_write_evtype(idx, chain_evt);
- } else {
- armv8pmu_write_evtype(idx, hwc->config_base);
- }
-}
-
-static inline int armv8pmu_enable_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenset_el0);
- return idx;
-}
-
-static inline void armv8pmu_enable_event_counter(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- int idx = event->hw.idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
-
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
-
- kvm_set_pmu_events(counter_bits, attr);
-
- /* We rely on the hypervisor switch code to enable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- armv8pmu_enable_counter(idx);
- if (armv8pmu_event_is_chained(event))
- armv8pmu_enable_counter(idx - 1);
- }
-}
-
-static inline int armv8pmu_disable_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmcntenclr_el0);
- return idx;
-}
-
-static inline void armv8pmu_disable_event_counter(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- struct perf_event_attr *attr = &event->attr;
- int idx = hwc->idx;
- u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
-
- if (armv8pmu_event_is_chained(event))
- counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
-
- kvm_clr_pmu_events(counter_bits);
-
- /* We rely on the hypervisor switch code to disable guest counters */
- if (!kvm_pmu_counter_deferred(attr)) {
- if (armv8pmu_event_is_chained(event))
- armv8pmu_disable_counter(idx - 1);
- armv8pmu_disable_counter(idx);
- }
-}
-
-static inline int armv8pmu_enable_intens(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenset_el1);
- return idx;
-}
-
-static inline int armv8pmu_enable_event_irq(struct perf_event *event)
-{
- return armv8pmu_enable_intens(event->hw.idx);
-}
-
-static inline int armv8pmu_disable_intens(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- write_sysreg(BIT(counter), pmintenclr_el1);
- isb();
- /* Clear the overflow flag in case an interrupt is pending. */
- write_sysreg(BIT(counter), pmovsclr_el0);
- isb();
-
- return idx;
-}
-
-static inline int armv8pmu_disable_event_irq(struct perf_event *event)
-{
- return armv8pmu_disable_intens(event->hw.idx);
-}
-
-static inline u32 armv8pmu_getreset_flags(void)
-{
- u32 value;
-
- /* Read */
- value = read_sysreg(pmovsclr_el0);
-
- /* Write to clear flags */
- value &= ARMV8_PMU_OVSR_MASK;
- write_sysreg(value, pmovsclr_el0);
-
- return value;
-}
-
-static void armv8pmu_enable_event(struct perf_event *event)
-{
- unsigned long flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- armv8pmu_disable_event_counter(event);
-
- /*
- * Set event (if destined for PMNx counters).
- */
- armv8pmu_write_event_type(event);
-
- /*
- * Enable interrupt for this counter
- */
- armv8pmu_enable_event_irq(event);
-
- /*
- * Enable counter
- */
- armv8pmu_enable_event_counter(event);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void armv8pmu_disable_event(struct perf_event *event)
-{
- unsigned long flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- /*
- * Disable counter and interrupt
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- armv8pmu_disable_event_counter(event);
-
- /*
- * Disable interrupt for this counter
- */
- armv8pmu_disable_event_irq(event);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void armv8pmu_start(struct arm_pmu *cpu_pmu)
-{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
- /* Enable all counters */
- armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
-{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
- /* Disable all counters */
- armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
-{
- u32 pmovsr;
- struct perf_sample_data data;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- /*
- * Get and reset the IRQ flags
- */
- pmovsr = armv8pmu_getreset_flags();
-
- /*
- * Did an overflow occur?
- */
- if (!armv8pmu_has_overflowed(pmovsr))
- return IRQ_NONE;
-
- /*
- * Handle the counter(s) overflow(s)
- */
- regs = get_irq_regs();
-
- /*
- * Stop the PMU while processing the counter overflows
- * to prevent skews in group events.
- */
- armv8pmu_stop(cpu_pmu);
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- /* Ignore if we don't have an event. */
- if (!event)
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
- armv8pmu_start(cpu_pmu);
-
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
- struct arm_pmu *cpu_pmu)
-{
- int idx;
-
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
- return -EAGAIN;
-}
-
-static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
- struct arm_pmu *cpu_pmu)
-{
- int idx;
-
- /*
- * Chaining requires two consecutive event counters, where
- * the lower idx must be even.
- */
- for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
- if (!test_and_set_bit(idx, cpuc->used_mask)) {
- /* Check if the preceding even counter is available */
- if (!test_and_set_bit(idx - 1, cpuc->used_mask))
- return idx;
- /* Release the Odd counter */
- clear_bit(idx, cpuc->used_mask);
- }
- }
- return -EAGAIN;
-}
-
-static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
-
- /* Always prefer to place a cycle counter into the cycle counter. */
- if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
- if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return ARMV8_IDX_CYCLE_COUNTER;
- }
-
- /*
- * Otherwise use events counters
- */
- if (armv8pmu_event_is_64bit(event))
- return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
- else
- return armv8pmu_get_single_idx(cpuc, cpu_pmu);
-}
-
-static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx = event->hw.idx;
-
- clear_bit(idx, cpuc->used_mask);
- if (armv8pmu_event_is_chained(event))
- clear_bit(idx - 1, cpuc->used_mask);
-}
-
-/*
- * Add an event filter to a given event.
- */
-static int armv8pmu_set_event_filter(struct hw_perf_event *event,
- struct perf_event_attr *attr)
-{
- unsigned long config_base = 0;
-
- if (attr->exclude_idle)
- return -EPERM;
-
- /*
- * If we're running in hyp mode, then we *are* the hypervisor.
- * Therefore we ignore exclude_hv in this configuration, since
- * there's no hypervisor to sample anyway. This is consistent
- * with other architectures (x86 and Power).
- */
- if (is_kernel_in_hyp_mode()) {
- if (!attr->exclude_kernel && !attr->exclude_host)
- config_base |= ARMV8_PMU_INCLUDE_EL2;
- if (attr->exclude_guest)
- config_base |= ARMV8_PMU_EXCLUDE_EL1;
- if (attr->exclude_host)
- config_base |= ARMV8_PMU_EXCLUDE_EL0;
- } else {
- if (!attr->exclude_hv && !attr->exclude_host)
- config_base |= ARMV8_PMU_INCLUDE_EL2;
- }
-
- /*
- * Filter out !VHE kernels and guest kernels
- */
- if (attr->exclude_kernel)
- config_base |= ARMV8_PMU_EXCLUDE_EL1;
-
- if (attr->exclude_user)
- config_base |= ARMV8_PMU_EXCLUDE_EL0;
-
- /*
- * Install the filter into config_base as this is used to
- * construct the event type.
- */
- event->config_base = config_base;
-
- return 0;
-}
-
-static int armv8pmu_filter_match(struct perf_event *event)
-{
- unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
- return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
-}
-
-static void armv8pmu_reset(void *info)
-{
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
-
- /* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv8pmu_disable_counter(idx);
- armv8pmu_disable_intens(idx);
- }
-
- /* Clear the counters we flip at guest entry/exit */
- kvm_clr_pmu_events(U32_MAX);
-
- /*
- * Initialize & Reset PMNC. Request overflow interrupt for
- * 64 bit cycle counter but cheat in armv8pmu_write_counter().
- */
- armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C |
- ARMV8_PMU_PMCR_LC);
-}
-
-static int __armv8_pmuv3_map_event(struct perf_event *event,
- const unsigned (*extra_event_map)
- [PERF_COUNT_HW_MAX],
- const unsigned (*extra_cache_map)
- [PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX])
-{
- int hw_event_id;
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-
- hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
- &armv8_pmuv3_perf_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
-
- if (armv8pmu_event_is_64bit(event))
- event->hw.flags |= ARMPMU_EVT_64BIT;
-
- /* Only expose micro/arch events supported by this PMU */
- if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
- && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
- return hw_event_id;
- }
-
- return armpmu_map_event(event, extra_event_map, extra_cache_map,
- ARMV8_PMU_EVTYPE_EVENT);
-}
-
-static int armv8_pmuv3_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL, NULL);
-}
-
-static int armv8_a53_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL, &armv8_a53_perf_cache_map);
-}
-
-static int armv8_a57_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL, &armv8_a57_perf_cache_map);
-}
-
-static int armv8_a73_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL, &armv8_a73_perf_cache_map);
-}
-
-static int armv8_thunder_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL,
- &armv8_thunder_perf_cache_map);
-}
-
-static int armv8_vulcan_map_event(struct perf_event *event)
-{
- return __armv8_pmuv3_map_event(event, NULL,
- &armv8_vulcan_perf_cache_map);
-}
-
-struct armv8pmu_probe_info {
- struct arm_pmu *pmu;
- bool present;
-};
-
-static void __armv8pmu_probe_pmu(void *info)
-{
- struct armv8pmu_probe_info *probe = info;
- struct arm_pmu *cpu_pmu = probe->pmu;
- u64 dfr0;
- u64 pmceid_raw[2];
- u32 pmceid[2];
- int pmuver;
-
- dfr0 = read_sysreg(id_aa64dfr0_el1);
- pmuver = cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver == 0xf || pmuver == 0)
- return;
-
- probe->present = true;
-
- /* Read the nb of CNTx counters supported from PMNC */
- cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
- & ARMV8_PMU_PMCR_N_MASK;
-
- /* Add the CPU cycles counter */
- cpu_pmu->num_events += 1;
-
- pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0);
- pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0);
-
- bitmap_from_arr32(cpu_pmu->pmceid_bitmap,
- pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-
- pmceid[0] = pmceid_raw[0] >> 32;
- pmceid[1] = pmceid_raw[1] >> 32;
-
- bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
- pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-}
-
-static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
-{
- struct armv8pmu_probe_info probe = {
- .pmu = cpu_pmu,
- .present = false,
- };
- int ret;
-
- ret = smp_call_function_any(&cpu_pmu->supported_cpus,
- __armv8pmu_probe_pmu,
- &probe, 1);
- if (ret)
- return ret;
-
- return probe.present ? 0 : -ENODEV;
-}
-
-static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8pmu_probe_pmu(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->handle_irq = armv8pmu_handle_irq;
- cpu_pmu->enable = armv8pmu_enable_event;
- cpu_pmu->disable = armv8pmu_disable_event;
- cpu_pmu->read_counter = armv8pmu_read_counter;
- cpu_pmu->write_counter = armv8pmu_write_counter;
- cpu_pmu->get_event_idx = armv8pmu_get_event_idx;
- cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx;
- cpu_pmu->start = armv8pmu_start;
- cpu_pmu->stop = armv8pmu_stop;
- cpu_pmu->reset = armv8pmu_reset;
- cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
- cpu_pmu->filter_match = armv8pmu_filter_match;
-
- return 0;
-}
-
-static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_pmuv3";
- cpu_pmu->map_event = armv8_pmuv3_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a35";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a53";
- cpu_pmu->map_event = armv8_a53_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a57";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a72";
- cpu_pmu->map_event = armv8_a57_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cortex_a73";
- cpu_pmu->map_event = armv8_a73_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_cavium_thunder";
- cpu_pmu->map_event = armv8_thunder_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
-{
- int ret = armv8_pmu_init(cpu_pmu);
- if (ret)
- return ret;
-
- cpu_pmu->name = "armv8_brcm_vulcan";
- cpu_pmu->map_event = armv8_vulcan_map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
- &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
- &armv8_pmuv3_format_attr_group;
-
- return 0;
-}
-
-static const struct of_device_id armv8_pmu_of_device_ids[] = {
- {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
- {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
- {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
- {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
- {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
- {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
- {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
- {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
- {},
-};
-
-static int armv8_pmu_device_probe(struct platform_device *pdev)
-{
- return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
-}
-
-static struct platform_driver armv8_pmu_driver = {
- .driver = {
- .name = ARMV8_PMU_PDEV_NAME,
- .of_match_table = armv8_pmu_of_device_ids,
- .suppress_bind_attrs = true,
- },
- .probe = armv8_pmu_device_probe,
-};
-
-static int __init armv8_pmu_driver_init(void)
-{
- if (acpi_disabled)
- return platform_driver_register(&armv8_pmu_driver);
- else
- return arm_pmu_acpi_probe(armv8_pmuv3_init);
-}
-device_initcall(armv8_pmu_driver_init)
-
-void arch_perf_update_userpage(struct perf_event *event,
- struct perf_event_mmap_page *userpg, u64 now)
-{
- u32 freq;
- u32 shift;
-
- /*
- * Internal timekeeping for enabled/running/stopped times
- * is always computed with the sched_clock.
- */
- freq = arch_timer_get_rate();
- userpg->cap_user_time = 1;
-
- clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
- NSEC_PER_SEC, 0);
- /*
- * time_shift is not expected to be greater than 31 due to
- * the original published conversion algorithm shifting a
- * 32-bit value (now specifies a 64-bit value) - refer
- * perf_event_mmap_page documentation in perf_event.h.
- */
- if (shift == 32) {
- shift = 31;
- userpg->time_mult >>= 1;
- }
- userpg->time_shift = (u16)shift;
- userpg->time_offset = -now;
-}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 0bbac612146e..b4eece3eb17d 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -9,21 +9,58 @@
#include <asm/perf_regs.h>
#include <asm/ptrace.h>
+static u64 perf_ext_regs_value(int idx)
+{
+ switch (idx) {
+ case PERF_REG_ARM64_VG:
+ if (WARN_ON_ONCE(!system_supports_sve()))
+ return 0;
+
+ /*
+ * Vector granule is current length in bits of SVE registers
+ * divided by 64.
+ */
+ return (task_get_sve_vl(current) * 8) / 64;
+ default:
+ WARN_ON_ONCE(true);
+ return 0;
+ }
+}
+
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
- if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+ if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_EXTENDED_MAX))
return 0;
/*
- * Compat (i.e. 32 bit) mode:
- * - PC has been set in the pt_regs struct in kernel_entry,
- * - Handle SP and LR here.
+ * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
+ * we're stuck with it for ABI compatibility reasons.
+ *
+ * For a 32-bit consumer inspecting a 32-bit task, then it will look at
+ * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
+ * These correspond directly to a prefix of the registers saved in our
+ * 'struct pt_regs', with the exception of the PC, so we copy that down
+ * (x15 corresponds to SP_hyp in the architecture).
+ *
+ * So far, so good.
+ *
+ * The oddity arises when a 64-bit consumer looks at a 32-bit task and
+ * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return
+ * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and
+ * PC registers would normally live. The initial idea was to allow a
+ * 64-bit unwinder to unwind a 32-bit task and, although it's not clear
+ * how well that works in practice, somebody might be relying on it.
+ *
+ * At the time we make a sample, we don't know whether the consumer is
+ * 32-bit or 64-bit, so we have to cater for both possibilities.
*/
if (compat_user_mode(regs)) {
if ((u32)idx == PERF_REG_ARM64_SP)
return regs->compat_sp;
if ((u32)idx == PERF_REG_ARM64_LR)
return regs->compat_lr;
+ if (idx == 15)
+ return regs->pc;
}
if ((u32)idx == PERF_REG_ARM64_SP)
@@ -32,6 +69,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if ((u32)idx == PERF_REG_ARM64_PC)
return regs->pc;
+ if ((u32)idx >= PERF_REG_ARM64_MAX)
+ return perf_ext_regs_value(idx);
+
return regs->regs[idx];
}
@@ -39,7 +79,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
int perf_reg_validate(u64 mask)
{
- if (!mask || mask & REG_RESERVED)
+ u64 reserved_mask = REG_RESERVED;
+
+ if (system_supports_sve())
+ reserved_mask &= ~(1ULL << PERF_REG_ARM64_VG);
+
+ if (!mask || mask & reserved_mask)
return -EINVAL;
return 0;
@@ -54,8 +99,7 @@ u64 perf_reg_abi(struct task_struct *task)
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
new file mode 100644
index 000000000000..c844a0546d7f
--- /dev/null
+++ b/arch/arm64/kernel/pi/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 Google LLC
+
+KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
+ $(call cc-option,-mbranch-protection=none) \
+ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+ -include $(srctree)/include/linux/hidden.h \
+ -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+ -fno-asynchronous-unwind-tables -fno-unwind-tables \
+ $(call cc-option,-fno-addrsig)
+
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+ --remove-section=.note.gnu.property \
+ --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
new file mode 100644
index 000000000000..17bff6e399e4
--- /dev/null
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+static bool cmdline_contains_nokaslr(const u8 *cmdline)
+{
+ const u8 *str;
+
+ str = __strstr(cmdline, "nokaslr");
+ return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static bool is_kaslr_disabled_cmdline(void *fdt)
+{
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+
+ if (cmdline_contains_nokaslr(prop))
+ return true;
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ goto out;
+
+ return false;
+ }
+out:
+ return cmdline_contains_nokaslr(CONFIG_CMDLINE);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+asmlinkage u64 kaslr_early_init(void *fdt)
+{
+ u64 seed;
+
+ if (is_kaslr_disabled_cmdline(fdt))
+ return 0;
+
+ seed = get_kaslr_seed(fdt);
+ if (!seed) {
+ if (!__early_cpu_has_rndr() ||
+ !__arm64_rndr((unsigned long *)&seed))
+ return 0;
+ }
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+ * the lower and upper quarters to avoid colliding with other
+ * allocations.
+ */
+ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index c507b584259d..2708b620b4ae 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/prctl.h>
#include <linux/random.h>
@@ -9,7 +10,7 @@
int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
{
- struct ptrauth_keys *keys = &tsk->thread.keys_user;
+ struct ptrauth_keys_user *keys = &tsk->thread.keys_user;
unsigned long addr_key_mask = PR_PAC_APIAKEY | PR_PAC_APIBKEY |
PR_PAC_APDAKEY | PR_PAC_APDBKEY;
unsigned long key_mask = addr_key_mask | PR_PAC_APGAKEY;
@@ -17,9 +18,11 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
if (!system_supports_address_auth() && !system_supports_generic_auth())
return -EINVAL;
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
if (!arg) {
- ptrauth_keys_init(keys);
- ptrauth_keys_switch(keys);
+ ptrauth_keys_init_user(keys);
return 0;
}
@@ -40,8 +43,71 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
get_random_bytes(&keys->apdb, sizeof(keys->apdb));
if (arg & PR_PAC_APGAKEY)
get_random_bytes(&keys->apga, sizeof(keys->apga));
+ ptrauth_keys_install_user(keys);
+
+ return 0;
+}
+
+static u64 arg_to_enxx_mask(unsigned long arg)
+{
+ u64 sctlr_enxx_mask = 0;
+
+ WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
+ if (arg & PR_PAC_APIAKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENIA;
+ if (arg & PR_PAC_APIBKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENIB;
+ if (arg & PR_PAC_APDAKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENDA;
+ if (arg & PR_PAC_APDBKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENDB;
+ return sctlr_enxx_mask;
+}
+
+int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+ unsigned long enabled)
+{
+ u64 sctlr;
- ptrauth_keys_switch(keys);
+ if (!system_supports_address_auth())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
+ if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
+ return -EINVAL;
+
+ preempt_disable();
+ sctlr = tsk->thread.sctlr_user;
+ sctlr &= ~arg_to_enxx_mask(keys);
+ sctlr |= arg_to_enxx_mask(enabled);
+ tsk->thread.sctlr_user = sctlr;
+ if (tsk == current)
+ update_sctlr_el1(sctlr);
+ preempt_enable();
return 0;
}
+
+int ptrauth_get_enabled_keys(struct task_struct *tsk)
+{
+ int retval = 0;
+
+ if (!system_supports_address_auth())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
+ retval |= PR_PAC_APIAKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB)
+ retval |= PR_PAC_APIBKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA)
+ retval |= PR_PAC_APDAKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB)
+ retval |= PR_PAC_APDBKEY;
+
+ return retval;
+}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index b78fac9e546c..968d5fffe233 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -24,12 +24,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* currently safe. Lastly, MSR instructions can do any number of nasty
* things we can't handle during single-stepping.
*/
- if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) {
+ if (aarch64_insn_is_class_branch_sys(insn)) {
if (aarch64_insn_is_branch(insn) ||
aarch64_insn_is_msr_imm(insn) ||
aarch64_insn_is_msr_reg(insn) ||
aarch64_insn_is_exception(insn) ||
- aarch64_insn_is_eret(insn))
+ aarch64_insn_is_eret(insn) ||
+ aarch64_insn_is_eret_auth(insn))
return false;
/*
@@ -42,11 +43,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
!= AARCH64_INSN_SPCLREG_DAIF;
/*
- * The HINT instruction is is problematic when single-stepping,
- * except for the NOP case.
+ * The HINT instruction is steppable only if it is in whitelist
+ * and the rest of other such instructions are blocked for
+ * single stepping as they may cause exception or other
+ * unintended behaviour.
*/
if (aarch64_insn_is_hint(insn))
- return aarch64_insn_is_nop(insn);
+ return aarch64_insn_is_steppable_hint(insn);
return true;
}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d1c95dcf1d78..70b91a8c6bb3 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -7,26 +7,31 @@
* Copyright (C) 2013 Linaro Limited.
* Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
*/
+
+#define pr_fmt(fmt) "kprobes: " fmt
+
+#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
-#include <linux/extable.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
#include <linux/sched/debug.h>
#include <linux/set_memory.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
#include <linux/stringify.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <asm/traps.h>
-#include <asm/ptrace.h>
+
#include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
#include <asm/daifflags.h>
-#include <asm/system_misc.h>
+#include <asm/debug-monitors.h>
#include <asm/insn.h>
-#include <linux/uaccess.h>
#include <asm/irq.h>
+#include <asm/patching.h>
+#include <asm/ptrace.h>
#include <asm/sections.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
#include "decode-insn.h"
@@ -34,27 +39,33 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
-post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
-
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
-{
- void *addrs[1];
- u32 insns[1];
-
- addrs[0] = addr;
- insns[0] = opcode;
-
- return aarch64_insn_patch_text(addrs, insns, 1);
-}
+post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
- /* prepare insn slot */
- patch_text(p->ainsn.api.insn, p->opcode);
+ kprobe_opcode_t *addr = p->ainsn.api.insn;
- flush_icache_range((uintptr_t) (p->ainsn.api.insn),
- (uintptr_t) (p->ainsn.api.insn) +
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ /*
+ * Prepare insn slot, Mark Rutland points out it depends on a coupe of
+ * subtleties:
+ *
+ * - That the I-cache maintenance for these instructions is complete
+ * *before* the kprobe BRK is written (and aarch64_insn_patch_text_nosync()
+ * ensures this, but just omits causing a Context-Synchronization-Event
+ * on all CPUS).
+ *
+ * - That the kprobe BRK results in an exception (and consequently a
+ * Context-Synchronoization-Event), which ensures that the CPU will
+ * fetch thesingle-step slot instructions *after* this, ensuring that
+ * the new instructions are used
+ *
+ * It supposes to place ISB after patching to guarantee I-cache maintenance
+ * is observed on all CPUS, however, single-step slot is installed in
+ * the BRK exception handler, so it is unnecessary to generate
+ * Contex-Synchronization-Event via ISB again.
+ */
+ aarch64_insn_patch_text_nosync(addr, p->opcode);
+ aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
/*
* Needs restoring of return address after stepping xol.
@@ -77,7 +88,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
/* single step simulated, now go for post processing */
- post_kprobe_handler(kcb, regs);
+ post_kprobe_handler(p, kcb, regs);
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -120,27 +131,26 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void *alloc_insn_page(void)
{
- void *page;
-
- page = vmalloc_exec(PAGE_SIZE);
- if (page) {
- set_memory_ro((unsigned long)page, 1);
- set_vm_flush_reset_perms(page);
- }
-
- return page;
+ return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
}
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, BRK64_OPCODE_KPROBES);
+ void *addr = p->addr;
+ u32 insn = BRK64_OPCODE_KPROBES;
+
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
/* disarm kprobe: remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, p->opcode);
+ void *addr = p->addr;
+
+ aarch64_insn_patch_text(&addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
@@ -169,20 +179,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
}
/*
- * Interrupts need to be disabled before single-step mode is set, and not
- * reenabled until after single-step mode ends.
- * Without disabling interrupt on local CPU, there is a chance of
- * interrupt occurrence in the period of exception return and start of
- * out-of-line single-step, that result in wrongly single stepping
- * into the interrupt handler.
+ * Mask all of DAIF while executing the instruction out-of-line, to keep things
+ * simple and avoid nesting exceptions. Interrupts do have to be disabled since
+ * the kprobe state is per-CPU and doesn't get migrated.
*/
static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
struct pt_regs *regs)
{
kcb->saved_irqflag = regs->pstate & DAIF_MASK;
- regs->pstate |= PSR_I_BIT;
- /* Unmask PSTATE.D for enabling software step exceptions. */
- regs->pstate &= ~PSR_D_BIT;
+ regs->pstate |= DAIF_MASK;
}
static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
@@ -192,19 +197,6 @@ static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
regs->pstate |= kcb->saved_irqflag;
}
-static void __kprobes
-set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
-{
- kcb->ss_ctx.ss_pending = true;
- kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
-}
-
-static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
-{
- kcb->ss_ctx.ss_pending = false;
- kcb->ss_ctx.match_addr = 0;
-}
-
static void __kprobes setup_singlestep(struct kprobe *p,
struct pt_regs *regs,
struct kprobe_ctlblk *kcb, int reenter)
@@ -224,11 +216,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,
/* prepare for single stepping */
slot = (unsigned long)p->ainsn.api.insn;
- set_ss_context(kcb, slot); /* mark pending ss */
-
- /* IRQs and single stepping do not mix well. */
kprobes_save_local_irqflag(kcb, regs);
- kernel_enable_single_step(regs);
instruction_pointer_set(regs, slot);
} else {
/* insn simulation */
@@ -248,7 +236,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
break;
case KPROBE_HIT_SS:
case KPROBE_REENTER:
- pr_warn("Unrecoverable kprobe detected.\n");
+ pr_warn("Failed to recover from reentered kprobes.\n");
dump_kprobe(p);
BUG();
break;
@@ -261,13 +249,8 @@ static int __kprobes reenter_kprobe(struct kprobe *p,
}
static void __kprobes
-post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
{
- struct kprobe *cur = kprobe_running();
-
- if (!cur)
- return;
-
/* return addr restore if non-branching insn */
if (cur->ainsn.api.restore != 0)
instruction_pointer_set(regs, cur->ainsn.api.restore);
@@ -279,12 +262,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
}
/* call post handler */
kcb->kprobe_status = KPROBE_HIT_SSDONE;
- if (cur->post_handler) {
- /* post_handler can hit breakpoint and single step
- * again, so we enable D-flag for recursive exception.
- */
+ if (cur->post_handler)
cur->post_handler(cur, regs, 0);
- }
reset_current_kprobe();
}
@@ -305,47 +284,22 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
* normal page fault.
*/
instruction_pointer_set(regs, (unsigned long) cur->addr);
- if (!instruction_pointer(regs))
- BUG();
+ BUG_ON(!instruction_pointer(regs));
- kernel_disable_single_step();
-
- if (kcb->kprobe_status == KPROBE_REENTER)
+ if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
- else
+ } else {
+ kprobes_restore_local_irqflag(kcb, regs);
reset_current_kprobe();
+ }
break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
- return 1;
-
- /*
- * In case the user-specified fault handler returned
- * zero, try to fix up.
- */
- if (fixup_exception(regs))
- return 1;
}
return 0;
}
-static void __kprobes kprobe_handler(struct pt_regs *regs)
+static int __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe *p, *cur_kprobe;
struct kprobe_ctlblk *kcb;
@@ -355,88 +309,66 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
cur_kprobe = kprobe_running();
p = get_kprobe((kprobe_opcode_t *) addr);
-
- if (p) {
- if (cur_kprobe) {
- if (reenter_kprobe(p, regs, kcb))
- return;
- } else {
- /* Probe hit */
- set_current_kprobe(p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-
- /*
- * If we have no pre-handler or it returned 0, we
- * continue with normal processing. If we have a
- * pre-handler and it returned non-zero, it will
- * modify the execution path and no need to single
- * stepping. Let's just reset current kprobe and exit.
- *
- * pre_handler can hit a breakpoint and can step thru
- * before return, keep PSTATE D-flag enabled until
- * pre_handler return back.
- */
- if (!p->pre_handler || !p->pre_handler(p, regs)) {
- setup_singlestep(p, regs, kcb, 0);
- } else
- reset_current_kprobe();
- }
+ if (WARN_ON_ONCE(!p)) {
+ /*
+ * Something went wrong. This BRK used an immediate reserved
+ * for kprobes, but we couldn't find any corresponding probe.
+ */
+ return DBG_HOOK_ERROR;
}
- /*
- * The breakpoint instruction was removed right
- * after we hit it. Another cpu has removed
- * either a probepoint or a debugger breakpoint
- * at this address. In either case, no further
- * handling of this interrupt is appropriate.
- * Return back to original instruction, and continue.
- */
-}
-static int __kprobes
-kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
-{
- if ((kcb->ss_ctx.ss_pending)
- && (kcb->ss_ctx.match_addr == addr)) {
- clear_ss_context(kcb); /* clear pending ss */
- return DBG_HOOK_HANDLED;
+ if (cur_kprobe) {
+ /* Hit a kprobe inside another kprobe */
+ if (!reenter_kprobe(p, regs, kcb))
+ return DBG_HOOK_ERROR;
+ } else {
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it will
+ * modify the execution path and not need to single-step
+ * Let's just reset current kprobe and exit.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
}
- /* not ours, kprobes should ignore it */
- return DBG_HOOK_ERROR;
+
+ return DBG_HOOK_HANDLED;
}
+static struct break_hook kprobes_break_hook = {
+ .imm = KPROBES_BRK_IMM,
+ .fn = kprobe_breakpoint_handler,
+};
+
static int __kprobes
-kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- int retval;
-
- /* return error if this is not our step */
- retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
+ unsigned long addr = instruction_pointer(regs);
+ struct kprobe *cur = kprobe_running();
- if (retval == DBG_HOOK_HANDLED) {
+ if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
+ ((unsigned long)&cur->ainsn.api.insn[1] == addr)) {
kprobes_restore_local_irqflag(kcb, regs);
- kernel_disable_single_step();
+ post_kprobe_handler(cur, kcb, regs);
- post_kprobe_handler(kcb, regs);
+ return DBG_HOOK_HANDLED;
}
- return retval;
-}
-
-static struct step_hook kprobes_step_hook = {
- .fn = kprobe_single_step_handler,
-};
-
-static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
-{
- kprobe_handler(regs);
- return DBG_HOOK_HANDLED;
+ /* not ours, kprobes should ignore it */
+ return DBG_HOOK_ERROR;
}
-static struct break_hook kprobes_break_hook = {
- .imm = KPROBES_BRK_IMM,
- .fn = kprobe_breakpoint_handler,
+static struct break_hook kprobes_break_ss_hook = {
+ .imm = KPROBES_BRK_SS_IMM,
+ .fn = kprobe_breakpoint_ss_handler,
};
/*
@@ -455,10 +387,6 @@ int __init arch_populate_kprobe_blacklist(void)
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
- ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
- (unsigned long)__idmap_text_end);
- if (ret)
- return ret;
ret = kprobe_add_area_blacklist((unsigned long)__hyp_text_start,
(unsigned long)__hyp_text_end);
if (ret || is_kernel_in_hyp_mode())
@@ -470,90 +398,17 @@ int __init arch_populate_kprobe_blacklist(void)
void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =
- (unsigned long)&kretprobe_trampoline;
- kprobe_opcode_t *correct_ret_addr = NULL;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because multiple functions in the call path have
- * return probes installed on them, and/or more than one
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always pushed into the head of the list
- * - when multiple return probes are registered for the same
- * function, the (chronologically) first instance's ret_addr
- * will be the real return address, and all the rest will
- * point to kretprobe_trampoline.
- */
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long)ri->ret_addr;
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
- correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- if (ri->rp && ri->rp->handler) {
- __this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
- ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
-
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_hash_unlock(current, &flags);
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- return (void *)orig_ret_address;
+ return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
}
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
+ ri->fp = (void *)regs->regs[29];
/* replace return addr (x30) with trampoline */
- regs->regs[30] = (long)&kretprobe_trampoline;
+ regs->regs[30] = (long)&__kretprobe_trampoline;
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
@@ -564,7 +419,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_step_hook(&kprobes_step_hook);
+ register_kernel_break_hook(&kprobes_break_ss_hook);
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 45dce03aaeaf..9a6499bed58b 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -25,7 +25,7 @@
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
- add x0, sp, #S_FRAME_SIZE
+ add x0, sp, #PT_REGS_SIZE
stp lr, x0, [sp, #S_LR]
/*
* Construct a useful saved PSTATE
@@ -61,11 +61,14 @@
ldp x28, x29, [sp, #S_X28]
.endm
-ENTRY(kretprobe_trampoline)
- sub sp, sp, #S_FRAME_SIZE
+SYM_CODE_START(__kretprobe_trampoline)
+ sub sp, sp, #PT_REGS_SIZE
save_all_base_regs
+ /* Setup a frame pointer. */
+ add x29, sp, #S_FP
+
mov x0, sp
bl trampoline_probe_handler
/*
@@ -74,9 +77,10 @@ ENTRY(kretprobe_trampoline)
*/
mov lr, x0
+ /* The frame pointer (x29) is restored with other registers. */
restore_all_base_regs
- add sp, sp, #S_FRAME_SIZE
+ add sp, sp, #PT_REGS_SIZE
ret
-ENDPROC(kretprobe_trampoline)
+SYM_CODE_END(__kretprobe_trampoline)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 25f67ec59635..22d0b3252476 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -10,6 +10,7 @@
#include <linux/kprobes.h>
#include <asm/ptrace.h>
+#include <asm/traps.h>
#include "simulate-insn.h"
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index a412d8edbcd2..d49aef2657cd 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
- sync_icache_aliases(dst, len);
+ sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
kunmap_atomic(xol_page_kaddr);
}
@@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
@@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
}
static int uprobe_breakpoint_handler(struct pt_regs *regs,
- unsigned int esr)
+ unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
return DBG_HOOK_HANDLED;
@@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
}
static int uprobe_single_step_handler(struct pt_regs *regs,
- unsigned int esr)
+ unsigned long esr)
{
struct uprobe_task *utask = current->utask;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..7387b68c745b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -6,19 +6,18 @@
* Copyright (C) 1996-2000 Russell King - Converted to ARM.
* Copyright (C) 2012 ARM Ltd.
*/
-
-#include <stdarg.h>
-
#include <linux/compat.h>
#include <linux/efi.h>
+#include <linux/elf.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
-#include <linux/lockdep.h>
+#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/nospec.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/unistd.h>
@@ -41,22 +40,25 @@
#include <linux/percpu.h>
#include <linux/thread_info.h>
#include <linux/prctl.h>
+#include <linux/stacktrace.h>
#include <asm/alternative.h>
-#include <asm/arch_gicv3.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
+#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
+#include <asm/switch_to.h>
+#include <asm/system_misc.h>
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
-unsigned long __stack_chk_guard __read_mostly;
+unsigned long __stack_chk_guard __ro_after_init;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
@@ -66,69 +68,8 @@ EXPORT_SYMBOL(__stack_chk_guard);
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
-void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-
-static void __cpu_do_idle(void)
-{
- dsb(sy);
- wfi();
-}
-
-static void __cpu_do_idle_irqprio(void)
-{
- unsigned long pmr;
- unsigned long daif_bits;
-
- daif_bits = read_sysreg(daif);
- write_sysreg(daif_bits | PSR_I_BIT, daif);
-
- /*
- * Unmask PMR before going idle to make sure interrupts can
- * be raised.
- */
- pmr = gic_read_pmr();
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
- __cpu_do_idle();
-
- gic_write_pmr(pmr);
- write_sysreg(daif_bits, daif);
-}
-
-/*
- * cpu_do_idle()
- *
- * Idle the processor (wait for interrupt).
- *
- * If the CPU supports priority masking we must do additional work to
- * ensure that interrupts are not masked at the PMR (because the core will
- * not wake up if we block the wake up signal in the interrupt controller).
- */
-void cpu_do_idle(void)
-{
- if (system_uses_irq_prio_masking())
- __cpu_do_idle_irqprio();
- else
- __cpu_do_idle();
-}
-
-/*
- * This is our default idle handler.
- */
-void arch_cpu_idle(void)
-{
- /*
- * This should do all the clock switching and wait for interrupt
- * tricks
- */
- trace_cpu_idle_rcuidle(1, smp_processor_id());
- cpu_do_idle();
- local_irq_enable();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-}
-
#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
cpu_die();
}
@@ -141,11 +82,11 @@ void arch_cpu_idle_dead(void)
* to execute e.g. a RAM-based pin loop is not sufficient. This allows the
* kexec'd kernel to use any and all RAM as it sees fit, without having to
* avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
+ * functionality embodied in smpt_shutdown_nonboot_cpus() to achieve this.
*/
void machine_shutdown(void)
{
- disable_nonboot_cpus();
+ smp_shutdown_nonboot_cpus(reboot_cpu);
}
/*
@@ -170,8 +111,7 @@ void machine_power_off(void)
{
local_irq_disable();
smp_send_stop();
- if (pm_power_off)
- pm_power_off();
+ do_kernel_power_off();
}
/*
@@ -197,10 +137,7 @@ void machine_restart(char *cmd)
efi_reboot(reboot_mode, NULL);
/* Now call the architecture specific reboot code. */
- if (arm_pm_restart)
- arm_pm_restart(reboot_mode, cmd);
- else
- do_kernel_restart(cmd);
+ do_kernel_restart(cmd);
/*
* Whoops - the architecture was unable to reboot.
@@ -209,12 +146,21 @@ void machine_restart(char *cmd)
while (1);
}
+#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
+static const char *const btypes[] = {
+ bstr(NONE, "--"),
+ bstr( JC, "jc"),
+ bstr( C, "-c"),
+ bstr( J , "j-")
+};
+#undef bstr
+
static void print_pstate(struct pt_regs *regs)
{
u64 pstate = regs->pstate;
if (compat_user_mode(regs)) {
- printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
+ printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c %cDIT %cSSBS)\n",
pstate,
pstate & PSR_AA32_N_BIT ? 'N' : 'n',
pstate & PSR_AA32_Z_BIT ? 'Z' : 'z',
@@ -225,9 +171,14 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_AA32_E_BIT ? "BE" : "LE",
pstate & PSR_AA32_A_BIT ? 'A' : 'a',
pstate & PSR_AA32_I_BIT ? 'I' : 'i',
- pstate & PSR_AA32_F_BIT ? 'F' : 'f');
+ pstate & PSR_AA32_F_BIT ? 'F' : 'f',
+ pstate & PSR_AA32_DIT_BIT ? '+' : '-',
+ pstate & PSR_AA32_SSBS_BIT ? '+' : '-');
} else {
- printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+ const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
+ PSR_BTYPE_SHIFT];
+
+ printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO %cDIT %cSSBS BTYPE=%s)\n",
pstate,
pstate & PSR_N_BIT ? 'N' : 'n',
pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -238,7 +189,11 @@ static void print_pstate(struct pt_regs *regs)
pstate & PSR_I_BIT ? 'I' : 'i',
pstate & PSR_F_BIT ? 'F' : 'f',
pstate & PSR_PAN_BIT ? '+' : '-',
- pstate & PSR_UAO_BIT ? '+' : '-');
+ pstate & PSR_UAO_BIT ? '+' : '-',
+ pstate & PSR_TCO_BIT ? '+' : '-',
+ pstate & PSR_DIT_BIT ? '+' : '-',
+ pstate & PSR_SSBS_BIT ? '+' : '-',
+ btype_str);
}
}
@@ -262,7 +217,7 @@ void __show_regs(struct pt_regs *regs)
if (!user_mode(regs)) {
printk("pc : %pS\n", (void *)regs->pc);
- printk("lr : %pS\n", (void *)lr);
+ printk("lr : %pS\n", (void *)ptrauth_strip_kernel_insn_pac(lr));
} else {
printk("pc : %016llx\n", regs->pc);
printk("lr : %016llx\n", lr);
@@ -276,27 +231,26 @@ void __show_regs(struct pt_regs *regs)
i = top_reg;
while (i >= 0) {
- printk("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
+ printk("x%-2d: %016llx", i, regs->regs[i]);
- if (i % 2 == 0) {
- pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
- }
+ while (i-- % 3)
+ pr_cont(" x%-2d: %016llx", i, regs->regs[i]);
pr_cont("\n");
}
}
-void show_regs(struct pt_regs * regs)
+void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
- dump_backtrace(regs, NULL);
+ dump_backtrace(regs, NULL, KERN_DEFAULT);
}
static void tls_thread_flush(void)
{
write_sysreg(0, tpidr_el0);
+ if (system_supports_tpidr2())
+ write_sysreg_s(0, SYS_TPIDR2_EL0);
if (is_compat_task()) {
current->thread.uw.tp_value = 0;
@@ -325,10 +279,6 @@ void flush_thread(void)
flush_tagged_addr_state();
}
-void release_thread(struct task_struct *dead_task)
-{
-}
-
void arch_release_task_struct(struct task_struct *tsk)
{
fpsimd_release_task(tsk);
@@ -345,24 +295,58 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* Detach src's sve_state (if any) from dst so that it does not
- * get erroneously used or freed prematurely. dst's sve_state
+ * get erroneously used or freed prematurely. dst's copies
* will be allocated on demand later on if dst uses SVE.
* For consistency, also clear TIF_SVE here: this could be done
* later in copy_process(), but to avoid tripping up future
- * maintainers it is best not to leave TIF_SVE and sve_state in
+ * maintainers it is best not to leave TIF flags and buffers in
* an inconsistent state, even temporarily.
*/
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ /*
+ * In the unlikely event that we create a new thread with ZA
+ * enabled we should retain the ZA and ZT state so duplicate
+ * it here. This may be shortly freed if we exec() or if
+ * CLONE_SETTLS but it's simpler to do it here. To avoid
+ * confusing the rest of the code ensure that we have a
+ * sve_state allocated whenever sme_state is allocated.
+ */
+ if (thread_za_enabled(&src->thread)) {
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ dst->thread.sme_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ }
+
+ dst->thread.fp_type = FP_STATE_FPSIMD;
+
+ /* clear any pending asynchronous tag fault raised by the parent */
+ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
+
return 0;
}
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
-int copy_thread(unsigned long clone_flags, unsigned long stack_start,
- unsigned long stk_sz, struct task_struct *p)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long stack_start = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -376,7 +360,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
*/
fpsimd_flush_task_state(p);
- if (likely(!(p->flags & PF_KTHREAD))) {
+ ptrauth_thread_init_kernel(p);
+
+ if (likely(!args->fn)) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -385,6 +371,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
+ if (system_supports_tpidr2())
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
@@ -394,29 +382,34 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
}
/*
- * If a TLS pointer was passed to clone (4th argument), use it
- * for the new thread.
+ * If a TLS pointer was passed to clone, use it for the new
+ * thread. We also reset TPIDR2 if it's in use.
*/
- if (clone_flags & CLONE_SETTLS)
- p->thread.uw.tp_value = childregs->regs[3];
+ if (clone_flags & CLONE_SETTLS) {
+ p->thread.uw.tp_value = tls;
+ p->thread.tpidr2_el0 = 0;
+ }
} else {
+ /*
+ * A kthread has no context to ERET to, so ensure any buggy
+ * ERET is treated as an illegal exception return.
+ *
+ * When a user task is created from a kthread, childregs will
+ * be initialized by start_thread() or start_compat_thread().
+ */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->pstate = PSR_MODE_EL1h;
- if (IS_ENABLED(CONFIG_ARM64_UAO) &&
- cpus_have_const_cap(ARM64_HAS_UAO))
- childregs->pstate |= PSR_UAO_BIT;
-
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- set_ssbs_bit(childregs);
+ childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
- if (system_uses_irq_prio_masking())
- childregs->pmr_save = GIC_PRIO_IRQON;
-
- p->thread.cpu_context.x19 = stack_start;
- p->thread.cpu_context.x20 = stk_sz;
+ p->thread.cpu_context.x19 = (unsigned long)args->fn;
+ p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+ /*
+ * For the benefit of the unwinder, set up childregs->stackframe
+ * as the final frame for the new task.
+ */
+ p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
ptrace_hw_copy_thread(p);
@@ -426,6 +419,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
void tls_preserve_current_state(void)
{
*task_user_tls(current) = read_sysreg(tpidr_el0);
+ if (system_supports_tpidr2() && !is_compat_task())
+ current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
}
static void tls_thread_switch(struct task_struct *next)
@@ -438,17 +433,8 @@ static void tls_thread_switch(struct task_struct *next)
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
-}
-
-/* Restore the UAO state depending on next's addr_limit */
-void uao_thread_switch(struct task_struct *next)
-{
- if (IS_ENABLED(CONFIG_ARM64_UAO)) {
- if (task_thread_info(next)->addr_limit == KERNEL_DS)
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
- else
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
- }
+ if (system_supports_tpidr2())
+ write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0);
}
/*
@@ -457,8 +443,6 @@ void uao_thread_switch(struct task_struct *next)
*/
static void ssbs_thread_switch(struct task_struct *next)
{
- struct pt_regs *regs = task_pt_regs(next);
-
/*
* Nothing to do for kernel threads, but 'regs' may be junk
* (e.g. idle task) so check the flags and bail early.
@@ -466,15 +450,14 @@ static void ssbs_thread_switch(struct task_struct *next)
if (unlikely(next->flags & PF_KTHREAD))
return;
- /* If the mitigation is enabled, then we leave SSBS clear. */
- if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
- test_tsk_thread_flag(next, TIF_SSBD))
+ /*
+ * If all CPUs implement the SSBS extension, then we just need to
+ * context-switch the PSTATE field.
+ */
+ if (alternative_has_cap_unlikely(ARM64_SSBS))
return;
- if (compat_user_mode(regs))
- set_compat_ssbs_bit(regs);
- else if (user_mode(regs))
- set_ssbs_bit(regs);
+ spectre_v4_enable_task_mitigation(next);
}
/*
@@ -492,9 +475,52 @@ static void entry_task_switch(struct task_struct *next)
}
/*
+ * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
+ * Ensure access is disabled when switching to a 32bit task, ensure
+ * access is enabled when switching to a 64bit task.
+ */
+static void erratum_1418040_thread_switch(struct task_struct *next)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
+ !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
+ return;
+
+ if (is_compat_thread(task_thread_info(next)))
+ sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
+ else
+ sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
+}
+
+static void erratum_1418040_new_exec(void)
+{
+ preempt_disable();
+ erratum_1418040_thread_switch(current);
+ preempt_enable();
+}
+
+/*
+ * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
+ * this function must be called with preemption disabled and the update to
+ * sctlr_user must be made in the same preemption disabled block so that
+ * __switch_to() does not see the variable update before the SCTLR_EL1 one.
+ */
+void update_sctlr_el1(u64 sctlr)
+{
+ /*
+ * EnIA must not be cleared while in the kernel as this is necessary for
+ * in-kernel PAC. It will be cleared on kernel exit if needed.
+ */
+ sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+
+ /* ISB required for the kernel uaccess routines when setting TCF0. */
+ isb();
+}
+
+/*
* Thread switching.
*/
-__notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
+__notrace_funcgraph __sched
+struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next)
{
struct task_struct *last;
@@ -504,9 +530,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
entry_task_switch(next);
- uao_thread_switch(next);
- ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ erratum_1418040_thread_switch(next);
+ ptrauth_thread_switch_user(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -516,55 +542,118 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
*/
dsb(ish);
+ /*
+ * MTE thread switching must happen after the DSB above to ensure that
+ * any asynchronous tag check faults have been logged in the TFSR*_EL1
+ * registers.
+ */
+ mte_thread_switch(next);
+ /* avoid expensive SCTLR_EL1 accesses if no change */
+ if (prev->thread.sctlr_user != next->thread.sctlr_user)
+ update_sctlr_el1(next->thread.sctlr_user);
+
/* the actual thread switch */
last = cpu_switch_to(prev, next);
return last;
}
-unsigned long get_wchan(struct task_struct *p)
+struct wchan_info {
+ unsigned long pc;
+ int count;
+};
+
+static bool get_wchan_cb(void *arg, unsigned long pc)
{
- struct stackframe frame;
- unsigned long stack_page, ret = 0;
- int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
+ struct wchan_info *wchan_info = arg;
- stack_page = (unsigned long)try_get_task_stack(p);
- if (!stack_page)
- return 0;
+ if (!in_sched_functions(pc)) {
+ wchan_info->pc = pc;
+ return false;
+ }
+ return wchan_info->count++ < 16;
+}
+
+unsigned long __get_wchan(struct task_struct *p)
+{
+ struct wchan_info wchan_info = {
+ .pc = 0,
+ .count = 0,
+ };
- start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+ if (!try_get_task_stack(p))
+ return 0;
- do {
- if (unwind_frame(p, &frame))
- goto out;
- if (!in_sched_functions(frame.pc)) {
- ret = frame.pc;
- goto out;
- }
- } while (count ++ < 16);
+ arch_stack_walk(get_wchan_cb, &wchan_info, p, NULL);
-out:
put_task_stack(p);
- return ret;
+
+ return wchan_info.pc;
}
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= get_random_u32_below(PAGE_SIZE);
return sp & ~0xf;
}
+#ifdef CONFIG_COMPAT
+int compat_elf_check_arch(const struct elf32_hdr *hdr)
+{
+ if (!system_supports_32bit_el0())
+ return false;
+
+ if ((hdr)->e_machine != EM_ARM)
+ return false;
+
+ if (!((hdr)->e_flags & EF_ARM_EABI_MASK))
+ return false;
+
+ /*
+ * Prevent execve() of a 32-bit program from a deadline task
+ * if the restricted affinity mask would be inadmissible on an
+ * asymmetric system.
+ */
+ return !static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+ !dl_task_check_affinity(current, system_32bit_el0_cpumask());
+}
+#endif
+
/*
* Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
*/
void arch_setup_new_exec(void)
{
- current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+ unsigned long mmflags = 0;
+
+ if (is_compat_task()) {
+ mmflags = MMCF_AARCH32;
+
+ /*
+ * Restrict the CPU affinity mask for a 32-bit task so that
+ * it contains only 32-bit-capable CPUs.
+ *
+ * From the perspective of the task, this looks similar to
+ * what would happen if the 64-bit-only CPUs were hot-unplugged
+ * at the point of execve(), although we try a bit harder to
+ * honour the cpuset hierarchy.
+ */
+ if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ force_compatible_cpus_allowed_ptr(current);
+ } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
+ relax_compatible_cpus_allowed_ptr(current);
+ }
- ptrauth_thread_init_user(current);
+ current->mm->context.flags = mmflags;
+ ptrauth_thread_init_user();
+ mte_thread_init_user();
+ erratum_1418040_new_exec();
+
+ if (task_spec_ssb_noexec(current)) {
+ arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+ PR_SPEC_ENABLE);
+ }
}
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
@@ -573,11 +662,19 @@ void arch_setup_new_exec(void)
*/
static unsigned int tagged_addr_disabled;
-long set_tagged_addr_ctrl(unsigned long arg)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
{
- if (is_compat_task())
+ unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+ struct thread_info *ti = task_thread_info(task);
+
+ if (is_compat_thread(ti))
return -EINVAL;
- if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+ if (system_supports_mte())
+ valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
+ | PR_MTE_TAG_MASK;
+
+ if (arg & ~valid_mask)
return -EINVAL;
/*
@@ -587,20 +684,28 @@ long set_tagged_addr_ctrl(unsigned long arg)
if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
return -EINVAL;
- update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+ if (set_mte_ctrl(task, arg) != 0)
+ return -EINVAL;
+
+ update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
return 0;
}
-long get_tagged_addr_ctrl(void)
+long get_tagged_addr_ctrl(struct task_struct *task)
{
- if (is_compat_task())
+ long ret = 0;
+ struct thread_info *ti = task_thread_info(task);
+
+ if (is_compat_thread(ti))
return -EINVAL;
- if (test_thread_flag(TIF_TAGGED_ADDR))
- return PR_TAGGED_ADDR_ENABLE;
+ if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
+ ret = PR_TAGGED_ADDR_ENABLE;
- return 0;
+ ret |= get_mte_ctrl(task);
+
+ return ret;
}
/*
@@ -608,8 +713,6 @@ long get_tagged_addr_ctrl(void)
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static int zero;
-static int one = 1;
static struct ctl_table tagged_addr_sysctl_table[] = {
{
@@ -618,10 +721,9 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
.data = &tagged_addr_disabled,
.maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
- { }
};
static int __init tagged_addr_init(void)
@@ -634,18 +736,24 @@ static int __init tagged_addr_init(void)
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
+#ifdef CONFIG_BINFMT_ELF
+int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+ bool has_interp, bool is_interp)
{
- lockdep_assert_irqs_disabled();
-
/*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
+ * For dynamically linked executables the interpreter is
+ * responsible for setting PROT_BTI on everything except
+ * itself.
*/
- if (static_branch_likely(&arm64_const_caps_ready))
- preempt_schedule_irq();
+ if (is_interp != has_interp)
+ return prot;
+
+ if (!(state->flags & ARM64_ELF_BTI))
+ return prot;
+
+ if (prot & PROT_EXEC)
+ prot |= PROT_BTI;
+
+ return prot;
}
+#endif
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
new file mode 100644
index 000000000000..6268a13a1d58
--- /dev/null
+++ b/arch/arm64/kernel/proton-pack.c
@@ -0,0 +1,1156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as
+ * detailed at:
+ *
+ * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability
+ *
+ * This code was originally written hastily under an awful lot of stress and so
+ * aspects of it are somewhat hacky. Unfortunately, changing anything in here
+ * instantly makes me feel ill. Thanks, Jann. Thann.
+ *
+ * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
+ * Copyright (C) 2020 Google LLC
+ *
+ * "If there's something strange in your neighbourhood, who you gonna call?"
+ *
+ * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/bpf.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/spectre.h>
+#include <asm/traps.h>
+#include <asm/vectors.h>
+#include <asm/virt.h>
+
+/*
+ * We try to ensure that the mitigation state can never change as the result of
+ * onlining a late CPU.
+ */
+static void update_mitigation_state(enum mitigation_state *oldp,
+ enum mitigation_state new)
+{
+ enum mitigation_state state;
+
+ do {
+ state = READ_ONCE(*oldp);
+ if (new <= state)
+ break;
+
+ /* Userspace almost certainly can't deal with this. */
+ if (WARN_ON(system_capabilities_finalized()))
+ break;
+ } while (cmpxchg_relaxed(oldp, state, new) != state);
+}
+
+/*
+ * Spectre v1.
+ *
+ * The kernel can't protect userspace for this one: it's each person for
+ * themselves. Advertise what we're doing and be done with it.
+ */
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+/*
+ * Spectre v2.
+ *
+ * This one sucks. A CPU is either:
+ *
+ * - Mitigated in hardware and advertised by ID_AA64PFR0_EL1.CSV2.
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in software by firmware.
+ * - Mitigated in software by a CPU-specific dance in the kernel and a
+ * firmware call at EL2.
+ * - Vulnerable.
+ *
+ * It's not unlikely for different CPUs in a big.LITTLE system to fall into
+ * different camps.
+ */
+static enum mitigation_state spectre_v2_state;
+
+static bool __read_mostly __nospectre_v2;
+static int __init parse_spectre_v2_param(char *str)
+{
+ __nospectre_v2 = true;
+ return 0;
+}
+early_param("nospectre_v2", parse_spectre_v2_param);
+
+static bool spectre_v2_mitigations_off(void)
+{
+ bool ret = __nospectre_v2 || cpu_mitigations_off();
+
+ if (ret)
+ pr_info_once("spectre-v2 mitigation disabled by command line option\n");
+
+ return ret;
+}
+
+static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
+{
+ switch (bhb_state) {
+ case SPECTRE_UNAFFECTED:
+ return "";
+ default:
+ case SPECTRE_VULNERABLE:
+ return ", but not BHB";
+ case SPECTRE_MITIGATED:
+ return ", BHB";
+ }
+}
+
+static bool _unprivileged_ebpf_enabled(void)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ return !sysctl_unprivileged_bpf_disabled;
+#else
+ return false;
+#endif
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ enum mitigation_state bhb_state = arm64_get_spectre_bhb_state();
+ const char *bhb_str = get_bhb_affected_string(bhb_state);
+ const char *v2_str = "Branch predictor hardening";
+
+ switch (spectre_v2_state) {
+ case SPECTRE_UNAFFECTED:
+ if (bhb_state == SPECTRE_UNAFFECTED)
+ return sprintf(buf, "Not affected\n");
+
+ /*
+ * Platforms affected by Spectre-BHB can't report
+ * "Not affected" for Spectre-v2.
+ */
+ v2_str = "CSV2";
+ fallthrough;
+ case SPECTRE_MITIGATED:
+ if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled())
+ return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n");
+
+ return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str);
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
+}
+
+static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
+{
+ u64 pfr0;
+ static const struct midr_range spectre_v2_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ { /* sentinel */ }
+ };
+
+ /* If the CPU has CSV2 set, we're safe */
+ pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_CSV2_SHIFT))
+ return SPECTRE_UNAFFECTED;
+
+ /* Alternatively, we have a list of unaffected CPUs */
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ return SPECTRE_UNAFFECTED;
+
+ return SPECTRE_VULNERABLE;
+}
+
+static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (spectre_v2_get_cpu_hw_mitigation_state() == SPECTRE_UNAFFECTED)
+ return false;
+
+ if (spectre_v2_get_cpu_fw_mitigation_state() == SPECTRE_UNAFFECTED)
+ return false;
+
+ return true;
+}
+
+enum mitigation_state arm64_get_spectre_v2_state(void)
+{
+ return spectre_v2_state;
+}
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+{
+ __this_cpu_write(bp_hardening_data.fn, fn);
+
+ /*
+ * Vinz Clortho takes the hyp_vecs start/end "keys" at
+ * the door when we're a guest. Skip the hyp-vectors work.
+ */
+ if (!is_hyp_mode_available())
+ return;
+
+ __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
+}
+
+/* Called during entry so must be noinstr */
+static noinstr void call_smc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+/* Called during entry so must be noinstr */
+static noinstr void call_hvc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+/* Called during entry so must be noinstr */
+static noinstr void qcom_link_stack_sanitisation(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
+static bp_hardening_cb_t spectre_v2_get_sw_mitigation_cb(void)
+{
+ u32 midr = read_cpuid_id();
+ if (((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR) &&
+ ((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR_V1))
+ return NULL;
+
+ return qcom_link_stack_sanitisation;
+}
+
+static enum mitigation_state spectre_v2_enable_fw_mitigation(void)
+{
+ bp_hardening_cb_t cb;
+ enum mitigation_state state;
+
+ state = spectre_v2_get_cpu_fw_mitigation_state();
+ if (state != SPECTRE_MITIGATED)
+ return state;
+
+ if (spectre_v2_mitigations_off())
+ return SPECTRE_VULNERABLE;
+
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ cb = call_hvc_arch_workaround_1;
+ break;
+
+ case SMCCC_CONDUIT_SMC:
+ cb = call_smc_arch_workaround_1;
+ break;
+
+ default:
+ return SPECTRE_VULNERABLE;
+ }
+
+ /*
+ * Prefer a CPU-specific workaround if it exists. Note that we
+ * still rely on firmware for the mitigation at EL2.
+ */
+ cb = spectre_v2_get_sw_mitigation_cb() ?: cb;
+ install_bp_hardening_cb(cb);
+ return SPECTRE_MITIGATED;
+}
+
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ enum mitigation_state state;
+
+ WARN_ON(preemptible());
+
+ state = spectre_v2_get_cpu_hw_mitigation_state();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v2_enable_fw_mitigation();
+
+ update_mitigation_state(&spectre_v2_state, state);
+}
+
+/*
+ * Spectre-v3a.
+ *
+ * Phew, there's not an awful lot to do here! We just instruct EL2 to use
+ * an indirect trampoline for the hyp vectors so that guests can't read
+ * VBAR_EL2 to defeat randomisation of the hypervisor VA layout.
+ */
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ static const struct midr_range spectre_v3a_unsafe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ {},
+ };
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+}
+
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+ if (this_cpu_has_cap(ARM64_SPECTRE_V3A))
+ data->slot += HYP_VECTOR_INDIRECT;
+}
+
+/*
+ * Spectre v4.
+ *
+ * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is
+ * either:
+ *
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in hardware via PSTATE.SSBS.
+ * - Mitigated in software by firmware (sometimes referred to as SSBD).
+ *
+ * Wait, that doesn't sound so bad, does it? Keep reading...
+ *
+ * A major source of headaches is that the software mitigation is enabled both
+ * on a per-task basis, but can also be forced on for the kernel, necessitating
+ * both context-switch *and* entry/exit hooks. To make it even worse, some CPUs
+ * allow EL0 to toggle SSBS directly, which can end up with the prctl() state
+ * being stale when re-entering the kernel. The usual big.LITTLE caveats apply,
+ * so you can have systems that have both firmware and SSBS mitigations. This
+ * means we actually have to reject late onlining of CPUs with mitigations if
+ * all of the currently onlined CPUs are safelisted, as the mitigation tends to
+ * be opt-in for userspace. Yes, really, the cure is worse than the disease.
+ *
+ * The only good part is that if the firmware mitigation is present, then it is
+ * present for all CPUs, meaning we don't have to worry about late onlining of a
+ * vulnerable CPU if one of the boot CPUs is using the firmware mitigation.
+ *
+ * Give me a VAX-11/780 any day of the week...
+ */
+static enum mitigation_state spectre_v4_state;
+
+/* This is the per-cpu state tracking whether we need to talk to firmware */
+DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+enum spectre_v4_policy {
+ SPECTRE_V4_POLICY_MITIGATION_DYNAMIC,
+ SPECTRE_V4_POLICY_MITIGATION_ENABLED,
+ SPECTRE_V4_POLICY_MITIGATION_DISABLED,
+};
+
+static enum spectre_v4_policy __read_mostly __spectre_v4_policy;
+
+static const struct spectre_v4_param {
+ const char *str;
+ enum spectre_v4_policy policy;
+} spectre_v4_params[] = {
+ { "force-on", SPECTRE_V4_POLICY_MITIGATION_ENABLED, },
+ { "force-off", SPECTRE_V4_POLICY_MITIGATION_DISABLED, },
+ { "kernel", SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, },
+};
+static int __init parse_spectre_v4_param(char *str)
+{
+ int i;
+
+ if (!str || !str[0])
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(spectre_v4_params); i++) {
+ const struct spectre_v4_param *param = &spectre_v4_params[i];
+
+ if (strncmp(str, param->str, strlen(param->str)))
+ continue;
+
+ __spectre_v4_policy = param->policy;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("ssbd", parse_spectre_v4_param);
+
+/*
+ * Because this was all written in a rush by people working in different silos,
+ * we've ended up with multiple command line options to control the same thing.
+ * Wrap these up in some helpers, which prefer disabling the mitigation if faced
+ * with contradictory parameters. The mitigation is always either "off",
+ * "dynamic" or "on".
+ */
+static bool spectre_v4_mitigations_off(void)
+{
+ bool ret = cpu_mitigations_off() ||
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
+
+ if (ret)
+ pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
+
+ return ret;
+}
+
+/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
+static bool spectre_v4_mitigations_dynamic(void)
+{
+ return !spectre_v4_mitigations_off() &&
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DYNAMIC;
+}
+
+static bool spectre_v4_mitigations_on(void)
+{
+ return !spectre_v4_mitigations_off() &&
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_ENABLED;
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ switch (spectre_v4_state) {
+ case SPECTRE_UNAFFECTED:
+ return sprintf(buf, "Not affected\n");
+ case SPECTRE_MITIGATED:
+ return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n");
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return sprintf(buf, "Vulnerable\n");
+ }
+}
+
+enum mitigation_state arm64_get_spectre_v4_state(void)
+{
+ return spectre_v4_state;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
+{
+ static const struct midr_range spectre_v4_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ { /* sentinel */ },
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+ return SPECTRE_UNAFFECTED;
+
+ /* CPU features are detected first */
+ if (this_cpu_has_cap(ARM64_SSBS))
+ return SPECTRE_MITIGATED;
+
+ return SPECTRE_VULNERABLE;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ fallthrough;
+ case SMCCC_RET_NOT_REQUIRED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ enum mitigation_state state;
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ state = spectre_v4_get_cpu_hw_mitigation_state();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v4_get_cpu_fw_mitigation_state();
+
+ return state != SPECTRE_UNAFFECTED;
+}
+
+bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr)
+{
+ const u32 instr_mask = ~(1U << PSTATE_Imm_shift);
+ const u32 instr_val = 0xd500401f | PSTATE_SSBS;
+
+ if ((instr & instr_mask) != instr_val)
+ return false;
+
+ if (instr & BIT(PSTATE_Imm_shift))
+ regs->pstate |= PSR_SSBS_BIT;
+ else
+ regs->pstate &= ~PSR_SSBS_BIT;
+
+ arm64_skip_faulting_instruction(regs, 4);
+ return true;
+}
+
+static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
+{
+ enum mitigation_state state;
+
+ /*
+ * If the system is mitigated but this CPU doesn't have SSBS, then
+ * we must be on the safelist and there's nothing more to do.
+ */
+ state = spectre_v4_get_cpu_hw_mitigation_state();
+ if (state != SPECTRE_MITIGATED || !this_cpu_has_cap(ARM64_SSBS))
+ return state;
+
+ if (spectre_v4_mitigations_off()) {
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
+ set_pstate_ssbs(1);
+ return SPECTRE_VULNERABLE;
+ }
+
+ /* SCTLR_EL1.DSSBS was initialised to 0 during boot */
+ set_pstate_ssbs(0);
+ return SPECTRE_MITIGATED;
+}
+
+/*
+ * Patch a branch over the Spectre-v4 mitigation code with a NOP so that
+ * we fallthrough and check whether firmware needs to be called on this CPU.
+ */
+void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1); /* Branch -> NOP */
+
+ if (spectre_v4_mitigations_off())
+ return;
+
+ if (cpus_have_cap(ARM64_SSBS))
+ return;
+
+ if (spectre_v4_mitigations_dynamic())
+ *updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/*
+ * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
+ * to call into firmware to adjust the mitigation state.
+ */
+void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ u32 insn;
+
+ BUG_ON(nr_inst != 1); /* NOP -> HVC/SMC */
+
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ insn = aarch64_insn_get_hvc_value();
+ break;
+ case SMCCC_CONDUIT_SMC:
+ insn = aarch64_insn_get_smc_value();
+ break;
+ default:
+ return;
+ }
+
+ *updptr = cpu_to_le32(insn);
+}
+
+static enum mitigation_state spectre_v4_enable_fw_mitigation(void)
+{
+ enum mitigation_state state;
+
+ state = spectre_v4_get_cpu_fw_mitigation_state();
+ if (state != SPECTRE_MITIGATED)
+ return state;
+
+ if (spectre_v4_mitigations_off()) {
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, false, NULL);
+ return SPECTRE_VULNERABLE;
+ }
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, true, NULL);
+
+ if (spectre_v4_mitigations_dynamic())
+ __this_cpu_write(arm64_ssbd_callback_required, 1);
+
+ return SPECTRE_MITIGATED;
+}
+
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+ enum mitigation_state state;
+
+ WARN_ON(preemptible());
+
+ state = spectre_v4_enable_hw_mitigation();
+ if (state == SPECTRE_VULNERABLE)
+ state = spectre_v4_enable_fw_mitigation();
+
+ update_mitigation_state(&spectre_v4_state, state);
+}
+
+static void __update_pstate_ssbs(struct pt_regs *regs, bool state)
+{
+ u64 bit = compat_user_mode(regs) ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+
+ if (state)
+ regs->pstate |= bit;
+ else
+ regs->pstate &= ~bit;
+}
+
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+ bool ssbs = false, kthread = tsk->flags & PF_KTHREAD;
+
+ if (spectre_v4_mitigations_off())
+ ssbs = true;
+ else if (spectre_v4_mitigations_dynamic() && !kthread)
+ ssbs = !test_tsk_thread_flag(tsk, TIF_SSBD);
+
+ __update_pstate_ssbs(regs, ssbs);
+}
+
+/*
+ * The Spectre-v4 mitigation can be controlled via a prctl() from userspace.
+ * This is interesting because the "speculation disabled" behaviour can be
+ * configured so that it is preserved across exec(), which means that the
+ * prctl() may be necessary even when PSTATE.SSBS can be toggled directly
+ * from userspace.
+ */
+static void ssbd_prctl_enable_mitigation(struct task_struct *task)
+{
+ task_clear_spec_ssb_noexec(task);
+ task_set_spec_ssb_disable(task);
+ set_tsk_thread_flag(task, TIF_SSBD);
+}
+
+static void ssbd_prctl_disable_mitigation(struct task_struct *task)
+{
+ task_clear_spec_ssb_noexec(task);
+ task_clear_spec_ssb_disable(task);
+ clear_tsk_thread_flag(task, TIF_SSBD);
+}
+
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* Enable speculation: disable mitigation */
+ /*
+ * Force disabled speculation prevents it from being
+ * re-enabled.
+ */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+
+ /*
+ * If the mitigation is forced on, then speculation is forced
+ * off and we again prevent it from being re-enabled.
+ */
+ if (spectre_v4_mitigations_on())
+ return -EPERM;
+
+ ssbd_prctl_disable_mitigation(task);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ /* Force disable speculation: force enable mitigation */
+ /*
+ * If the mitigation is forced off, then speculation is forced
+ * on and we prevent it from being disabled.
+ */
+ if (spectre_v4_mitigations_off())
+ return -EPERM;
+
+ task_set_spec_ssb_force_disable(task);
+ fallthrough;
+ case PR_SPEC_DISABLE:
+ /* Disable speculation: enable mitigation */
+ /* Same as PR_SPEC_FORCE_DISABLE */
+ if (spectre_v4_mitigations_off())
+ return -EPERM;
+
+ ssbd_prctl_enable_mitigation(task);
+ break;
+ case PR_SPEC_DISABLE_NOEXEC:
+ /* Disable speculation until execve(): enable mitigation */
+ /*
+ * If the mitigation state is forced one way or the other, then
+ * we must fail now before we try to toggle it on execve().
+ */
+ if (task_spec_ssb_force_disable(task) ||
+ spectre_v4_mitigations_off() ||
+ spectre_v4_mitigations_on()) {
+ return -EPERM;
+ }
+
+ ssbd_prctl_enable_mitigation(task);
+ task_set_spec_ssb_noexec(task);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ spectre_v4_enable_task_mitigation(task);
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+static int ssbd_prctl_get(struct task_struct *task)
+{
+ switch (spectre_v4_state) {
+ case SPECTRE_UNAFFECTED:
+ return PR_SPEC_NOT_AFFECTED;
+ case SPECTRE_MITIGATED:
+ if (spectre_v4_mitigations_on())
+ return PR_SPEC_NOT_AFFECTED;
+
+ if (spectre_v4_mitigations_dynamic())
+ break;
+
+ /* Mitigations are disabled, so we're vulnerable. */
+ fallthrough;
+ case SPECTRE_VULNERABLE:
+ fallthrough;
+ default:
+ return PR_SPEC_ENABLE;
+ }
+
+ /* Check the mitigation state for this task */
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+
+ if (task_spec_ssb_noexec(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
+
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssbd_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+/*
+ * Spectre BHB.
+ *
+ * A CPU is either:
+ * - Mitigated by a branchy loop a CPU specific number of times, and listed
+ * in our "loop mitigated list".
+ * - Mitigated in software by the firmware Spectre v2 call.
+ * - Has the ClearBHB instruction to perform the mitigation.
+ * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no
+ * software mitigation in the vectors is needed.
+ * - Has CSV2.3, so is unaffected.
+ */
+static enum mitigation_state spectre_bhb_state;
+
+enum mitigation_state arm64_get_spectre_bhb_state(void)
+{
+ return spectre_bhb_state;
+}
+
+enum bhb_mitigation_bits {
+ BHB_LOOP,
+ BHB_FW,
+ BHB_HW,
+ BHB_INSN,
+};
+static unsigned long system_bhb_mitigations;
+
+/*
+ * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
+ * SCOPE_SYSTEM call will give the right answer.
+ */
+u8 spectre_bhb_loop_affected(int scope)
+{
+ u8 k = 0;
+ static u8 max_bhb_k;
+
+ if (scope == SCOPE_LOCAL_CPU) {
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
+
+ max_bhb_k = max(max_bhb_k, k);
+ } else {
+ k = max_bhb_k;
+ }
+
+ return k;
+}
+
+static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
+{
+ int ret;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_3, &res);
+
+ ret = res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return SPECTRE_MITIGATED;
+ case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+ return SPECTRE_UNAFFECTED;
+ default:
+ fallthrough;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return SPECTRE_VULNERABLE;
+ }
+}
+
+static bool is_spectre_bhb_fw_affected(int scope)
+{
+ static bool system_affected;
+ enum mitigation_state fw_state;
+ bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
+ static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+ {},
+ };
+ bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
+ spectre_bhb_firmware_mitigated_list);
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return system_affected;
+
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
+ system_affected = true;
+ return true;
+ }
+
+ return false;
+}
+
+static bool supports_ecbhb(int scope)
+{
+ u64 mmfr1;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ else
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_ECBHB_SHIFT);
+}
+
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ if (supports_csv2p3(scope))
+ return false;
+
+ if (supports_clearbhb(scope))
+ return true;
+
+ if (spectre_bhb_loop_affected(scope))
+ return true;
+
+ if (is_spectre_bhb_fw_affected(scope))
+ return true;
+
+ return false;
+}
+
+static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
+{
+ const char *v = arm64_get_bp_hardening_vector(slot);
+
+ __this_cpu_write(this_cpu_vector, v);
+
+ /*
+ * When KPTI is in use, the vectors are switched when exiting to
+ * user-space.
+ */
+ if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ return;
+
+ write_sysreg(v, vbar_el1);
+ isb();
+}
+
+static bool __read_mostly __nospectre_bhb;
+static int __init parse_spectre_bhb_param(char *str)
+{
+ __nospectre_bhb = true;
+ return 0;
+}
+early_param("nospectre_bhb", parse_spectre_bhb_param);
+
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
+{
+ bp_hardening_cb_t cpu_cb;
+ enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+ if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
+ return;
+
+ if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
+ /* No point mitigating Spectre-BHB alone. */
+ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
+ pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
+ } else if (cpu_mitigations_off() || __nospectre_bhb) {
+ pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_HW, &system_bhb_mitigations);
+ } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) {
+ /*
+ * Ensure KVM uses the indirect vector which will have ClearBHB
+ * added.
+ */
+ if (!data->slot)
+ data->slot = HYP_VECTOR_INDIRECT;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_INSN, &system_bhb_mitigations);
+ } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ /*
+ * Ensure KVM uses the indirect vector which will have the
+ * branchy-loop added. A57/A72-r0 will already have selected
+ * the spectre-indirect vector, which is sufficient for BHB
+ * too.
+ */
+ if (!data->slot)
+ data->slot = HYP_VECTOR_INDIRECT;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_LOOP, &system_bhb_mitigations);
+ } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
+ fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
+ if (fw_state == SPECTRE_MITIGATED) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
+ }
+ }
+
+ update_mitigation_state(&spectre_bhb_state, state);
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1);
+
+ if (test_bit(BHB_LOOP, &system_bhb_mitigations))
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to NOP when enabled */
+void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt,
+ __le32 *origptr,
+ __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 1);
+
+ if (test_bit(BHB_FW, &system_bhb_mitigations))
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/* Patched to correct the immediate */
+void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u8 rd;
+ u32 insn;
+ u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
+
+ BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY))
+ return;
+
+ insn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+ insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to mov WA3 when supported */
+void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u8 rd;
+ u32 insn;
+
+ BUG_ON(nr_inst != 1); /* MOV -> MOV */
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+ !test_bit(BHB_FW, &system_bhb_mitigations))
+ return;
+
+ insn = le32_to_cpu(*origptr);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
+
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR,
+ AARCH64_INSN_VARIANT_32BIT,
+ AARCH64_INSN_REG_ZR, rd,
+ ARM_SMCCC_ARCH_WORKAROUND_3);
+ if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT))
+ return;
+
+ *updptr++ = cpu_to_le32(insn);
+}
+
+/* Patched to NOP when not supported */
+void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ BUG_ON(nr_inst != 2);
+
+ if (test_bit(BHB_INSN, &system_bhb_mitigations))
+ return;
+
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+ *updptr++ = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n"
+void unpriv_ebpf_notify(int new_state)
+{
+ if (spectre_v2_state == SPECTRE_VULNERABLE ||
+ spectre_bhb_state != SPECTRE_MITIGATED)
+ return;
+
+ if (!new_state)
+ pr_err("WARNING: %s", EBPF_WARN);
+}
+#endif
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 43ae4e0c968f..29a8e444db83 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -38,7 +38,8 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu)
static int cpu_psci_cpu_boot(unsigned int cpu)
{
- int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry));
+ phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry);
+ int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry);
if (err)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
@@ -66,7 +67,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu)
static void cpu_psci_cpu_die(unsigned int cpu)
{
- int ret;
/*
* There are no known implementations of PSCI actually using the
* power state field, pass a sensible default for now.
@@ -74,9 +74,7 @@ static void cpu_psci_cpu_die(unsigned int cpu)
u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<
PSCI_0_2_POWER_STATE_TYPE_SHIFT;
- ret = psci_ops.cpu_off(state);
-
- pr_crit("unable to power off CPU%u (%d)\n", cpu, ret);
+ psci_ops.cpu_off(state);
}
static int cpu_psci_cpu_kill(unsigned int cpu)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6771c399d40c..dc6cf0e37194 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -27,14 +27,14 @@
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/regset.h>
-#include <linux/tracehook.h>
#include <linux/elf.h>
+#include <linux/rseq.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/fpsimd.h>
-#include <asm/pgtable.h>
+#include <asm/mte.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
#include <asm/syscall.h>
@@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
{
return ((addr & ~(THREAD_SIZE - 1)) ==
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
- on_irq_stack(addr, NULL);
+ on_irq_stack(addr, sizeof(unsigned long));
}
/**
@@ -192,14 +192,12 @@ static void ptrace_hbptriggered(struct perf_event *bp,
break;
}
}
- arm64_force_sig_ptrace_errno_trap(si_errno,
- (void __user *)bkpt->trigger,
+ arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger,
desc);
+ return;
}
#endif
- arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __user *)(bkpt->trigger),
- desc);
+ arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
}
/*
@@ -475,11 +473,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type,
static int hw_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
unsigned int note_type = regset->core_note_type;
- int ret, idx = 0, offset, limit;
+ int ret, idx = 0;
u32 info, ctrl;
u64 addr;
@@ -488,49 +485,21 @@ static int hw_break_get(struct task_struct *target,
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0,
- sizeof(info));
- if (ret)
- return ret;
-
- /* Pad */
- offset = offsetof(struct user_hwdebug_state, pad);
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset,
- offset + PTRACE_HBP_PAD_SZ);
- if (ret)
- return ret;
-
+ membuf_write(&to, &info, sizeof(info));
+ membuf_zero(&to, sizeof(u32));
/* (address, ctrl) registers */
- offset = offsetof(struct user_hwdebug_state, dbg_regs);
- limit = regset->n * regset->size;
- while (count && offset < limit) {
+ while (to.left) {
ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr,
- offset, offset + PTRACE_HBP_ADDR_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_ADDR_SZ;
-
ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl);
if (ret)
return ret;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl,
- offset, offset + PTRACE_HBP_CTRL_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_CTRL_SZ;
-
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- offset,
- offset + PTRACE_HBP_PAD_SZ);
- if (ret)
- return ret;
- offset += PTRACE_HBP_PAD_SZ;
+ membuf_store(&to, addr);
+ membuf_store(&to, ctrl);
+ membuf_zero(&to, sizeof(u32));
idx++;
}
-
return 0;
}
@@ -546,9 +515,7 @@ static int hw_break_set(struct task_struct *target,
/* Resource info and pad */
offset = offsetof(struct user_hwdebug_state, dbg_regs);
- ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
- if (ret)
- return ret;
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
/* (address, ctrl) registers */
limit = regset->n * regset->size;
@@ -575,11 +542,8 @@ static int hw_break_set(struct task_struct *target,
return ret;
offset += PTRACE_HBP_CTRL_SZ;
- ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- offset,
- offset + PTRACE_HBP_PAD_SZ);
- if (ret)
- return ret;
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ offset, offset + PTRACE_HBP_PAD_SZ);
offset += PTRACE_HBP_PAD_SZ;
idx++;
}
@@ -590,11 +554,10 @@ static int hw_break_set(struct task_struct *target,
static int gpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+ return membuf_write(&to, uregs, sizeof(*uregs));
}
static int gpr_set(struct task_struct *target, const struct user_regset *regset,
@@ -615,13 +578,19 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
return 0;
}
+static int fpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+ if (!system_supports_fpsimd())
+ return -ENODEV;
+ return regset->n;
+}
+
/*
* TODO: update fp accessors for lazy context switching (sync/flush hwstate)
*/
static int __fpr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf, unsigned int start_pos)
+ struct membuf to)
{
struct user_fpsimd_state *uregs;
@@ -629,18 +598,19 @@ static int __fpr_get(struct task_struct *target,
uregs = &target->thread.uw.fpsimd_state;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
- start_pos, start_pos + sizeof(*uregs));
+ return membuf_write(&to, uregs, sizeof(*uregs));
}
static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ if (!system_supports_fpsimd())
+ return -EINVAL;
+
if (target == current)
fpsimd_preserve_current_state();
- return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
+ return __fpr_get(target, regset, to);
}
static int __fpr_set(struct task_struct *target,
@@ -676,6 +646,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
{
int ret;
+ if (!system_supports_fpsimd())
+ return -EINVAL;
+
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
if (ret)
return ret;
@@ -687,15 +660,20 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
}
static int tls_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned long *tls = &target->thread.uw.tp_value;
+ int ret;
if (target == current)
tls_preserve_current_state();
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
+ ret = membuf_store(&to, target->thread.uw.tp_value);
+ if (system_supports_tpidr2())
+ ret = membuf_store(&to, target->thread.tpidr2_el0);
+ else
+ ret = membuf_zero(&to, sizeof(u64));
+
+ return ret;
}
static int tls_set(struct task_struct *target, const struct user_regset *regset,
@@ -703,25 +681,28 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- unsigned long tls = target->thread.uw.tp_value;
+ unsigned long tls[2];
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ tls[0] = target->thread.uw.tp_value;
+ if (system_supports_tpidr2())
+ tls[1] = target->thread.tpidr2_el0;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count);
if (ret)
return ret;
- target->thread.uw.tp_value = tls;
+ target->thread.uw.tp_value = tls[0];
+ if (system_supports_tpidr2())
+ target->thread.tpidr2_el0 = tls[1];
+
return ret;
}
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- int syscallno = task_pt_regs(target)->syscallno;
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &syscallno, 0, -1);
+ return membuf_store(&to, task_pt_regs(target)->syscallno);
}
static int system_call_set(struct task_struct *target,
@@ -743,21 +724,51 @@ static int system_call_set(struct task_struct *target,
#ifdef CONFIG_ARM64_SVE
static void sve_init_header_from_task(struct user_sve_header *header,
- struct task_struct *target)
+ struct task_struct *target,
+ enum vec_type type)
{
unsigned int vq;
+ bool active;
+ bool fpsimd_only;
+ enum vec_type task_type;
memset(header, 0, sizeof(*header));
- header->flags = test_tsk_thread_flag(target, TIF_SVE) ?
- SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
- if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
- header->flags |= SVE_PT_VL_INHERIT;
+ /* Check if the requested registers are active for the task */
+ if (thread_sm_enabled(&target->thread))
+ task_type = ARM64_VEC_SME;
+ else
+ task_type = ARM64_VEC_SVE;
+ active = (task_type == type);
+
+ switch (type) {
+ case ARM64_VEC_SVE:
+ if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+ fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
+ break;
+ case ARM64_VEC_SME:
+ if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+ header->flags |= SVE_PT_VL_INHERIT;
+ fpsimd_only = false;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (active) {
+ if (fpsimd_only) {
+ header->flags |= SVE_PT_REGS_FPSIMD;
+ } else {
+ header->flags |= SVE_PT_REGS_SVE;
+ }
+ }
- header->vl = target->thread.sve_vl;
+ header->vl = task_get_vl(target, type);
vq = sve_vq_from_vl(header->vl);
- header->max_vl = sve_max_vl;
+ header->max_vl = vec_max_vl(type);
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
@@ -768,99 +779,79 @@ static unsigned int sve_size_from_header(struct user_sve_header const *header)
return ALIGN(header->size, SVE_VQ_BYTES);
}
-static unsigned int sve_get_size(struct task_struct *target,
- const struct user_regset *regset)
-{
- struct user_sve_header header;
-
- if (!system_supports_sve())
- return 0;
-
- sve_init_header_from_task(&header, target);
- return sve_size_from_header(&header);
-}
-
-static int sve_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+static int sve_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to,
+ enum vec_type type)
{
- int ret;
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
- if (!system_supports_sve())
- return -EINVAL;
-
/* Header */
- sve_init_header_from_task(&header, target);
+ sve_init_header_from_task(&header, target, type);
vq = sve_vq_from_vl(header.vl);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
- 0, sizeof(header));
- if (ret)
- return ret;
+ membuf_write(&to, &header, sizeof(header));
if (target == current)
fpsimd_preserve_current_state();
- /* Registers: FPSIMD-only case */
-
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
- if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
- return __fpr_get(target, regset, pos, count, kbuf, ubuf,
- SVE_PT_FPSIMD_OFFSET);
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
- /* Otherwise: full SVE case */
+ switch ((header.flags & SVE_PT_REGS_MASK)) {
+ case SVE_PT_REGS_FPSIMD:
+ return __fpr_get(target, regset, to);
- BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
- start = SVE_PT_SVE_OFFSET;
- end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.sve_state,
- start, end);
- if (ret)
- return ret;
+ case SVE_PT_REGS_SVE:
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+ membuf_write(&to, target->thread.sve_state, end - start);
- start = end;
- end = SVE_PT_SVE_FPSR_OFFSET(vq);
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- start, end);
- if (ret)
- return ret;
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+ membuf_zero(&to, end - start);
- /*
- * Copy fpsr, and fpcr which must follow contiguously in
- * struct fpsimd_state:
- */
- start = end;
- end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.uw.fpsimd_state.fpsr,
- start, end);
- if (ret)
- return ret;
+ /*
+ * Copy fpsr, and fpcr which must follow contiguously in
+ * struct fpsimd_state:
+ */
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+ membuf_write(&to, &target->thread.uw.fpsimd_state.fpsr,
+ end - start);
- start = end;
- end = sve_size_from_header(&header);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- start, end);
+ start = end;
+ end = sve_size_from_header(&header);
+ return membuf_zero(&to, end - start);
+
+ default:
+ return 0;
+ }
}
-static int sve_set(struct task_struct *target,
+static int sve_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
+ struct membuf to)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ return sve_get_common(target, regset, to, ARM64_VEC_SVE);
+}
+
+static int sve_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ enum vec_type type)
{
int ret;
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
- if (!system_supports_sve())
- return -EINVAL;
-
/* Header */
if (count < sizeof(header))
return -EINVAL;
@@ -871,15 +862,47 @@ static int sve_set(struct task_struct *target,
/*
* Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
- * sve_set_vector_length(), which will also validate them for us:
+ * vec_set_vector_length(), which will also validate them for us:
*/
- ret = sve_set_vector_length(target, header.vl,
+ ret = vec_set_vector_length(target, type, header.vl,
((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
if (ret)
goto out;
/* Actual VL set may be less than the user asked for: */
- vq = sve_vq_from_vl(target->thread.sve_vl);
+ vq = sve_vq_from_vl(task_get_vl(target, type));
+
+ /* Enter/exit streaming mode */
+ if (system_supports_sme()) {
+ u64 old_svcr = target->thread.svcr;
+
+ switch (type) {
+ case ARM64_VEC_SVE:
+ target->thread.svcr &= ~SVCR_SM_MASK;
+ break;
+ case ARM64_VEC_SME:
+ target->thread.svcr |= SVCR_SM_MASK;
+
+ /*
+ * Disable traps and ensure there is SME storage but
+ * preserve any currently set values in ZA/ZT.
+ */
+ sme_alloc(target, false);
+ set_tsk_thread_flag(target, TIF_SME);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * If we switched then invalidate any existing SVE
+ * state and ensure there's storage.
+ */
+ if (target->thread.svcr != old_svcr)
+ sve_alloc(target, true);
+ }
/* Registers: FPSIMD-only case */
@@ -888,10 +911,14 @@ static int sve_set(struct task_struct *target,
ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
SVE_PT_FPSIMD_OFFSET);
clear_tsk_thread_flag(target, TIF_SVE);
+ target->thread.fp_type = FP_STATE_FPSIMD;
goto out;
}
- /* Otherwise: full SVE case */
+ /*
+ * Otherwise: no registers or full SVE case. For backwards
+ * compatibility reasons we treat empty flags as SVE registers.
+ */
/*
* If setting a different VL from the requested VL and there is
@@ -903,15 +930,25 @@ static int sve_set(struct task_struct *target,
goto out;
}
- sve_alloc(target);
+ sve_alloc(target, true);
+ if (!target->thread.sve_state) {
+ ret = -ENOMEM;
+ clear_tsk_thread_flag(target, TIF_SVE);
+ target->thread.fp_type = FP_STATE_FPSIMD;
+ goto out;
+ }
/*
* Ensure target->thread.sve_state is up to date with target's
- * FPSIMD regs, so that a short copyin leaves trailing registers
- * unmodified.
+ * FPSIMD regs, so that a short copyin leaves trailing
+ * registers unmodified. Only enable SVE if we are
+ * configuring normal SVE, a system with streaming SVE may not
+ * have normal SVE.
*/
fpsimd_sync_to_sve(target);
- set_tsk_thread_flag(target, TIF_SVE);
+ if (type == ARM64_VEC_SVE)
+ set_tsk_thread_flag(target, TIF_SVE);
+ target->thread.fp_type = FP_STATE_SVE;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
@@ -924,10 +961,7 @@ static int sve_set(struct task_struct *target,
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
- ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- start, end);
- if (ret)
- goto out;
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, start, end);
/*
* Copy fpsr, and fpcr which must follow contiguously in
@@ -944,13 +978,238 @@ out:
return ret;
}
+static int sve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+ ARM64_VEC_SVE);
+}
+
#endif /* CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static int ssve_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ return sve_get_common(target, regset, to, ARM64_VEC_SME);
+}
+
+static int ssve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ return sve_set_common(target, regset, pos, count, kbuf, ubuf,
+ ARM64_VEC_SME);
+}
+
+static int za_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_za_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ /* Header */
+ memset(&header, 0, sizeof(header));
+
+ if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
+ header.flags |= ZA_PT_VL_INHERIT;
+
+ header.vl = task_get_sme_vl(target);
+ vq = sve_vq_from_vl(header.vl);
+ header.max_vl = sme_max_vl();
+ header.max_size = ZA_PT_SIZE(vq);
+
+ /* If ZA is not active there is only the header */
+ if (thread_za_enabled(&target->thread))
+ header.size = ZA_PT_SIZE(vq);
+ else
+ header.size = ZA_PT_ZA_OFFSET;
+
+ membuf_write(&to, &header, sizeof(header));
+
+ BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+ end = ZA_PT_ZA_OFFSET;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
+ /* Any register data to include? */
+ if (thread_za_enabled(&target->thread)) {
+ start = end;
+ end = ZA_PT_SIZE(vq);
+ membuf_write(&to, target->thread.sme_state, end - start);
+ }
+
+ /* Zero any trailing padding */
+ start = end;
+ end = ALIGN(header.size, SVE_VQ_BYTES);
+ return membuf_zero(&to, end - start);
+}
+
+static int za_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct user_za_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ if (!system_supports_sme())
+ return -EINVAL;
+
+ /* Header */
+ if (count < sizeof(header))
+ return -EINVAL;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ goto out;
+
+ /*
+ * All current ZA_PT_* flags are consumed by
+ * vec_set_vector_length(), which will also validate them for
+ * us:
+ */
+ ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl,
+ ((unsigned long)header.flags) << 16);
+ if (ret)
+ goto out;
+
+ /* Actual VL set may be less than the user asked for: */
+ vq = sve_vq_from_vl(task_get_sme_vl(target));
+
+ /* Ensure there is some SVE storage for streaming mode */
+ if (!target->thread.sve_state) {
+ sve_alloc(target, false);
+ if (!target->thread.sve_state) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /*
+ * Only flush the storage if PSTATE.ZA was not already set,
+ * otherwise preserve any existing data.
+ */
+ sme_alloc(target, !thread_za_enabled(&target->thread));
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+
+ /* If there is no data then disable ZA */
+ if (!count) {
+ target->thread.svcr &= ~SVCR_ZA_MASK;
+ goto out;
+ }
+
+ /*
+ * If setting a different VL from the requested VL and there is
+ * register data, the data layout will be wrong: don't even
+ * try to set the registers in this case.
+ */
+ if (vq != sve_vq_from_vl(header.vl)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
+ start = ZA_PT_ZA_OFFSET;
+ end = ZA_PT_SIZE(vq);
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.sme_state,
+ start, end);
+ if (ret)
+ goto out;
+
+ /* Mark ZA as active and let userspace use it */
+ set_tsk_thread_flag(target, TIF_SME);
+ target->thread.svcr |= SVCR_ZA_MASK;
+
+out:
+ fpsimd_flush_task_state(target);
+ return ret;
+}
+
+static int zt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ /*
+ * If PSTATE.ZA is not set then ZT will be zeroed when it is
+ * enabled so report the current register value as zero.
+ */
+ if (thread_za_enabled(&target->thread))
+ membuf_write(&to, thread_zt_state(&target->thread),
+ ZT_SIG_REG_BYTES);
+ else
+ membuf_zero(&to, ZT_SIG_REG_BYTES);
+
+ return 0;
+}
+
+static int zt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ /* Ensure SVE storage in case this is first use of SME */
+ sve_alloc(target, false);
+ if (!target->thread.sve_state)
+ return -ENOMEM;
+
+ if (!thread_za_enabled(&target->thread)) {
+ sme_alloc(target, true);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ thread_zt_state(&target->thread),
+ 0, ZT_SIG_REG_BYTES);
+ if (ret == 0) {
+ target->thread.svcr |= SVCR_ZA_MASK;
+ set_tsk_thread_flag(target, TIF_SME);
+ }
+
+ fpsimd_flush_task_state(target);
+
+ return ret;
+}
+
+#endif /* CONFIG_ARM64_SME */
+
#ifdef CONFIG_ARM64_PTR_AUTH
static int pac_mask_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
/*
* The PAC bits can differ across data and instruction pointers
@@ -966,7 +1225,39 @@ static int pac_mask_get(struct task_struct *target,
if (!system_supports_address_auth())
return -EINVAL;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1);
+ return membuf_write(&to, &uregs, sizeof(uregs));
+}
+
+static int pac_enabled_keys_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long enabled_keys = ptrauth_get_enabled_keys(target);
+
+ if (IS_ERR_VALUE(enabled_keys))
+ return enabled_keys;
+
+ return membuf_write(&to, &enabled_keys, sizeof(enabled_keys));
+}
+
+static int pac_enabled_keys_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ long enabled_keys = ptrauth_get_enabled_keys(target);
+
+ if (IS_ERR_VALUE(enabled_keys))
+ return enabled_keys;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0,
+ sizeof(long));
+ if (ret)
+ return ret;
+
+ return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK,
+ enabled_keys);
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -986,7 +1277,7 @@ static struct ptrauth_key pac_key_from_user(__uint128_t ukey)
}
static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys,
- const struct ptrauth_keys *keys)
+ const struct ptrauth_keys_user *keys)
{
ukeys->apiakey = pac_key_to_user(&keys->apia);
ukeys->apibkey = pac_key_to_user(&keys->apib);
@@ -994,7 +1285,7 @@ static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys,
ukeys->apdbkey = pac_key_to_user(&keys->apdb);
}
-static void pac_address_keys_from_user(struct ptrauth_keys *keys,
+static void pac_address_keys_from_user(struct ptrauth_keys_user *keys,
const struct user_pac_address_keys *ukeys)
{
keys->apia = pac_key_from_user(ukeys->apiakey);
@@ -1005,10 +1296,9 @@ static void pac_address_keys_from_user(struct ptrauth_keys *keys,
static int pac_address_keys_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_address_keys user_keys;
if (!system_supports_address_auth())
@@ -1016,8 +1306,7 @@ static int pac_address_keys_get(struct task_struct *target,
pac_address_keys_to_user(&user_keys, keys);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &user_keys, 0, -1);
+ return membuf_write(&to, &user_keys, sizeof(user_keys));
}
static int pac_address_keys_set(struct task_struct *target,
@@ -1025,7 +1314,7 @@ static int pac_address_keys_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_address_keys user_keys;
int ret;
@@ -1043,12 +1332,12 @@ static int pac_address_keys_set(struct task_struct *target,
}
static void pac_generic_keys_to_user(struct user_pac_generic_keys *ukeys,
- const struct ptrauth_keys *keys)
+ const struct ptrauth_keys_user *keys)
{
ukeys->apgakey = pac_key_to_user(&keys->apga);
}
-static void pac_generic_keys_from_user(struct ptrauth_keys *keys,
+static void pac_generic_keys_from_user(struct ptrauth_keys_user *keys,
const struct user_pac_generic_keys *ukeys)
{
keys->apga = pac_key_from_user(ukeys->apgakey);
@@ -1056,10 +1345,9 @@ static void pac_generic_keys_from_user(struct ptrauth_keys *keys,
static int pac_generic_keys_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_generic_keys user_keys;
if (!system_supports_generic_auth())
@@ -1067,8 +1355,7 @@ static int pac_generic_keys_get(struct task_struct *target,
pac_generic_keys_to_user(&user_keys, keys);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &user_keys, 0, -1);
+ return membuf_write(&to, &user_keys, sizeof(user_keys));
}
static int pac_generic_keys_set(struct task_struct *target,
@@ -1076,7 +1363,7 @@ static int pac_generic_keys_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct ptrauth_keys *keys = &target->thread.keys_user;
+ struct ptrauth_keys_user *keys = &target->thread.keys_user;
struct user_pac_generic_keys user_keys;
int ret;
@@ -1095,6 +1382,35 @@ static int pac_generic_keys_set(struct task_struct *target,
#endif /* CONFIG_CHECKPOINT_RESTORE */
#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+static int tagged_addr_ctrl_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long ctrl = get_tagged_addr_ctrl(target);
+
+ if (IS_ERR_VALUE(ctrl))
+ return ctrl;
+
+ return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1107,13 +1423,22 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif
+#ifdef CONFIG_ARM64_SME
+ REGSET_SSVE,
+ REGSET_ZA,
+ REGSET_ZT,
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
+ REGSET_PAC_ENABLED_KEYS,
#ifdef CONFIG_CHECKPOINT_RESTORE
REGSET_PACA_KEYS,
REGSET_PACG_KEYS,
#endif
#endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+ REGSET_TAGGED_ADDR_CTRL,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1122,7 +1447,7 @@ static const struct user_regset aarch64_regsets[] = {
.n = sizeof(struct user_pt_regs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
- .get = gpr_get,
+ .regset_get = gpr_get,
.set = gpr_set
},
[REGSET_FPR] = {
@@ -1134,15 +1459,16 @@ static const struct user_regset aarch64_regsets[] = {
*/
.size = sizeof(u32),
.align = sizeof(u32),
- .get = fpr_get,
+ .active = fpr_active,
+ .regset_get = fpr_get,
.set = fpr_set
},
[REGSET_TLS] = {
.core_note_type = NT_ARM_TLS,
- .n = 1,
+ .n = 2,
.size = sizeof(void *),
.align = sizeof(void *),
- .get = tls_get,
+ .regset_get = tls_get,
.set = tls_set,
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1151,7 +1477,7 @@ static const struct user_regset aarch64_regsets[] = {
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
@@ -1159,7 +1485,7 @@ static const struct user_regset aarch64_regsets[] = {
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
#endif
@@ -1168,7 +1494,7 @@ static const struct user_regset aarch64_regsets[] = {
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .get = system_call_get,
+ .regset_get = system_call_get,
.set = system_call_set,
},
#ifdef CONFIG_ARM64_SVE
@@ -1178,9 +1504,43 @@ static const struct user_regset aarch64_regsets[] = {
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
- .get = sve_get,
+ .regset_get = sve_get,
.set = sve_set,
- .get_size = sve_get_size,
+ },
+#endif
+#ifdef CONFIG_ARM64_SME
+ [REGSET_SSVE] = { /* Streaming mode SVE */
+ .core_note_type = NT_ARM_SSVE,
+ .n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
+ SVE_VQ_BYTES),
+ .size = SVE_VQ_BYTES,
+ .align = SVE_VQ_BYTES,
+ .regset_get = ssve_get,
+ .set = ssve_set,
+ },
+ [REGSET_ZA] = { /* SME ZA */
+ .core_note_type = NT_ARM_ZA,
+ /*
+ * ZA is a single register but it's variably sized and
+ * the ptrace core requires that the size of any data
+ * be an exact multiple of the configured register
+ * size so report as though we had SVE_VQ_BYTES
+ * registers. These values aren't exposed to
+ * userspace.
+ */
+ .n = DIV_ROUND_UP(ZA_PT_SIZE(SME_VQ_MAX), SVE_VQ_BYTES),
+ .size = SVE_VQ_BYTES,
+ .align = SVE_VQ_BYTES,
+ .regset_get = za_get,
+ .set = za_set,
+ },
+ [REGSET_ZT] = { /* SME ZT */
+ .core_note_type = NT_ARM_ZT,
+ .n = 1,
+ .size = ZT_SIG_REG_BYTES,
+ .align = sizeof(u64),
+ .regset_get = zt_get,
+ .set = zt_set,
},
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -1189,16 +1549,24 @@ static const struct user_regset aarch64_regsets[] = {
.n = sizeof(struct user_pac_mask) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
- .get = pac_mask_get,
+ .regset_get = pac_mask_get,
/* this cannot be set dynamically */
},
+ [REGSET_PAC_ENABLED_KEYS] = {
+ .core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = pac_enabled_keys_get,
+ .set = pac_enabled_keys_set,
+ },
#ifdef CONFIG_CHECKPOINT_RESTORE
[REGSET_PACA_KEYS] = {
.core_note_type = NT_ARM_PACA_KEYS,
.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
- .get = pac_address_keys_get,
+ .regset_get = pac_address_keys_get,
.set = pac_address_keys_set,
},
[REGSET_PACG_KEYS] = {
@@ -1206,11 +1574,21 @@ static const struct user_regset aarch64_regsets[] = {
.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
- .get = pac_generic_keys_get,
+ .regset_get = pac_generic_keys_get,
.set = pac_generic_keys_set,
},
#endif
#endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+ [REGSET_TAGGED_ADDR_CTRL] = {
+ .core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = tagged_addr_ctrl_get,
+ .set = tagged_addr_ctrl_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
@@ -1224,57 +1602,31 @@ enum compat_regset {
REGSET_COMPAT_VFP,
};
-static int compat_gpr_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+static inline compat_ulong_t compat_get_user_reg(struct task_struct *task, int idx)
{
- int ret = 0;
- unsigned int i, start, num_regs;
-
- /* Calculate the number of AArch32 registers contained in count */
- num_regs = count / regset->size;
-
- /* Convert pos into an register number */
- start = pos / regset->size;
-
- if (start + num_regs > regset->n)
- return -EIO;
+ struct pt_regs *regs = task_pt_regs(task);
- for (i = 0; i < num_regs; ++i) {
- unsigned int idx = start + i;
- compat_ulong_t reg;
-
- switch (idx) {
- case 15:
- reg = task_pt_regs(target)->pc;
- break;
- case 16:
- reg = task_pt_regs(target)->pstate;
- reg = pstate_to_compat_psr(reg);
- break;
- case 17:
- reg = task_pt_regs(target)->orig_x0;
- break;
- default:
- reg = task_pt_regs(target)->regs[idx];
- }
-
- if (kbuf) {
- memcpy(kbuf, &reg, sizeof(reg));
- kbuf += sizeof(reg);
- } else {
- ret = copy_to_user(ubuf, &reg, sizeof(reg));
- if (ret) {
- ret = -EFAULT;
- break;
- }
-
- ubuf += sizeof(reg);
- }
+ switch (idx) {
+ case 15:
+ return regs->pc;
+ case 16:
+ return pstate_to_compat_psr(regs->pstate);
+ case 17:
+ return regs->orig_x0;
+ default:
+ return regs->regs[idx];
}
+}
- return ret;
+static int compat_gpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ int i = 0;
+
+ while (to.left)
+ membuf_store(&to, compat_get_user_reg(target, i++));
+ return 0;
}
static int compat_gpr_set(struct task_struct *target,
@@ -1341,12 +1693,13 @@ static int compat_gpr_set(struct task_struct *target,
static int compat_vfp_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret, vregs_end_pos;
+
+ if (!system_supports_fpsimd())
+ return -EINVAL;
uregs = &target->thread.uw.fpsimd_state;
@@ -1357,19 +1710,10 @@ static int compat_vfp_get(struct task_struct *target,
* The VFP registers are packed into the fpsimd_state, so they all sit
* nicely together for us. We just need to create the fpscr separately.
*/
- vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
- 0, vregs_end_pos);
-
- if (count && !ret) {
- fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
- (uregs->fpcr & VFP_FPSCR_CTRL_MASK);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr,
- vregs_end_pos, VFP_STATE_SIZE);
- }
-
- return ret;
+ membuf_write(&to, uregs, VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
+ (uregs->fpcr & VFP_FPSCR_CTRL_MASK);
+ return membuf_store(&to, fpscr);
}
static int compat_vfp_set(struct task_struct *target,
@@ -1381,6 +1725,9 @@ static int compat_vfp_set(struct task_struct *target,
compat_ulong_t fpscr;
int ret, vregs_end_pos;
+ if (!system_supports_fpsimd())
+ return -EINVAL;
+
uregs = &target->thread.uw.fpsimd_state;
vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
@@ -1401,11 +1748,10 @@ static int compat_vfp_set(struct task_struct *target,
}
static int compat_tls_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
- compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+ return membuf_store(&to, (compat_ulong_t)target->thread.uw.tp_value);
}
static int compat_tls_set(struct task_struct *target,
@@ -1430,7 +1776,7 @@ static const struct user_regset aarch32_regsets[] = {
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
- .get = compat_gpr_get,
+ .regset_get = compat_gpr_get,
.set = compat_gpr_set
},
[REGSET_COMPAT_VFP] = {
@@ -1438,7 +1784,8 @@ static const struct user_regset aarch32_regsets[] = {
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
- .get = compat_vfp_get,
+ .active = fpr_active,
+ .regset_get = compat_vfp_get,
.set = compat_vfp_set
},
};
@@ -1454,7 +1801,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
- .get = compat_gpr_get,
+ .regset_get = compat_gpr_get,
.set = compat_gpr_set
},
[REGSET_FPR] = {
@@ -1462,7 +1809,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
- .get = compat_vfp_get,
+ .regset_get = compat_vfp_get,
.set = compat_vfp_set
},
[REGSET_TLS] = {
@@ -1470,7 +1817,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = 1,
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
- .get = compat_tls_get,
+ .regset_get = compat_tls_get,
.set = compat_tls_set,
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1479,7 +1826,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
@@ -1487,7 +1834,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
- .get = hw_break_get,
+ .regset_get = hw_break_get,
.set = hw_break_set,
},
#endif
@@ -1496,7 +1843,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
- .get = system_call_get,
+ .regset_get = system_call_get,
.set = system_call_set,
},
};
@@ -1521,9 +1868,7 @@ static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
else if (off == COMPAT_PT_TEXT_END_ADDR)
tmp = tsk->mm->end_code;
else if (off < sizeof(compat_elf_gregset_t))
- return copy_regset_to_user(tsk, &user_aarch32_view,
- REGSET_COMPAT_GPR, off,
- sizeof(compat_ulong_t), ret);
+ tmp = compat_get_user_reg(tsk, off >> 2);
else if (off >= COMPAT_USER_SZ)
return -EIO;
else
@@ -1535,8 +1880,8 @@ static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t val)
{
- int ret;
- mm_segment_t old_fs = get_fs();
+ struct pt_regs newregs = *task_pt_regs(tsk);
+ unsigned int idx = off / 4;
if (off & 3 || off >= COMPAT_USER_SZ)
return -EIO;
@@ -1544,14 +1889,25 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
if (off >= sizeof(compat_elf_gregset_t))
return 0;
- set_fs(KERNEL_DS);
- ret = copy_regset_from_user(tsk, &user_aarch32_view,
- REGSET_COMPAT_GPR, off,
- sizeof(compat_ulong_t),
- &val);
- set_fs(old_fs);
+ switch (idx) {
+ case 15:
+ newregs.pc = val;
+ break;
+ case 16:
+ newregs.pstate = compat_psr_to_pstate(val);
+ break;
+ case 17:
+ newregs.orig_x0 = val;
+ break;
+ default:
+ newregs.regs[idx] = val;
+ }
- return ret;
+ if (!valid_user_regs(&newregs.user_regs, tsk))
+ return -EINVAL;
+
+ *task_pt_regs(tsk) = newregs;
+ return 0;
}
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1776,6 +2132,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
+ switch (request) {
+ case PTRACE_PEEKMTETAGS:
+ case PTRACE_POKEMTETAGS:
+ return mte_ptrace_copy_tags(child, request, addr, data);
+ }
+
return ptrace_request(child, request, addr, data);
}
@@ -1784,40 +2146,63 @@ enum ptrace_syscall_dir {
PTRACE_SYSCALL_EXIT,
};
-static void tracehook_report_syscall(struct pt_regs *regs,
- enum ptrace_syscall_dir dir)
+static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir)
{
int regno;
unsigned long saved_reg;
/*
- * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
- * used to denote syscall entry/exit:
+ * We have some ABI weirdness here in the way that we handle syscall
+ * exit stops because we indicate whether or not the stop has been
+ * signalled from syscall entry or syscall exit by clobbering a general
+ * purpose register (ip/r12 for AArch32, x7 for AArch64) in the tracee
+ * and restoring its old value after the stop. This means that:
+ *
+ * - Any writes by the tracer to this register during the stop are
+ * ignored/discarded.
+ *
+ * - The actual value of the register is not available during the stop,
+ * so the tracer cannot save it and restore it later.
+ *
+ * - Syscall stops behave differently to seccomp and pseudo-step traps
+ * (the latter do not nobble any registers).
*/
regno = (is_compat_task() ? 12 : 7);
saved_reg = regs->regs[regno];
regs->regs[regno] = dir;
- if (dir == PTRACE_SYSCALL_EXIT)
- tracehook_report_syscall_exit(regs, 0);
- else if (tracehook_report_syscall_entry(regs))
- forget_syscall(regs);
+ if (dir == PTRACE_SYSCALL_ENTER) {
+ if (ptrace_report_syscall_entry(regs))
+ forget_syscall(regs);
+ regs->regs[regno] = saved_reg;
+ } else if (!test_thread_flag(TIF_SINGLESTEP)) {
+ ptrace_report_syscall_exit(regs, 0);
+ regs->regs[regno] = saved_reg;
+ } else {
+ regs->regs[regno] = saved_reg;
- regs->regs[regno] = saved_reg;
+ /*
+ * Signal a pseudo-step exception since we are stepping but
+ * tracer modifications to the registers may have rewound the
+ * state machine.
+ */
+ ptrace_report_syscall_exit(regs, 1);
+ }
}
int syscall_trace_enter(struct pt_regs *regs)
{
- if (test_thread_flag(TIF_SYSCALL_TRACE) ||
- test_thread_flag(TIF_SYSCALL_EMU)) {
- tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
- if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU))
- return -1;
+ unsigned long flags = read_thread_flags();
+
+ if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
+ report_syscall(regs, PTRACE_SYSCALL_ENTER);
+ if (flags & _TIF_SYSCALL_EMU)
+ return NO_SYSCALL;
}
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing() == -1)
- return -1;
+ return NO_SYSCALL;
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, regs->syscallno);
@@ -1830,13 +2215,15 @@ int syscall_trace_enter(struct pt_regs *regs)
void syscall_trace_exit(struct pt_regs *regs)
{
+ unsigned long flags = read_thread_flags();
+
audit_syscall_exit(regs);
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- trace_sys_exit(regs, regs_return_value(regs));
+ if (flags & _TIF_SYSCALL_TRACEPOINT)
+ trace_sys_exit(regs, syscall_get_return_value(current, regs));
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+ if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP))
+ report_syscall(regs, PTRACE_SYSCALL_EXIT);
rseq_syscall(regs);
}
@@ -1852,8 +2239,8 @@ void syscall_trace_exit(struct pt_regs *regs)
* We also reserve IL for the kernel; SS is handled dynamically.
*/
#define SPSR_EL1_AARCH64_RES0_BITS \
- (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
- GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5))
+ (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \
+ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5))
#define SPSR_EL1_AARCH32_RES0_BITS \
(GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20))
@@ -1913,8 +2300,8 @@ static int valid_native_regs(struct user_pt_regs *regs)
*/
int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task)
{
- if (!test_tsk_thread_flag(task, TIF_SINGLESTEP))
- regs->pstate &= ~DBG_SPSR_SS;
+ /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */
+ user_regs_reset_single_step(regs, task);
if (is_compat_thread(task_thread_info(task)))
return valid_compat_regs(regs);
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index e87a2b7f20f6..99f2ffe9fc05 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -48,7 +48,7 @@ static struct {
{ "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel },
};
-static int reloc_test_init(void)
+static int __init reloc_test_init(void)
{
int i;
@@ -67,7 +67,7 @@ static int reloc_test_init(void)
return 0;
}
-static void reloc_test_exit(void)
+static void __exit reloc_test_exit(void)
{
}
diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S
index 16a34f188f26..c50f45fa29fa 100644
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
@@ -5,81 +5,81 @@
#include <linux/linkage.h>
-ENTRY(absolute_data64)
+SYM_FUNC_START(absolute_data64)
ldr x0, 0f
ret
0: .quad sym64_abs
-ENDPROC(absolute_data64)
+SYM_FUNC_END(absolute_data64)
-ENTRY(absolute_data32)
+SYM_FUNC_START(absolute_data32)
ldr w0, 0f
ret
0: .long sym32_abs
-ENDPROC(absolute_data32)
+SYM_FUNC_END(absolute_data32)
-ENTRY(absolute_data16)
+SYM_FUNC_START(absolute_data16)
adr x0, 0f
ldrh w0, [x0]
ret
0: .short sym16_abs, 0
-ENDPROC(absolute_data16)
+SYM_FUNC_END(absolute_data16)
-ENTRY(signed_movw)
+SYM_FUNC_START(signed_movw)
movz x0, #:abs_g2_s:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(signed_movw)
+SYM_FUNC_END(signed_movw)
-ENTRY(unsigned_movw)
+SYM_FUNC_START(unsigned_movw)
movz x0, #:abs_g3:sym64_abs
movk x0, #:abs_g2_nc:sym64_abs
movk x0, #:abs_g1_nc:sym64_abs
movk x0, #:abs_g0_nc:sym64_abs
ret
-ENDPROC(unsigned_movw)
+SYM_FUNC_END(unsigned_movw)
.align 12
.space 0xff8
-ENTRY(relative_adrp)
+SYM_FUNC_START(relative_adrp)
adrp x0, sym64_rel
add x0, x0, #:lo12:sym64_rel
ret
-ENDPROC(relative_adrp)
+SYM_FUNC_END(relative_adrp)
.align 12
.space 0xffc
-ENTRY(relative_adrp_far)
+SYM_FUNC_START(relative_adrp_far)
adrp x0, memstart_addr
add x0, x0, #:lo12:memstart_addr
ret
-ENDPROC(relative_adrp_far)
+SYM_FUNC_END(relative_adrp_far)
-ENTRY(relative_adr)
+SYM_FUNC_START(relative_adr)
adr x0, sym64_rel
ret
-ENDPROC(relative_adr)
+SYM_FUNC_END(relative_adr)
-ENTRY(relative_data64)
+SYM_FUNC_START(relative_data64)
adr x1, 0f
ldr x0, [x1]
add x0, x0, x1
ret
0: .quad sym64_rel - .
-ENDPROC(relative_data64)
+SYM_FUNC_END(relative_data64)
-ENTRY(relative_data32)
+SYM_FUNC_START(relative_data32)
adr x1, 0f
ldr w0, [x1]
add x0, x0, x1
ret
0: .long sym64_rel - .
-ENDPROC(relative_data32)
+SYM_FUNC_END(relative_data32)
-ENTRY(relative_data16)
+SYM_FUNC_START(relative_data16)
adr x1, 0f
ldrsh w0, [x1]
add x0, x0, x1
ret
0: .short sym64_rel - ., 0
-ENDPROC(relative_data16)
+SYM_FUNC_END(relative_data16)
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index c1d7db71a726..413f899e4ac6 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -4,6 +4,8 @@
*
* Copyright (C) Linaro.
* Copyright (C) Huawei Futurewei Technologies.
+ * Copyright (C) 2021, Microsoft Corporation.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
*/
#include <linux/kexec.h>
@@ -13,117 +15,86 @@
#include <asm/kexec.h>
#include <asm/page.h>
#include <asm/sysreg.h>
+#include <asm/virt.h>
+.macro turn_off_mmu tmp1, tmp2
+ mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el1, \tmp1
+ isb
+.endm
+
+.section ".kexec_relocate.text", "ax"
/*
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
*
- * The memory that the old kernel occupies may be overwritten when coping the
+ * The memory that the old kernel occupies may be overwritten when copying the
* new image to its final location. To assure that the
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
* all code and data needed by arm64_relocate_new_kernel must be between the
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
- * control_code_page, a special page which has been set up to be preserved
- * during the copy operation.
+ * safe memory that has been set up to be preserved during the copy operation.
*/
-ENTRY(arm64_relocate_new_kernel)
+SYM_CODE_START(arm64_relocate_new_kernel)
+ /*
+ * The kimage structure isn't allocated specially and may be clobbered
+ * during relocation. We must load any values we need from it prior to
+ * any relocation occurring.
+ */
+ ldr x28, [x0, #KIMAGE_START]
+ ldr x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
+ ldr x26, [x0, #KIMAGE_ARCH_DTB_MEM]
/* Setup the list loop variables. */
- mov x18, x2 /* x18 = dtb address */
- mov x17, x1 /* x17 = kimage_start */
- mov x16, x0 /* x16 = kimage_head */
- raw_dcache_line_size x15, x0 /* x15 = dcache line size */
- mov x14, xzr /* x14 = entry ptr */
- mov x13, xzr /* x13 = copy dest */
-
- /* Clear the sctlr_el2 flags. */
- mrs x0, CurrentEL
- cmp x0, #CurrentEL_EL2
- b.ne 1f
- mrs x0, sctlr_el2
- ldr x1, =SCTLR_ELx_FLAGS
- bic x0, x0, x1
- pre_disable_mmu_workaround
- msr sctlr_el2, x0
- isb
-1:
-
- /* Check if the new image needs relocation. */
- tbnz x16, IND_DONE_BIT, .Ldone
-
+ ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
+ ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */
+ ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */
+ ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */
+ raw_dcache_line_size x15, x1 /* x15 = dcache line size */
+ break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */
.Lloop:
and x12, x16, PAGE_MASK /* x12 = addr */
-
+ sub x12, x12, x22 /* Convert x12 to virt */
/* Test the entry flags. */
.Ltest_source:
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
/* Invalidate dest page to PoC. */
- mov x0, x13
- add x20, x0, #PAGE_SIZE
- sub x1, x15, #1
- bic x0, x0, x1
-2: dc ivac, x0
- add x0, x0, x15
- cmp x0, x20
- b.lo 2b
- dsb sy
-
- mov x20, x13
- mov x21, x12
- copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
-
- /* dest += PAGE_SIZE */
- add x13, x13, PAGE_SIZE
+ mov x19, x13
+ copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
+ add x1, x19, #PAGE_SIZE
+ dcache_by_myline_op civac, sy, x19, x1, x15, x20
b .Lnext
-
.Ltest_indirection:
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
-
- /* ptr = addr */
- mov x14, x12
+ mov x14, x12 /* ptr = addr */
b .Lnext
-
.Ltest_destination:
tbz x16, IND_DESTINATION_BIT, .Lnext
-
- /* dest = addr */
- mov x13, x12
-
+ mov x13, x12 /* dest = addr */
.Lnext:
- /* entry = *ptr++ */
- ldr x16, [x14], #8
-
- /* while (!(entry & DONE)) */
- tbz x16, IND_DONE_BIT, .Lloop
-
-.Ldone:
+ ldr x16, [x14], #8 /* entry = *ptr++ */
+ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
/* wait for writes from copy_page to finish */
dsb nsh
ic iallu
dsb nsh
isb
+ turn_off_mmu x12, x13
/* Start new image. */
- mov x0, x18
+ cbz x27, .Lel1
+ mov x1, x28 /* kernel entry point */
+ mov x2, x26 /* dtb address */
+ mov x3, xzr
+ mov x4, xzr
+ mov x0, #HVC_SOFT_RESTART
+ hvc #0 /* Jumps from el2 */
+.Lel1:
+ mov x0, x26 /* dtb address */
mov x1, xzr
mov x2, xzr
mov x3, xzr
- br x17
-
-ENDPROC(arm64_relocate_new_kernel)
-
-.ltorg
-
-.align 3 /* To keep the 64-bit values below naturally aligned. */
-
-.Lcopy_end:
-.org KEXEC_CONTROL_PAGE_SIZE
-
-/*
- * arm64_relocate_new_kernel_size - Number of bytes to copy to the
- * control_code_page.
- */
-.globl arm64_relocate_new_kernel_size
-arm64_relocate_new_kernel_size:
- .quad .Lcopy_end - arm64_relocate_new_kernel
+ br x28 /* Jumps from el1 */
+SYM_CODE_END(arm64_relocate_new_kernel)
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index a5e8b3b9d798..68330017d04f 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -9,25 +9,25 @@
#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
+#include <linux/stacktrace.h>
#include <asm/stack_pointer.h>
-#include <asm/stacktrace.h>
struct return_address_data {
unsigned int level;
void *addr;
};
-static int save_return_addr(struct stackframe *frame, void *d)
+static bool save_return_addr(void *d, unsigned long pc)
{
struct return_address_data *data = d;
if (!data->level) {
- data->addr = (void *)frame->pc;
- return 1;
+ data->addr = (void *)pc;
+ return false;
} else {
--data->level;
- return 0;
+ return true;
}
}
NOKPROBE_SYMBOL(save_return_addr);
@@ -35,15 +35,11 @@ NOKPROBE_SYMBOL(save_return_addr);
void *return_address(unsigned int level)
{
struct return_address_data data;
- struct stackframe frame;
data.level = level + 2;
data.addr = NULL;
- start_backtrace(&frame,
- (unsigned long)__builtin_frame_address(0),
- (unsigned long)return_address);
- walk_stackframe(current, &frame, save_return_addr, &data);
+ arch_stack_walk(save_return_addr, &data, current, NULL);
if (!data.level)
return data.addr;
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..255d12f881c2 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -7,9 +7,11 @@
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched/task_stack.h>
+#include <linux/scs.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
+#include <asm/exception.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
@@ -37,6 +39,17 @@ DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
#endif
+DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
+DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
+#endif
+
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
unsigned long *p;
@@ -52,6 +65,9 @@ static void free_sdei_stacks(void)
{
int cpu;
+ if (!IS_ENABLED(CONFIG_VMAP_STACK))
+ return;
+
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -75,6 +91,9 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
+ if (!IS_ENABLED(CONFIG_VMAP_STACK))
+ return 0;
+
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
if (err)
@@ -90,58 +109,60 @@ static int init_sdei_stacks(void)
return err;
}
-static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
+static void _free_sdei_scs(unsigned long * __percpu *ptr, int cpu)
{
- unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
- unsigned long high = low + SDEI_STACK_SIZE;
-
- if (!low)
- return false;
+ void *s;
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_NORMAL;
+ s = per_cpu(*ptr, cpu);
+ if (s) {
+ per_cpu(*ptr, cpu) = NULL;
+ scs_free(s);
}
-
- return true;
}
-static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
+static void free_sdei_scs(void)
{
- unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
- unsigned long high = low + SDEI_STACK_SIZE;
+ int cpu;
- if (!low)
- return false;
+ for_each_possible_cpu(cpu) {
+ _free_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu);
+ _free_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
- if (sp < low || sp >= high)
- return false;
+static int _init_sdei_scs(unsigned long * __percpu *ptr, int cpu)
+{
+ void *s;
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_SDEI_CRITICAL;
- }
+ s = scs_alloc(cpu_to_node(cpu));
+ if (!s)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = s;
- return true;
+ return 0;
}
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
+static int init_sdei_scs(void)
{
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return false;
+ int cpu;
+ int err = 0;
+
+ if (!scs_is_enabled())
+ return 0;
- if (on_sdei_critical_stack(sp, info))
- return true;
+ for_each_possible_cpu(cpu) {
+ err = _init_sdei_scs(&sdei_shadow_call_stack_normal_ptr, cpu);
+ if (err)
+ break;
+ err = _init_sdei_scs(&sdei_shadow_call_stack_critical_ptr, cpu);
+ if (err)
+ break;
+ }
- if (on_sdei_normal_stack(sp, info))
- return true;
+ if (err)
+ free_sdei_scs();
- return false;
+ return err;
}
unsigned long sdei_arch_get_entry_point(int conduit)
@@ -152,15 +173,16 @@ unsigned long sdei_arch_get_entry_point(int conduit)
* dropped to EL1 because we don't support VHE, then we can't support
* SDEI.
*/
- if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+ if (is_hyp_nvhe()) {
pr_err("Not supported on this hardware/boot configuration\n");
- return 0;
+ goto out_err;
}
- if (IS_ENABLED(CONFIG_VMAP_STACK)) {
- if (init_sdei_stacks())
- return 0;
- }
+ if (init_sdei_stacks())
+ goto out_err;
+
+ if (init_sdei_scs())
+ goto out_err_free_stacks;
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
@@ -175,16 +197,20 @@ unsigned long sdei_arch_get_entry_point(int conduit)
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
return (unsigned long)__sdei_asm_handler;
+out_err_free_stacks:
+ free_sdei_stacks();
+out_err:
+ return 0;
}
/*
- * __sdei_handler() returns one of:
+ * do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
* SDEI_EV_FAILED - failure, return this error code to firmare.
* virtual-address - success, return to this address.
*/
-static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
- struct sdei_registered_event *arg)
+unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
{
u32 mode;
int i, err = 0;
@@ -202,12 +228,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
sdei_api_event_context(i, &regs->regs[i]);
}
- /*
- * We didn't take an exception to get here, set PAN. UAO will be cleared
- * by sdei_event_handler()s set_fs(USER_DS) call.
- */
- __uaccess_enable_hw_pan();
-
err = sdei_event_handler(regs, arg);
if (err)
return SDEI_EV_FAILED;
@@ -245,28 +265,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
return vbar + 0x480;
}
-
-
-asmlinkage __kprobes notrace unsigned long
-__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
-{
- unsigned long ret;
- bool do_nmi_exit = false;
-
- /*
- * nmi_enter() deals with printk() re-entrance and use of RCU when
- * RCU believed this CPU was idle. Because critical events can
- * interrupt normal events, we may already be in_nmi().
- */
- if (!in_nmi()) {
- nmi_enter();
- do_nmi_exit = true;
- }
-
- ret = _sdei_handler(regs, arg);
-
- if (do_nmi_exit)
- nmi_exit();
-
- return ret;
-}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 56f664561754..42c690bb2d60 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -23,12 +23,14 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/fs.h>
+#include <linux/panic_notifier.h>
#include <linux/proc_fs.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/efi.h>
#include <linux/psci.h>
#include <linux/sched/task.h>
+#include <linux/scs.h>
#include <linux/mm.h>
#include <asm/acpi.h>
@@ -41,6 +43,7 @@
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/numa.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -55,6 +58,7 @@ static int num_standard_resources;
static struct resource *standard_resources;
phys_addr_t __fdt_pointer __initdata;
+u64 mmu_enabled_at_boot __initdata;
/*
* Standard memory resources
@@ -85,14 +89,8 @@ u64 __cacheline_aligned boot_args[4];
void __init smp_setup_processor_id(void)
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
- cpu_logical_map(0) = mpidr;
+ set_cpu_logical_map(0, mpidr);
- /*
- * clear __my_cpu_offset on boot CPU to avoid hang caused by
- * using percpu variable early, for example, lockdep will
- * access percpu variable inside lock_release
- */
- set_my_cpu_offset(0);
pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
(unsigned long)mpidr, read_cpuid_id());
}
@@ -168,6 +166,21 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
+static void *early_fdt_ptr __initdata;
+
+void __init *get_early_fdt_ptr(void)
+{
+ return early_fdt_ptr;
+}
+
+asmlinkage void __init early_fdt_map(u64 dt_phys)
+{
+ int fdt_size;
+
+ early_fixmap_init();
+ early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
+}
+
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
@@ -179,11 +192,16 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
if (!dt_virt || !early_init_dt_scan(dt_virt)) {
pr_crit("\n"
- "Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n"
+ "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n"
"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
"\nPlease check your bootloader.",
&dt_phys, dt_virt);
+ /*
+ * Note that in this _really_ early stage we cannot even BUG()
+ * or oops, so the least terrible thing to do is cpu_relax(),
+ * or else we could end-up printing non-initialized data, etc.
+ */
while (true)
cpu_relax();
}
@@ -206,10 +224,12 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_text);
+ kernel_code.start = __pa_symbol(_stext);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
+ insert_resource(&iomem_resource, &kernel_code);
+ insert_resource(&iomem_resource, &kernel_data);
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
@@ -217,32 +237,21 @@ static void __init request_standard_resources(void)
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
- for_each_memblock(memory, region) {
+ for_each_mem_region(region) {
res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name = "reserved";
res->flags = IORESOURCE_MEM;
+ res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
} else {
res->name = "System RAM";
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
}
- res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
- res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-
- request_resource(&iomem_resource, res);
-
- if (kernel_code.start >= res->start &&
- kernel_code.end <= res->end)
- request_resource(res, &kernel_code);
- if (kernel_data.start >= res->start &&
- kernel_data.end <= res->end)
- request_resource(res, &kernel_data);
-#ifdef CONFIG_KEXEC_CORE
- /* Userspace will find "Crash kernel" region in /proc/iomem. */
- if (crashk_res.end && crashk_res.start >= res->start &&
- crashk_res.end <= res->end)
- request_resource(res, &crashk_res);
-#endif
+
+ insert_resource(&iomem_resource, res);
}
}
@@ -257,7 +266,7 @@ static int __init reserve_memblock_reserved_regions(void)
if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
- for_each_reserved_mem_region(j, &r_start, &r_end) {
+ for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end;
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
@@ -276,15 +285,26 @@ arch_initcall(reserve_memblock_reserved_regions);
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
-void __init setup_arch(char **cmdline_p)
+u64 cpu_logical_map(unsigned int cpu)
+{
+ return __cpu_logical_map[cpu];
+}
+
+void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- init_mm.start_code = (unsigned long) _text;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = (unsigned long) _end;
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
*cmdline_p = boot_command_line;
+ kaslr_init();
+
+ /*
+ * If know now we are going to need KPTI then use non-global
+ * mappings from the start, avoiding the cost of rewriting
+ * everything later.
+ */
+ arm64_use_ng_mappings = kaslr_requires_kpti();
+
early_fixmap_init();
early_ioremap_init();
@@ -297,6 +317,8 @@ void __init setup_arch(char **cmdline_p)
jump_label_init();
parse_early_param();
+ dynamic_scs_init();
+
/*
* Unmask asynchronous aborts and fiq after bringing up possible
* earlycon. (Report possible System Errors once we can report this
@@ -312,6 +334,14 @@ void __init setup_arch(char **cmdline_p)
xen_early_init();
efi_init();
+
+ if (!efi_enabled(EFI_BOOT)) {
+ if ((u64)_text % MIN_KIMG_ALIGN)
+ pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+ WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
+ FW_BUG "Booted with MMU enabled!");
+ }
+
arm64_memblock_init();
paging_init();
@@ -337,12 +367,12 @@ void __init setup_arch(char **cmdline_p)
else
psci_acpi_init();
- cpu_read_bootcpu_ops();
+ init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
/* Init percpu seeds for random tags after cpus are set up. */
- kasan_init_tags();
+ kasan_init_sw_tags();
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
@@ -350,12 +380,9 @@ void __init setup_arch(char **cmdline_p)
* faults in case uaccess_enable() is inadvertently called by the init
* thread.
*/
- init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page);
+ init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
#endif
-#ifdef CONFIG_VT
- conswitchp = &dummy_con;
-#endif
if (boot_args[1] || boot_args[2] || boot_args[3]) {
pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
@@ -367,34 +394,20 @@ void __init setup_arch(char **cmdline_p)
static inline bool cpu_can_disable(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable)
- return cpu_ops[cpu]->cpu_can_disable(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_can_disable)
+ return ops->cpu_can_disable(cpu);
#endif
return false;
}
-static int __init topology_init(void)
+bool arch_cpu_is_hotpluggable(int num)
{
- int i;
-
- for_each_online_node(i)
- register_one_node(i);
-
- for_each_possible_cpu(i) {
- struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
- cpu->hotpluggable = cpu_can_disable(i);
- register_cpu(cpu, i);
- }
-
- return 0;
+ return cpu_can_disable(num);
}
-subsys_initcall(topology_init);
-/*
- * Dump out kernel offset information on panic.
- */
-static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
- void *p)
+static void dump_kernel_offset(void)
{
const unsigned long offset = kaslr_offset();
@@ -405,17 +418,33 @@ static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
} else {
pr_emerg("Kernel Offset: disabled\n");
}
+}
+
+static int arm64_panic_block_dump(struct notifier_block *self,
+ unsigned long v, void *p)
+{
+ dump_kernel_offset();
+ dump_cpu_features();
+ dump_mem_limit();
return 0;
}
-static struct notifier_block kernel_offset_notifier = {
- .notifier_call = dump_kernel_offset
+static struct notifier_block arm64_panic_block = {
+ .notifier_call = arm64_panic_block_dump
};
-static int __init register_kernel_offset_dumper(void)
+static int __init register_arm64_panic_block(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
- &kernel_offset_notifier);
+ &arm64_panic_block);
+ return 0;
+}
+device_initcall(register_arm64_panic_block);
+
+static int __init check_mmu_enabled_at_boot(void)
+{
+ if (!efi_enabled(EFI_BOOT) && mmu_enabled_at_boot)
+ panic("Non-EFI boot detected with MMU and caches enabled");
return 0;
}
-__initcall(register_kernel_offset_dumper);
+device_initcall_sync(check_mmu_enabled_at_boot);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index dd2cdc0d5be2..0e8beb3349ea 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -11,24 +11,25 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/signal.h>
-#include <linux/personality.h>
#include <linux/freezer.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/ratelimit.h>
#include <linux/syscalls.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
+#include <asm/exception.h>
#include <asm/cacheflush.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
+#include <asm/syscall.h>
#include <asm/signal32.h>
#include <asm/traps.h>
#include <asm/vdso.h>
@@ -56,6 +57,9 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
unsigned long sve_offset;
+ unsigned long tpidr2_offset;
+ unsigned long za_offset;
+ unsigned long zt_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -90,7 +94,7 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
* not taken into account. This limit is not a guarantee and is
* NOT ABI.
*/
-#define SIGFRAME_MAXSZ SZ_64K
+#define SIGFRAME_MAXSZ SZ_256K
static int __sigframe_alloc(struct rt_sigframe_user_layout *user,
unsigned long *offset, size_t size, bool extend)
@@ -167,6 +171,19 @@ static void __user *apply_user_offset(
return base + offset;
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+ u32 fpsimd_size;
+ struct sve_context __user *sve;
+ u32 sve_size;
+ struct tpidr2_context __user *tpidr2;
+ u32 tpidr2_size;
+ struct za_context __user *za;
+ u32 za_size;
+ struct zt_context __user *zt;
+ u32 zt_size;
+};
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct user_fpsimd_state const *fpsimd =
@@ -185,27 +202,23 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
-static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
+static int restore_fpsimd_context(struct user_ctxs *user)
{
struct user_fpsimd_state fpsimd;
- __u32 magic, size;
int err = 0;
- /* check the magic/size information */
- __get_user_error(magic, &ctx->head.magic, err);
- __get_user_error(size, &ctx->head.size, err);
- if (err)
- return -EFAULT;
- if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context))
+ /* check the size information */
+ if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, ctx->vregs,
+ err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
- __get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+ __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
clear_thread_flag(TIF_SVE);
+ current->thread.fp_type = FP_STATE_FPSIMD;
/* load the hardware registers from the fpsimd_state structure */
if (!err)
@@ -215,22 +228,23 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
}
-struct user_ctxs {
- struct fpsimd_context __user *fpsimd;
- struct sve_context __user *sve;
-};
-
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
{
int err = 0;
u16 reserved[ARRAY_SIZE(ctx->__reserved)];
- unsigned int vl = current->thread.sve_vl;
+ u16 flags = 0;
+ unsigned int vl = task_get_sve_vl(current);
unsigned int vq = 0;
- if (test_thread_flag(TIF_SVE))
+ if (thread_sm_enabled(&current->thread)) {
+ vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
+ flags |= SVE_SIG_FLAG_SM;
+ } else if (test_thread_flag(TIF_SVE)) {
+ vq = sve_vq_from_vl(vl);
+ }
memset(reserved, 0, sizeof(reserved));
@@ -238,13 +252,15 @@ static int preserve_sve_context(struct sve_context __user *ctx)
__put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
&ctx->head.size, err);
__put_user_error(vl, &ctx->vl, err);
+ __put_user_error(flags, &ctx->flags, err);
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
/*
* This assumes that the SVE state has already been saved to
- * the task struct by calling preserve_fpsimd_context().
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
*/
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
current->thread.sve_state,
@@ -256,25 +272,49 @@ static int preserve_sve_context(struct sve_context __user *ctx)
static int restore_sve_fpsimd_context(struct user_ctxs *user)
{
- int err;
- unsigned int vq;
+ int err = 0;
+ unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
- struct sve_context sve;
+ u16 user_vl, flags;
- if (__copy_from_user(&sve, user->sve, sizeof(sve)))
- return -EFAULT;
+ if (user->sve_size < sizeof(*user->sve))
+ return -EINVAL;
+
+ __get_user_error(user_vl, &(user->sve->vl), err);
+ __get_user_error(flags, &(user->sve->flags), err);
+ if (err)
+ return err;
+
+ if (flags & SVE_SIG_FLAG_SM) {
+ if (!system_supports_sme())
+ return -EINVAL;
- if (sve.vl != current->thread.sve_vl)
+ vl = task_get_sme_vl(current);
+ } else {
+ /*
+ * A SME only system use SVE for streaming mode so can
+ * have a SVE formatted context with a zero VL and no
+ * payload data.
+ */
+ if (!system_supports_sve() && !system_supports_sme())
+ return -EINVAL;
+
+ vl = task_get_sve_vl(current);
+ }
+
+ if (user_vl != vl)
return -EINVAL;
- if (sve.head.size <= sizeof(*user->sve)) {
+ if (user->sve_size == sizeof(*user->sve)) {
clear_thread_flag(TIF_SVE);
+ current->thread.svcr &= ~SVCR_SM_MASK;
+ current->thread.fp_type = FP_STATE_FPSIMD;
goto fpsimd_only;
}
- vq = sve_vq_from_vl(sve.vl);
+ vq = sve_vq_from_vl(vl);
- if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+ if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
/*
@@ -287,7 +327,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
fpsimd_flush_task_state(current);
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
- sve_alloc(current);
+ sve_alloc(current, true);
+ if (!current->thread.sve_state) {
+ clear_thread_flag(TIF_SVE);
+ return -ENOMEM;
+ }
+
err = __copy_from_user(current->thread.sve_state,
(char __user const *)user->sve +
SVE_SIG_REGS_OFFSET,
@@ -295,7 +340,11 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return -EFAULT;
- set_thread_flag(TIF_SVE);
+ if (flags & SVE_SIG_FLAG_SM)
+ current->thread.svcr |= SVCR_SM_MASK;
+ else
+ set_thread_flag(TIF_SVE);
+ current->thread.fp_type = FP_STATE_SVE;
fpsimd_only:
/* copy the FP and status/control registers */
@@ -314,12 +363,216 @@ fpsimd_only:
#else /* ! CONFIG_ARM64_SVE */
-/* Turn any non-optimised out attempts to use these into a link error: */
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
+/* Turn any non-optimised out attempts to use this into a link error: */
extern int preserve_sve_context(void __user *ctx);
-extern int restore_sve_fpsimd_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
+{
+ int err = 0;
+
+ current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+
+ __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+
+ return err;
+}
+
+static int restore_tpidr2_context(struct user_ctxs *user)
+{
+ u64 tpidr2_el0;
+ int err = 0;
+
+ if (user->tpidr2_size != sizeof(*user->tpidr2))
+ return -EINVAL;
+
+ __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
+ if (!err)
+ write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0);
+
+ return err;
+}
+
+static int preserve_za_context(struct za_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+ unsigned int vl = task_get_sme_vl(current);
+ unsigned int vq;
+
+ if (thread_za_enabled(&current->thread))
+ vq = sve_vq_from_vl(vl);
+ else
+ vq = 0;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZA_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZA_SIG_CONTEXT_SIZE(vq), 16),
+ &ctx->head.size, err);
+ __put_user_error(vl, &ctx->vl, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ if (vq) {
+ /*
+ * This assumes that the ZA state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
+ current->thread.sme_state,
+ ZA_SIG_REGS_SIZE(vq));
+ }
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_za_context(struct user_ctxs *user)
+{
+ int err = 0;
+ unsigned int vq;
+ u16 user_vl;
+
+ if (user->za_size < sizeof(*user->za))
+ return -EINVAL;
+
+ __get_user_error(user_vl, &(user->za->vl), err);
+ if (err)
+ return err;
+
+ if (user_vl != task_get_sme_vl(current))
+ return -EINVAL;
+
+ if (user->za_size == sizeof(*user->za)) {
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ return 0;
+ }
+
+ vq = sve_vq_from_vl(user_vl);
+
+ if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
+ return -EINVAL;
+
+ /*
+ * Careful: we are about __copy_from_user() directly into
+ * thread.sme_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
+
+ sme_alloc(current, true);
+ if (!current->thread.sme_state) {
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ clear_thread_flag(TIF_SME);
+ return -ENOMEM;
+ }
+
+ err = __copy_from_user(current->thread.sme_state,
+ (char __user const *)user->za +
+ ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(vq));
+ if (err)
+ return -EFAULT;
+
+ set_thread_flag(TIF_SME);
+ current->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+
+static int preserve_zt_context(struct zt_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+
+ if (WARN_ON(!thread_za_enabled(&current->thread)))
+ return -EINVAL;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+ &ctx->head.size, err);
+ __put_user_error(1, &ctx->nregs, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ /*
+ * This assumes that the ZT state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+ thread_zt_state(&current->thread),
+ ZT_SIG_REGS_SIZE(1));
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_zt_context(struct user_ctxs *user)
+{
+ int err;
+ u16 nregs;
+
+ /* ZA must be restored first for this check to be valid */
+ if (!thread_za_enabled(&current->thread))
+ return -EINVAL;
+
+ if (user->zt_size != ZT_SIG_CONTEXT_SIZE(1))
+ return -EINVAL;
+
+ if (__copy_from_user(&nregs, &(user->zt->nregs), sizeof(nregs)))
+ return -EFAULT;
+
+ if (nregs != 1)
+ return -EINVAL;
+
+ /*
+ * Careful: we are about __copy_from_user() directly into
+ * thread.zt_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+
+ err = __copy_from_user(thread_zt_state(&current->thread),
+ (char __user const *)user->zt +
+ ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(1));
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
+#else /* ! CONFIG_ARM64_SME */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_tpidr2_context(void __user *ctx);
+extern int restore_tpidr2_context(struct user_ctxs *user);
+extern int preserve_za_context(void __user *ctx);
+extern int restore_za_context(struct user_ctxs *user);
+extern int preserve_zt_context(void __user *ctx);
+extern int restore_zt_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_SME */
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
@@ -334,6 +587,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->fpsimd = NULL;
user->sve = NULL;
+ user->tpidr2 = NULL;
+ user->za = NULL;
+ user->zt = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -371,13 +627,13 @@ static int parse_user_sigframe(struct user_ctxs *user,
goto done;
case FPSIMD_MAGIC:
- if (user->fpsimd)
+ if (!system_supports_fpsimd())
goto invalid;
-
- if (size < sizeof(*user->fpsimd))
+ if (user->fpsimd)
goto invalid;
user->fpsimd = (struct fpsimd_context __user *)head;
+ user->fpsimd_size = size;
break;
case ESR_MAGIC:
@@ -385,16 +641,47 @@ static int parse_user_sigframe(struct user_ctxs *user,
break;
case SVE_MAGIC:
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
goto invalid;
if (user->sve)
goto invalid;
- if (size < sizeof(*user->sve))
+ user->sve = (struct sve_context __user *)head;
+ user->sve_size = size;
+ break;
+
+ case TPIDR2_MAGIC:
+ if (!system_supports_tpidr2())
goto invalid;
- user->sve = (struct sve_context __user *)head;
+ if (user->tpidr2)
+ goto invalid;
+
+ user->tpidr2 = (struct tpidr2_context __user *)head;
+ user->tpidr2_size = size;
+ break;
+
+ case ZA_MAGIC:
+ if (!system_supports_sme())
+ goto invalid;
+
+ if (user->za)
+ goto invalid;
+
+ user->za = (struct za_context __user *)head;
+ user->za_size = size;
+ break;
+
+ case ZT_MAGIC:
+ if (!system_supports_sme2())
+ goto invalid;
+
+ if (user->zt)
+ goto invalid;
+
+ user->zt = (struct zt_context __user *)head;
+ user->zt_size = size;
break;
case EXTRA_MAGIC:
@@ -506,20 +793,25 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0)
err = parse_user_sigframe(&user, sf);
- if (err == 0) {
+ if (err == 0 && system_supports_fpsimd()) {
if (!user.fpsimd)
return -EINVAL;
- if (user.sve) {
- if (!system_supports_sve())
- return -EINVAL;
-
+ if (user.sve)
err = restore_sve_fpsimd_context(&user);
- } else {
- err = restore_fpsimd_context(user.fpsimd);
- }
+ else
+ err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_tpidr2() && user.tpidr2)
+ err = restore_tpidr2_context(&user);
+
+ if (err == 0 && system_supports_sme() && user.za)
+ err = restore_za_context(&user);
+
+ if (err == 0 && system_supports_sme2() && user.zt)
+ err = restore_zt_context(&user);
+
return err;
}
@@ -568,10 +860,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
{
int err;
- err = sigframe_alloc(user, &user->fpsimd_offset,
- sizeof(struct fpsimd_context));
- if (err)
- return err;
+ if (system_supports_fpsimd()) {
+ err = sigframe_alloc(user, &user->fpsimd_offset,
+ sizeof(struct fpsimd_context));
+ if (err)
+ return err;
+ }
/* fault information, if valid */
if (add_all || current->thread.fault_code) {
@@ -581,14 +875,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
- if (add_all || test_thread_flag(TIF_SVE)) {
- int vl = sve_max_vl;
+ if (add_all || test_thread_flag(TIF_SVE) ||
+ thread_sm_enabled(&current->thread)) {
+ int vl = max(sve_max_vl(), sme_max_vl());
if (!add_all)
- vl = current->thread.sve_vl;
+ vl = thread_get_cur_vl(&current->thread);
vq = sve_vq_from_vl(vl);
}
@@ -599,6 +894,40 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_tpidr2()) {
+ err = sigframe_alloc(user, &user->tpidr2_offset,
+ sizeof(struct tpidr2_context));
+ if (err)
+ return err;
+ }
+
+ if (system_supports_sme()) {
+ unsigned int vl;
+ unsigned int vq = 0;
+
+ if (add_all)
+ vl = sme_max_vl();
+ else
+ vl = task_get_sme_vl(current);
+
+ if (thread_za_enabled(&current->thread))
+ vq = sve_vq_from_vl(vl);
+
+ err = sigframe_alloc(user, &user->za_offset,
+ ZA_SIG_CONTEXT_SIZE(vq));
+ if (err)
+ return err;
+ }
+
+ if (system_supports_sme2()) {
+ if (add_all || thread_za_enabled(&current->thread)) {
+ err = sigframe_alloc(user, &user->zt_offset,
+ ZT_SIG_CONTEXT_SIZE(1));
+ if (err)
+ return err;
+ }
+ }
+
return sigframe_alloc_end(user);
}
@@ -623,7 +952,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
- if (err == 0) {
+ if (err == 0 && system_supports_fpsimd()) {
struct fpsimd_context __user *fpsimd_ctx =
apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
@@ -639,13 +968,35 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
- /* Scalable Vector Extension state, if present */
- if (system_supports_sve() && err == 0 && user->sve_offset) {
+ /* Scalable Vector Extension state (including streaming), if present */
+ if ((system_supports_sve() || system_supports_sme()) &&
+ err == 0 && user->sve_offset) {
struct sve_context __user *sve_ctx =
apply_user_offset(user, user->sve_offset);
err |= preserve_sve_context(sve_ctx);
}
+ /* TPIDR2 if supported */
+ if (system_supports_tpidr2() && err == 0) {
+ struct tpidr2_context __user *tpidr2_ctx =
+ apply_user_offset(user, user->tpidr2_offset);
+ err |= preserve_tpidr2_context(tpidr2_ctx);
+ }
+
+ /* ZA state if present */
+ if (system_supports_sme() && err == 0 && user->za_offset) {
+ struct za_context __user *za_ctx =
+ apply_user_offset(user, user->za_offset);
+ err |= preserve_za_context(za_ctx);
+ }
+
+ /* ZT state if present */
+ if (system_supports_sme2() && err == 0 && user->zt_offset) {
+ struct zt_context __user *zt_ctx =
+ apply_user_offset(user, user->zt_offset);
+ err |= preserve_zt_context(zt_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
@@ -730,6 +1081,44 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
+ /*
+ * Signal delivery is a (wacky) indirect function call in
+ * userspace, so simulate the same setting of BTYPE as a BLR
+ * <register containing the signal handler entry point>.
+ * Signal delivery to a location in a PROT_BTI guarded page
+ * that is not a function entry point will now trigger a
+ * SIGILL in userspace.
+ *
+ * If the signal handler entry point is not in a PROT_BTI
+ * guarded page, this is harmless.
+ */
+ if (system_supports_bti()) {
+ regs->pstate &= ~PSR_BTYPE_MASK;
+ regs->pstate |= PSR_BTYPE_C;
+ }
+
+ /* TCO (Tag Check Override) always cleared for signal handlers */
+ regs->pstate &= ~PSR_TCO_BIT;
+
+ /* Signal handlers are invoked with ZA and streaming mode disabled */
+ if (system_supports_sme()) {
+ /*
+ * If we were in streaming mode the saved register
+ * state was SVE but we will exit SM and use the
+ * FPSIMD register state - flush the saved FPSIMD
+ * register state in case it gets loaded.
+ */
+ if (current->thread.svcr & SVCR_SM_MASK) {
+ memset(&current->thread.uw.fpsimd_state, 0,
+ sizeof(current->thread.uw.fpsimd_state));
+ current->thread.fp_type = FP_STATE_FPSIMD;
+ }
+
+ current->thread.svcr &= ~(SVCR_ZA_MASK |
+ SVCR_SM_MASK);
+ sme_smstop();
+ }
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
@@ -782,7 +1171,6 @@ static void setup_restart_syscall(struct pt_regs *regs)
*/
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- struct task_struct *tsk = current;
sigset_t *oldset = sigmask_to_save();
int usig = ksig->sig;
int ret;
@@ -806,14 +1194,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
*/
ret |= !valid_user_regs(&regs->user_regs, current);
- /*
- * Fast forward the stepping logic so we step into the signal
- * handler.
- */
- if (!ret)
- user_fastforward_single_step(tsk);
-
- signal_setup_done(ret, ksig, 0);
+ /* Step into the signal handler if we are stepping */
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP));
}
/*
@@ -875,7 +1257,7 @@ static void do_signal(struct pt_regs *regs)
retval == -ERESTART_RESTARTBLOCK ||
(retval == -ERESTARTSYS &&
!(ksig.ka.sa.sa_flags & SA_RESTART)))) {
- regs->regs[0] = -EINTR;
+ syscall_set_return_value(current, regs, -EINTR, 0);
regs->pc = continue_addr;
}
@@ -896,20 +1278,9 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
-asmlinkage void do_notify_resume(struct pt_regs *regs,
- unsigned long thread_flags)
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
- /*
- * The assembly code enters us with IRQs off, but it hasn't
- * informed the tracing code of that for efficiency reasons.
- * Update the trace code with the current status.
- */
- trace_hardirqs_off();
-
do {
- /* Check valid user FS if needed */
- addr_limit_user_check();
-
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -921,21 +1292,24 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- if (thread_flags & _TIF_SIGPENDING)
+ if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ send_sig_fault(SIGSEGV, SEGV_MTEAERR,
+ (void __user *)NULL, current);
+ }
+
+ if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
- if (thread_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- rseq_handle_notify_resume(NULL, regs);
- }
+ if (thread_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
if (thread_flags & _TIF_FOREIGN_FPSTATE)
fpsimd_restore_current_state();
}
local_daif_mask();
- thread_flags = READ_ONCE(current_thread_info()->flags);
+ thread_flags = read_thread_flags();
} while (thread_flags & _TIF_WORK_MASK);
}
@@ -963,3 +1337,43 @@ void __init minsigstksz_setup(void)
round_up(sizeof(struct frame_record), 16) +
16; /* max alignment padding */
}
+
+/*
+ * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as
+ * changes likely come with new fields that should be added below.
+ */
+static_assert(NSIGILL == 11);
+static_assert(NSIGFPE == 15);
+static_assert(NSIGSEGV == 10);
+static_assert(NSIGBUS == 5);
+static_assert(NSIGTRAP == 6);
+static_assert(NSIGCHLD == 6);
+static_assert(NSIGSYS == 2);
+static_assert(sizeof(siginfo_t) == 128);
+static_assert(__alignof__(siginfo_t) == 8);
+static_assert(offsetof(siginfo_t, si_signo) == 0x00);
+static_assert(offsetof(siginfo_t, si_errno) == 0x04);
+static_assert(offsetof(siginfo_t, si_code) == 0x08);
+static_assert(offsetof(siginfo_t, si_pid) == 0x10);
+static_assert(offsetof(siginfo_t, si_uid) == 0x14);
+static_assert(offsetof(siginfo_t, si_tid) == 0x10);
+static_assert(offsetof(siginfo_t, si_overrun) == 0x14);
+static_assert(offsetof(siginfo_t, si_status) == 0x18);
+static_assert(offsetof(siginfo_t, si_utime) == 0x20);
+static_assert(offsetof(siginfo_t, si_stime) == 0x28);
+static_assert(offsetof(siginfo_t, si_value) == 0x18);
+static_assert(offsetof(siginfo_t, si_int) == 0x18);
+static_assert(offsetof(siginfo_t, si_ptr) == 0x18);
+static_assert(offsetof(siginfo_t, si_addr) == 0x10);
+static_assert(offsetof(siginfo_t, si_addr_lsb) == 0x18);
+static_assert(offsetof(siginfo_t, si_lower) == 0x20);
+static_assert(offsetof(siginfo_t, si_upper) == 0x28);
+static_assert(offsetof(siginfo_t, si_pkey) == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_data) == 0x18);
+static_assert(offsetof(siginfo_t, si_perf_type) == 0x20);
+static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24);
+static_assert(offsetof(siginfo_t, si_band) == 0x10);
+static_assert(offsetof(siginfo_t, si_fd) == 0x18);
+static_assert(offsetof(siginfo_t, si_call_addr) == 0x10);
+static_assert(offsetof(siginfo_t, si_syscall) == 0x18);
+static_assert(offsetof(siginfo_t, si_arch) == 0x1c);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 12a585386c2f..bbd542704730 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -46,8 +46,6 @@ struct compat_aux_sigframe {
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
{
compat_sigset_t cset;
@@ -190,10 +188,8 @@ static int compat_restore_sigframe(struct pt_regs *regs,
unsigned long psr;
err = get_sigset_t(&set, &sf->uc.uc_sigmask);
- if (err == 0) {
- sigdelsetmask(&set, ~_BLOCKABLE);
+ if (err == 0)
set_current_blocked(&set);
- }
__get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
__get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);
@@ -223,7 +219,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
err |= !valid_user_regs(&regs->user_regs, current);
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
- if (err == 0)
+ if (err == 0 && system_supports_fpsimd())
err |= compat_restore_vfp_context(&aux->vfp);
return err;
@@ -342,38 +338,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
-#ifdef CONFIG_COMPAT_VDSO
- void *vdso_base = current->mm->context.vdso;
- void *vdso_trampoline;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (thumb) {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_rt_sigreturn_thumb);
- } else {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_rt_sigreturn_arm);
- }
- } else {
- if (thumb) {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_sigreturn_thumb);
- } else {
- vdso_trampoline = VDSO_SYMBOL(vdso_base,
- compat_sigreturn_arm);
- }
- }
-
- retcode = ptr_to_compat(vdso_trampoline) + thumb;
-#else
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
idx += 3;
- retcode = (unsigned long)current->mm->context.vdso +
+ retcode = (unsigned long)current->mm->context.sigpage +
(idx << 2) + thumb;
-#endif
}
regs->regs[0] = usig;
@@ -419,7 +390,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf,
aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
- if (err == 0)
+ if (err == 0 && system_supports_fpsimd())
err |= compat_preserve_vfp_context(&aux->vfp);
__put_user_error(0, &aux->end_magic, err);
@@ -482,3 +453,43 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
{
regs->regs[7] = __NR_compat_restart_syscall;
}
+
+/*
+ * Compile-time assertions for siginfo_t offsets. Check NSIG* as well, as
+ * changes likely come with new fields that should be added below.
+ */
+static_assert(NSIGILL == 11);
+static_assert(NSIGFPE == 15);
+static_assert(NSIGSEGV == 10);
+static_assert(NSIGBUS == 5);
+static_assert(NSIGTRAP == 6);
+static_assert(NSIGCHLD == 6);
+static_assert(NSIGSYS == 2);
+static_assert(sizeof(compat_siginfo_t) == 128);
+static_assert(__alignof__(compat_siginfo_t) == 4);
+static_assert(offsetof(compat_siginfo_t, si_signo) == 0x00);
+static_assert(offsetof(compat_siginfo_t, si_errno) == 0x04);
+static_assert(offsetof(compat_siginfo_t, si_code) == 0x08);
+static_assert(offsetof(compat_siginfo_t, si_pid) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_uid) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_tid) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_overrun) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_status) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_utime) == 0x18);
+static_assert(offsetof(compat_siginfo_t, si_stime) == 0x1c);
+static_assert(offsetof(compat_siginfo_t, si_value) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_int) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_ptr) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_addr) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_addr_lsb) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_lower) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18);
+static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14);
+static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18);
+static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c);
+static_assert(offsetof(compat_siginfo_t, si_syscall) == 0x10);
+static_assert(offsetof(compat_siginfo_t, si_arch) == 0x14);
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
index 475d30d471ac..ccbd4aab4ba4 100644
--- a/arch/arm64/kernel/sigreturn32.S
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -15,6 +15,7 @@
#include <asm/unistd.h>
+ .section .rodata
.globl __aarch32_sigret_code_start
__aarch32_sigret_code_start:
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f5b04dd8a710..2aa5129d8253 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -3,6 +3,7 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
+#include <asm/smp.h>
.text
/*
@@ -61,7 +62,7 @@
*
* x0 = struct sleep_stack_data area
*/
-ENTRY(__cpu_suspend_enter)
+SYM_FUNC_START(__cpu_suspend_enter)
stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
@@ -94,22 +95,31 @@ ENTRY(__cpu_suspend_enter)
ldp x29, lr, [sp], #16
mov x0, #1
ret
-ENDPROC(__cpu_suspend_enter)
+SYM_FUNC_END(__cpu_suspend_enter)
- .pushsection ".idmap.text", "awx"
-ENTRY(cpu_resume)
- bl el2_setup // if in EL2 drop to EL1 cleanly
+ .pushsection ".idmap.text", "a"
+SYM_CODE_START(cpu_resume)
+ mov x0, xzr
+ bl init_kernel_el
+ mov x19, x0 // preserve boot mode
+#if VA_BITS > 48
+ ldr_l x0, vabits_actual
+#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
+ adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =_cpu_resume
br x8
-ENDPROC(cpu_resume)
+SYM_CODE_END(cpu_resume)
.ltorg
.popsection
-ENTRY(_cpu_resume)
+SYM_FUNC_START(_cpu_resume)
+ mov x0, x19
+ bl finalise_el2
+
mrs x1, mpidr_el1
adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address
@@ -132,7 +142,7 @@ ENTRY(_cpu_resume)
*/
bl cpu_do_resume
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
mov x0, sp
bl kasan_unpoison_task_stack_below
#endif
@@ -145,4 +155,4 @@ ENTRY(_cpu_resume)
ldp x29, lr, [x29]
mov x0, #0
ret
-ENDPROC(_cpu_resume)
+SYM_FUNC_END(_cpu_resume)
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 54655273d1e0..487381164ff6 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,21 +7,48 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
+#include <asm/thread_info.h>
+
+/*
+ * If we have SMCCC v1.3 and (as is likely) no SVE state in
+ * the registers then set the SMCCC hint bit to say there's no
+ * need to preserve it. Do this by directly adjusting the SMCCC
+ * function value which is already stored in x0 ready to be called.
+ */
+SYM_FUNC_START(__arm_smccc_sve_check)
+
+ ldr_l x16, smccc_has_sve_hint
+ cbz x16, 2f
+
+ get_current_task x16
+ ldr x16, [x16, #TSK_TI_FLAGS]
+ tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
+ tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
+
+1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
+
+2: ret
+SYM_FUNC_END(__arm_smccc_sve_check)
+EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- .cfi_startproc
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+alternative_if ARM64_SVE
+ bl __arm_smccc_sve_check
+alternative_else_nop_endif
\instr #0
- ldr x4, [sp]
+ ldr x4, [sp, #16]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #8]
+ ldr x4, [sp, #24]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ret
- .cfi_endproc
+1: ldp x29, x30, [sp], #16
+ ret
.endm
/*
@@ -30,9 +57,9 @@
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_smc)
+SYM_FUNC_START(__arm_smccc_smc)
SMCCC smc
-ENDPROC(__arm_smccc_smc)
+SYM_FUNC_END(__arm_smccc_smc)
EXPORT_SYMBOL(__arm_smccc_smc)
/*
@@ -41,7 +68,64 @@ EXPORT_SYMBOL(__arm_smccc_smc)
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
-ENTRY(__arm_smccc_hvc)
+SYM_FUNC_START(__arm_smccc_hvc)
SMCCC hvc
-ENDPROC(__arm_smccc_hvc)
+SYM_FUNC_END(__arm_smccc_hvc)
EXPORT_SYMBOL(__arm_smccc_hvc)
+
+ .macro SMCCC_1_2 instr
+ /* Save `res` and free a GPR that won't be clobbered */
+ stp x1, x19, [sp, #-16]!
+
+ /* Ensure `args` won't be clobbered while loading regs in next step */
+ mov x19, x0
+
+ /* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */
+ ldp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+ ldp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+ ldp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+ ldp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+ ldp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+ ldp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+ ldp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+ ldp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+ ldp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+ \instr #0
+
+ /* Load the `res` from the stack */
+ ldr x19, [sp]
+
+ /* Store the registers x0 - x17 into the result structure */
+ stp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+ stp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+ stp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+ stp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+ stp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+ stp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+ stp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+ stp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+ stp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+ /* Restore original x19 */
+ ldp xzr, x19, [sp], #16
+ ret
+.endm
+
+/*
+ * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args,
+ * struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_hvc)
+ SMCCC_1_2 hvc
+SYM_FUNC_END(arm_smccc_1_2_hvc)
+EXPORT_SYMBOL(arm_smccc_1_2_hvc)
+
+/*
+ * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args,
+ * struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_smc)
+ SMCCC_1_2 smc
+SYM_FUNC_END(arm_smccc_1_2_smc)
+EXPORT_SYMBOL(arm_smccc_1_2_smc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..4ced34f62dab 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -30,8 +30,11 @@
#include <linux/completion.h>
#include <linux/of.h>
#include <linux/irq_work.h>
+#include <linux/kernel_stat.h>
#include <linux/kexec.h>
+#include <linux/kgdb.h>
#include <linux/kvm_host.h>
+#include <linux/nmi.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -43,8 +46,6 @@
#include <asm/kvm_mmu.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
@@ -52,7 +53,6 @@
#include <asm/ptrace.h>
#include <asm/virt.h>
-#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
@@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
*/
struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
-int cpus_stuck_in_kernel;
+static int cpus_stuck_in_kernel;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -74,10 +74,24 @@ enum ipi_msg_type {
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
- IPI_WAKEUP
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
};
+static int ipi_irq_base __ro_after_init;
+static int nr_ipi __ro_after_init = NR_IPI;
+static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+static void ipi_setup(int cpu);
+
#ifdef CONFIG_HOTPLUG_CPU
+static void ipi_teardown(int cpu);
static int op_cpu_kill(unsigned int cpu);
#else
static inline int op_cpu_kill(unsigned int cpu)
@@ -93,8 +107,10 @@ static inline int op_cpu_kill(unsigned int cpu)
*/
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- if (cpu_ops[cpu]->cpu_boot)
- return cpu_ops[cpu]->cpu_boot(cpu);
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops->cpu_boot)
+ return ops->cpu_boot(cpu);
return -EOPNOTSUPP;
}
@@ -111,67 +127,58 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* page tables.
*/
secondary_data.task = idle;
- secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
update_cpu_boot_status(CPU_MMU_OFF);
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
- /*
- * Now bring the CPU into our world.
- */
+ /* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
- if (ret == 0) {
- /*
- * CPU was successfully started, wait for it to come online or
- * time out.
- */
- wait_for_completion_timeout(&cpu_running,
- msecs_to_jiffies(5000));
-
- if (!cpu_online(cpu)) {
- pr_crit("CPU%u: failed to come online\n", cpu);
- ret = -EIO;
- }
- } else {
+ if (ret) {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
+ /*
+ * CPU was successfully started, wait for it to come online or
+ * time out.
+ */
+ wait_for_completion_timeout(&cpu_running,
+ msecs_to_jiffies(5000));
+ if (cpu_online(cpu))
+ return 0;
+
+ pr_crit("CPU%u: failed to come online\n", cpu);
secondary_data.task = NULL;
- secondary_data.stack = NULL;
- __flush_dcache_area(&secondary_data, sizeof(secondary_data));
status = READ_ONCE(secondary_data.status);
- if (ret && status) {
+ if (status == CPU_MMU_OFF)
+ status = READ_ONCE(__early_cpu_boot_status);
- if (status == CPU_MMU_OFF)
- status = READ_ONCE(__early_cpu_boot_status);
-
- switch (status & CPU_BOOT_STATUS_MASK) {
- default:
- pr_err("CPU%u: failed in unknown state : 0x%lx\n",
- cpu, status);
- cpus_stuck_in_kernel++;
- break;
- case CPU_KILL_ME:
- if (!op_cpu_kill(cpu)) {
- pr_crit("CPU%u: died during early boot\n", cpu);
- break;
- }
- pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
- /* Fall through */
- case CPU_STUCK_IN_KERNEL:
- pr_crit("CPU%u: is stuck in kernel\n", cpu);
- if (status & CPU_STUCK_REASON_52_BIT_VA)
- pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
- if (status & CPU_STUCK_REASON_NO_GRAN)
- pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K);
- cpus_stuck_in_kernel++;
+ switch (status & CPU_BOOT_STATUS_MASK) {
+ default:
+ pr_err("CPU%u: failed in unknown state : 0x%lx\n",
+ cpu, status);
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_KILL_ME:
+ if (!op_cpu_kill(cpu)) {
+ pr_crit("CPU%u: died during early boot\n", cpu);
break;
- case CPU_PANIC_KERNEL:
- panic("CPU%u detected unsupported configuration\n", cpu);
}
+ pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
+ fallthrough;
+ case CPU_STUCK_IN_KERNEL:
+ pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ if (status & CPU_STUCK_REASON_52_BIT_VA)
+ pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
+ if (status & CPU_STUCK_REASON_NO_GRAN) {
+ pr_crit("CPU%u: does not support %luK granule\n",
+ cpu, PAGE_SIZE / SZ_1K);
+ }
+ cpus_stuck_in_kernel++;
+ break;
+ case CPU_PANIC_KERNEL:
+ panic("CPU%u detected unsupported configuration\n", cpu);
}
- return ret;
+ return -EIO;
}
static void init_gic_priority_masking(void)
@@ -184,6 +191,7 @@ static void init_gic_priority_masking(void)
cpuflags = read_sysreg(daif);
WARN_ON(!(cpuflags & PSR_I_BIT));
+ WARN_ON(!(cpuflags & PSR_F_BIT));
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}
@@ -196,10 +204,8 @@ asmlinkage notrace void secondary_start_kernel(void)
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
- unsigned int cpu;
-
- cpu = task_cpu(current);
- set_my_cpu_offset(per_cpu_offset(cpu));
+ const struct cpu_operations *ops;
+ unsigned int cpu = smp_processor_id();
/*
* All kernel threads share the same mm context; grab a
@@ -217,7 +223,7 @@ asmlinkage notrace void secondary_start_kernel(void)
if (system_uses_irq_prio_masking())
init_gic_priority_masking();
- preempt_disable();
+ rcutree_report_cpu_starting(cpu);
trace_hardirqs_off();
/*
@@ -227,20 +233,23 @@ asmlinkage notrace void secondary_start_kernel(void)
*/
check_local_cpu_capabilities();
- if (cpu_ops[cpu]->cpu_postboot)
- cpu_ops[cpu]->cpu_postboot();
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_postboot)
+ ops->cpu_postboot();
/*
* Log the CPU info before it is marked online and might get read.
*/
cpuinfo_store_cpu();
+ store_cpu_topology(cpu);
/*
* Enable GIC and timers.
*/
notify_cpu_starting(cpu);
- store_cpu_topology(cpu);
+ ipi_setup(cpu);
+
numa_add_cpu(cpu);
/*
@@ -266,19 +275,21 @@ asmlinkage notrace void secondary_start_kernel(void)
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_disable(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we don't have a cpu_die method, abort before we reach the point
* of no return. CPU0 may not have an cpu_ops, so test for it.
*/
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+ if (!ops || !ops->cpu_die)
return -EOPNOTSUPP;
/*
* We may need to abort a hot unplug for some other mechanism-specific
* reason.
*/
- if (cpu_ops[cpu]->cpu_disable)
- return cpu_ops[cpu]->cpu_disable(cpu);
+ if (ops->cpu_disable)
+ return ops->cpu_disable(cpu);
return 0;
}
@@ -303,6 +314,7 @@ int __cpu_disable(void)
* and we must not schedule until we're ready to give up the cpu.
*/
set_cpu_online(cpu, false);
+ ipi_teardown(cpu);
/*
* OK - migrate IRQs away from this CPU
@@ -314,30 +326,28 @@ int __cpu_disable(void)
static int op_cpu_kill(unsigned int cpu)
{
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
/*
* If we have no means of synchronising with the dying CPU, then assume
* that it is really dead. We can only wait for an arbitrary length of
* time and hope that it's dead, so let's skip the wait and just hope.
*/
- if (!cpu_ops[cpu]->cpu_kill)
+ if (!ops->cpu_kill)
return 0;
- return cpu_ops[cpu]->cpu_kill(cpu);
+ return ops->cpu_kill(cpu);
}
/*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * Called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
*/
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
int err;
- if (!cpu_wait_death(cpu, 5)) {
- pr_crit("CPU%u: cpu didn't die\n", cpu);
- return;
- }
- pr_notice("CPU%u: shutdown\n", cpu);
+ pr_debug("CPU%u: shutdown\n", cpu);
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
@@ -354,33 +364,44 @@ void __cpu_die(unsigned int cpu)
* Called from the idle thread for the CPU which has been shutdown.
*
*/
-void cpu_die(void)
+void __noreturn cpu_die(void)
{
unsigned int cpu = smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
idle_task_exit();
local_daif_mask();
- /* Tell __cpu_die() that this CPU is now safe to dispose of */
- (void)cpu_report_death();
+ /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
+ cpuhp_ap_report_dead();
/*
* Actually shutdown the CPU. This must never fail. The specific hotplug
* mechanism must perform all required cache maintenance to ensure that
* no dirty lines are lost in the process of shutting down the CPU.
*/
- cpu_ops[cpu]->cpu_die(cpu);
+ ops->cpu_die(cpu);
BUG();
}
#endif
+static void __cpu_try_die(int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ const struct cpu_operations *ops = get_cpu_ops(cpu);
+
+ if (ops && ops->cpu_die)
+ ops->cpu_die(cpu);
+#endif
+}
+
/*
* Kill the calling secondary CPU, early in bringup before it is turned
* online.
*/
-void cpu_die_early(void)
+void __noreturn cpu_die_early(void)
{
int cpu = smp_processor_id();
@@ -388,13 +409,13 @@ void cpu_die_early(void)
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
+ rcutree_report_cpu_dead();
+
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+ update_cpu_boot_status(CPU_KILL_ME);
+ __cpu_try_die(cpu);
+ }
-#ifdef CONFIG_HOTPLUG_CPU
- update_cpu_boot_status(CPU_KILL_ME);
- /* Check if we can park ourselves */
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
update_cpu_boot_status(CPU_STUCK_IN_KERNEL);
cpu_park_loop();
@@ -409,61 +430,38 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
kvm_compute_layout();
+ kvm_apply_hyp_relocations();
+ }
}
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
- setup_cpu_features();
hyp_mode_check();
- apply_alternatives_all();
+ setup_system_features();
+ setup_user_features();
mark_linear_text_alias_ro();
}
void __init smp_prepare_boot_cpu(void)
{
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- cpuinfo_store_boot_cpu();
-
/*
- * We now know enough about the boot CPU to apply the
- * alternatives that cannot wait until interrupt handling
- * and/or scheduling is enabled.
+ * The runtime per-cpu areas have been allocated by
+ * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
+ * freed shortly, so we must move over to the runtime per-cpu area.
*/
- apply_boot_alternatives();
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+ cpuinfo_store_boot_cpu();
+ setup_boot_cpu_features();
/* Conditionally switch to GIC PMR for interrupt masking */
if (system_uses_irq_prio_masking())
init_gic_priority_masking();
-}
-
-static u64 __init of_get_cpu_mpidr(struct device_node *dn)
-{
- const __be32 *cell;
- u64 hwid;
-
- /*
- * A cpu node with missing "reg" property is
- * considered invalid to build a cpu_logical_map
- * entry.
- */
- cell = of_get_property(dn, "reg", NULL);
- if (!cell) {
- pr_err("%pOF: missing reg property\n", dn);
- return INVALID_HWID;
- }
- hwid = of_read_number(cell, of_n_addr_cells(dn));
- /*
- * Non affinity bits must be set to 0 in the DT
- */
- if (hwid & ~MPIDR_HWID_BITMASK) {
- pr_err("%pOF: invalid reg property\n", dn);
- return INVALID_HWID;
- }
- return hwid;
+ kasan_init_hw_tags();
}
/*
@@ -488,10 +486,13 @@ static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid)
*/
static int __init smp_cpu_setup(int cpu)
{
- if (cpu_read_ops(cpu))
+ const struct cpu_operations *ops;
+
+ if (init_cpu_ops(cpu))
return -ENODEV;
- if (cpu_ops[cpu]->cpu_init(cpu))
+ ops = get_cpu_ops(cpu);
+ if (ops->cpu_init(cpu))
return -ENODEV;
set_cpu_possible(cpu, true);
@@ -509,6 +510,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
{
return &cpu_madt_gicc[cpu];
}
+EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
/*
* acpi_map_gic_cpu_interface - parse processor MADT entry
@@ -521,7 +523,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!(processor->flags & ACPI_MADT_ENABLED)) {
+ if (!acpi_gicc_is_usable(processor)) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -552,7 +554,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
return;
/* map the logical cpu id to cpu MPIDR */
- cpu_logical_map(cpu_count) = hwid;
+ set_cpu_logical_map(cpu_count, hwid);
cpu_madt_gicc[cpu_count] = *processor;
@@ -626,9 +628,9 @@ static void __init of_parse_and_init_cpus(void)
struct device_node *dn;
for_each_of_cpu_node(dn) {
- u64 hwid = of_get_cpu_mpidr(dn);
+ u64 hwid = of_get_cpu_hwid(dn, 0);
- if (hwid == INVALID_HWID)
+ if (hwid & ~MPIDR_HWID_BITMASK)
goto next;
if (is_mpidr_duplicate(cpu_count, hwid)) {
@@ -666,7 +668,7 @@ static void __init of_parse_and_init_cpus(void)
goto next;
pr_debug("cpu logical map 0x%llx\n", hwid);
- cpu_logical_map(cpu_count) = hwid;
+ set_cpu_logical_map(cpu_count, hwid);
early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
@@ -707,13 +709,14 @@ void __init smp_init_cpus(void)
for (i = 1; i < nr_cpu_ids; i++) {
if (cpu_logical_map(i) != INVALID_HWID) {
if (smp_cpu_setup(i))
- cpu_logical_map(i) = INVALID_HWID;
+ set_cpu_logical_map(i, INVALID_HWID);
}
}
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ const struct cpu_operations *ops;
int err;
unsigned int cpu;
unsigned int this_cpu;
@@ -744,10 +747,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (cpu == smp_processor_id())
continue;
- if (!cpu_ops[cpu])
+ ops = get_cpu_ops(cpu);
+ if (!ops)
continue;
- err = cpu_ops[cpu]->cpu_prepare(cpu);
+ err = ops->cpu_prepare(cpu);
if (err)
continue;
@@ -756,31 +760,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-void (*__smp_cross_call)(const struct cpumask *, unsigned int);
-
-void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
-{
- __smp_cross_call = fn;
-}
-
static const char *ipi_types[NR_IPI] __tracepoint_string = {
-#define S(x,s) [x] = s
- S(IPI_RESCHEDULE, "Rescheduling interrupts"),
- S(IPI_CALL_FUNC, "Function call interrupts"),
- S(IPI_CPU_STOP, "CPU stop interrupts"),
- S(IPI_CPU_CRASH_STOP, "CPU stop (for crash dump) interrupts"),
- S(IPI_TIMER, "Timer broadcast interrupts"),
- S(IPI_IRQ_WORK, "IRQ work interrupts"),
- S(IPI_WAKEUP, "CPU wake-up interrupts"),
+ [IPI_RESCHEDULE] = "Rescheduling interrupts",
+ [IPI_CALL_FUNC] = "Function call interrupts",
+ [IPI_CPU_STOP] = "CPU stop interrupts",
+ [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_TIMER] = "Timer broadcast interrupts",
+ [IPI_IRQ_WORK] = "IRQ work interrupts",
};
-static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
-{
- trace_ipi_raise(target, ipi_types[ipinr]);
- __smp_cross_call(target, ipinr);
-}
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
+
+unsigned long irq_err_count;
-void show_ipi_list(struct seq_file *p, int prec)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
@@ -788,21 +781,12 @@ void show_ipi_list(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ",
- __get_irq_stat(cpu, ipi_irqs[i]));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
-}
-
-u64 smp_irq_stat_cpu(unsigned int cpu)
-{
- u64 sum = 0;
- int i;
- for (i = 0; i < NR_IPI; i++)
- sum += __get_irq_stat(cpu, ipi_irqs[i]);
-
- return sum;
+ seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
+ return 0;
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -815,22 +799,14 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
-{
- smp_cross_call(mask, IPI_WAKEUP);
-}
-#endif
-
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- if (__smp_cross_call)
- smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+ smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
}
#endif
-static void local_cpu_stop(void)
+static void __noreturn local_cpu_stop(void)
{
set_cpu_online(smp_processor_id(), false);
@@ -844,7 +820,7 @@ static void local_cpu_stop(void)
* that cpu_online_mask gets correctly updated and smp_send_stop() can skip
* CPUs that have already stopped themselves.
*/
-void panic_smp_self_stop(void)
+void __noreturn panic_smp_self_stop(void)
{
local_cpu_stop();
}
@@ -853,7 +829,7 @@ void panic_smp_self_stop(void)
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
#endif
-static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
crash_save_cpu(regs, cpu);
@@ -863,28 +839,57 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
local_irq_disable();
sdei_mask_local_cpu();
-#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_ops[cpu]->cpu_die)
- cpu_ops[cpu]->cpu_die(cpu);
-#endif
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ __cpu_try_die(cpu);
/* just in case */
cpu_park_loop();
+#else
+ BUG();
#endif
}
+static void arm64_backtrace_ipi(cpumask_t *mask)
+{
+ __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+ /*
+ * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
+ * nothing about it truly needs to be implemented using an NMI, it's
+ * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
+ * returned false our backtrace attempt will just use a regular IPI.
+ */
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+ int this_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* No need to roundup ourselves */
+ if (cpu == this_cpu)
+ continue;
+
+ __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ }
+}
+#endif
+
/*
* Main handler for inter-processor interrupts
*/
-void handle_IPI(int ipinr, struct pt_regs *regs)
+static void do_handle_IPI(int ipinr)
{
unsigned int cpu = smp_processor_id();
- struct pt_regs *old_regs = set_irq_regs(regs);
- if ((unsigned)ipinr < NR_IPI) {
- trace_ipi_entry_rcuidle(ipi_types[ipinr]);
- __inc_irq_stat(cpu, ipi_irqs[ipinr]);
- }
+ if ((unsigned)ipinr < NR_IPI)
+ trace_ipi_entry(ipi_types[ipinr]);
switch (ipinr) {
case IPI_RESCHEDULE:
@@ -892,21 +897,16 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
case IPI_CALL_FUNC:
- irq_enter();
generic_smp_call_function_interrupt();
- irq_exit();
break;
case IPI_CPU_STOP:
- irq_enter();
local_cpu_stop();
- irq_exit();
break;
case IPI_CPU_CRASH_STOP:
if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
- irq_enter();
- ipi_cpu_crash_stop(cpu, regs);
+ ipi_cpu_crash_stop(cpu, get_irq_regs());
unreachable();
}
@@ -914,27 +914,27 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
case IPI_TIMER:
- irq_enter();
tick_receive_broadcast();
- irq_exit();
break;
#endif
#ifdef CONFIG_IRQ_WORK
case IPI_IRQ_WORK:
- irq_enter();
irq_work_run();
- irq_exit();
break;
#endif
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
- case IPI_WAKEUP:
- WARN_ONCE(!acpi_parking_protocol_valid(cpu),
- "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
- cpu);
+ case IPI_CPU_BACKTRACE:
+ /*
+ * NOTE: in some cases this _won't_ be NMI context. See the
+ * comment in arch_trigger_cpumask_backtrace().
+ */
+ nmi_cpu_backtrace(get_irq_regs());
+ break;
+
+ case IPI_KGDB_ROUNDUP:
+ kgdb_nmicallback(cpu, get_irq_regs());
break;
-#endif
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
@@ -942,15 +942,121 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
}
if ((unsigned)ipinr < NR_IPI)
- trace_ipi_exit_rcuidle(ipi_types[ipinr]);
- set_irq_regs(old_regs);
+ trace_ipi_exit(ipi_types[ipinr]);
+}
+
+static irqreturn_t ipi_handler(int irq, void *data)
+{
+ do_handle_IPI(irq - ipi_irq_base);
+ return IRQ_HANDLED;
+}
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+ trace_ipi_raise(target, ipi_types[ipinr]);
+ __ipi_send_mask(ipi_desc[ipinr], target);
+}
+
+static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
+{
+ if (!system_uses_irq_prio_masking())
+ return false;
+
+ switch (ipi) {
+ case IPI_CPU_STOP:
+ case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_BACKTRACE:
+ case IPI_KGDB_ROUNDUP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void ipi_setup(int cpu)
+{
+ int i;
+
+ if (WARN_ON_ONCE(!ipi_irq_base))
+ return;
+
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void ipi_teardown(int cpu)
+{
+ int i;
+
+ if (WARN_ON_ONCE(!ipi_irq_base))
+ return;
+
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
+ }
+}
+#endif
+
+void __init set_smp_ipi_range(int ipi_base, int n)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+
+ for (i = 0; i < nr_ipi; i++) {
+ int err;
+
+ if (ipi_should_be_nmi(i)) {
+ err = request_percpu_nmi(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN(err, "Could not request IPI %d as NMI, err=%d\n",
+ i, err);
+ } else {
+ err = request_percpu_irq(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
+ i, err);
+ }
+
+ ipi_desc[i] = irq_to_desc(ipi_base + i);
+ irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ }
+
+ ipi_irq_base = ipi_base;
+
+ /* Setup the boot CPU immediately */
+ ipi_setup(smp_processor_id());
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi(unsigned int cpu)
+{
+ /*
+ * We use a scheduler IPI to wake the CPU as this avoids the need for a
+ * dedicated IPI and we can safely handle spurious scheduler IPIs.
+ */
+ smp_send_reschedule(cpu);
+}
+#endif
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -958,11 +1064,22 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+ unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+ return num_online_cpus() - this_cpu_online;
+}
+
void smp_send_stop(void)
{
unsigned long timeout;
- if (num_online_cpus() > 1) {
+ if (num_other_online_cpus()) {
cpumask_t mask;
cpumask_copy(&mask, cpu_online_mask);
@@ -975,10 +1092,10 @@ void smp_send_stop(void)
/* Wait up to one second for other CPUs to stop */
timeout = USEC_PER_SEC;
- while (num_online_cpus() > 1 && timeout--)
+ while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_online_cpus() > 1)
+ if (num_other_online_cpus())
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
@@ -1001,15 +1118,17 @@ void crash_smp_send_stop(void)
cpus_stopped = 1;
- if (num_online_cpus() == 1) {
- sdei_mask_local_cpu();
- return;
- }
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
@@ -1023,7 +1142,9 @@ void crash_smp_send_stop(void)
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
+skip_ipi:
sdei_mask_local_cpu();
+ sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
@@ -1032,20 +1153,13 @@ bool smp_crash_stop_failed(void)
}
#endif
-/*
- * not supported here
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
- return -EINVAL;
-}
-
static bool have_cpu_die(void)
{
#ifdef CONFIG_HOTPLUG_CPU
int any_cpu = raw_smp_processor_id();
+ const struct cpu_operations *ops = get_cpu_ops(any_cpu);
- if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die)
+ if (ops && ops->cpu_die)
return true;
#endif
return false;
@@ -1055,5 +1169,6 @@ bool cpus_are_stuck_in_kernel(void)
{
bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
- return !!cpus_stuck_in_kernel || smp_spin_tables;
+ return !!cpus_stuck_in_kernel || smp_spin_tables ||
+ is_protected_kvm_enabled();
}
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c8a3fee00c11..49029eace3ad 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -19,7 +19,7 @@
#include <asm/smp_plat.h>
extern void secondary_holding_pen(void);
-volatile unsigned long __section(.mmuoff.data.read)
+volatile unsigned long __section(".mmuoff.data.read")
secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
unsigned long size = sizeof(secondary_holding_pen_release);
secondary_holding_pen_release = val;
- __flush_dcache_area(start, size);
+ dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
}
@@ -66,6 +66,7 @@ static int smp_spin_table_cpu_init(unsigned int cpu)
static int smp_spin_table_cpu_prepare(unsigned int cpu)
{
__le64 __iomem *release_addr;
+ phys_addr_t pa_holding_pen = __pa_symbol(secondary_holding_pen);
if (!cpu_release_addr[cpu])
return -ENODEV;
@@ -83,14 +84,15 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
/*
* We write the release address as LE regardless of the native
- * endianess of the kernel. Therefore, any boot-loaders that
+ * endianness of the kernel. Therefore, any boot-loaders that
* read this address need to convert this address to the
- * boot-loader's endianess before jumping. This is mandated by
+ * boot-loader's endianness before jumping. This is mandated by
* the boot protocol.
*/
- writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
- __flush_dcache_area((__force void *)release_addr,
- sizeof(*release_addr));
+ writeq_relaxed(pa_holding_pen, release_addr);
+ dcache_clean_inval_poc((__force unsigned long)release_addr,
+ (__force unsigned long)release_addr +
+ sizeof(*release_addr));
/*
* Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
deleted file mode 100644
index 52cfc6148355..000000000000
--- a/arch/arm64/kernel/ssbd.c
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
- */
-
-#include <linux/compat.h>
-#include <linux/errno.h>
-#include <linux/prctl.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/thread_info.h>
-
-#include <asm/cpufeature.h>
-
-static void ssbd_ssbs_enable(struct task_struct *task)
-{
- u64 val = is_compat_thread(task_thread_info(task)) ?
- PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
- task_pt_regs(task)->pstate |= val;
-}
-
-static void ssbd_ssbs_disable(struct task_struct *task)
-{
- u64 val = is_compat_thread(task_thread_info(task)) ?
- PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
- task_pt_regs(task)->pstate &= ~val;
-}
-
-/*
- * prctl interface for SSBD
- */
-static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
-{
- int state = arm64_get_ssbd_state();
-
- /* Unsupported */
- if (state == ARM64_SSBD_UNKNOWN)
- return -EINVAL;
-
- /* Treat the unaffected/mitigated state separately */
- if (state == ARM64_SSBD_MITIGATED) {
- switch (ctrl) {
- case PR_SPEC_ENABLE:
- return -EPERM;
- case PR_SPEC_DISABLE:
- case PR_SPEC_FORCE_DISABLE:
- return 0;
- }
- }
-
- /*
- * Things are a bit backward here: the arm64 internal API
- * *enables the mitigation* when the userspace API *disables
- * speculation*. So much fun.
- */
- switch (ctrl) {
- case PR_SPEC_ENABLE:
- /* If speculation is force disabled, enable is not allowed */
- if (state == ARM64_SSBD_FORCE_ENABLE ||
- task_spec_ssb_force_disable(task))
- return -EPERM;
- task_clear_spec_ssb_disable(task);
- clear_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_enable(task);
- break;
- case PR_SPEC_DISABLE:
- if (state == ARM64_SSBD_FORCE_DISABLE)
- return -EPERM;
- task_set_spec_ssb_disable(task);
- set_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_disable(task);
- break;
- case PR_SPEC_FORCE_DISABLE:
- if (state == ARM64_SSBD_FORCE_DISABLE)
- return -EPERM;
- task_set_spec_ssb_disable(task);
- task_set_spec_ssb_force_disable(task);
- set_tsk_thread_flag(task, TIF_SSBD);
- ssbd_ssbs_disable(task);
- break;
- default:
- return -ERANGE;
- }
-
- return 0;
-}
-
-int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
- unsigned long ctrl)
-{
- switch (which) {
- case PR_SPEC_STORE_BYPASS:
- return ssbd_prctl_set(task, ctrl);
- default:
- return -ENODEV;
- }
-}
-
-static int ssbd_prctl_get(struct task_struct *task)
-{
- switch (arm64_get_ssbd_state()) {
- case ARM64_SSBD_UNKNOWN:
- return -EINVAL;
- case ARM64_SSBD_FORCE_ENABLE:
- return PR_SPEC_DISABLE;
- case ARM64_SSBD_KERNEL:
- if (task_spec_ssb_force_disable(task))
- return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
- if (task_spec_ssb_disable(task))
- return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
- return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case ARM64_SSBD_FORCE_DISABLE:
- return PR_SPEC_ENABLE;
- default:
- return PR_SPEC_NOT_AFFECTED;
- }
-}
-
-int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
-{
- switch (which) {
- case PR_SPEC_STORE_BYPASS:
- return ssbd_prctl_get(task);
- default:
- return -ENODEV;
- }
-}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index a336cb124320..7f88028a00c0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -5,6 +5,7 @@
* Copyright (C) 2012 ARM Ltd.
*/
#include <linux/kernel.h>
+#include <linux/efi.h>
#include <linux/export.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
@@ -13,200 +14,287 @@
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
+#include <asm/efi.h>
#include <asm/irq.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
- * AArch64 PCS assigns the frame pointer to x29.
+ * Kernel unwind state
*
- * A simple function prologue looks like this:
- * sub sp, sp, #0x10
- * stp x29, x30, [sp]
- * mov x29, sp
- *
- * A simple function epilogue looks like this:
- * mov sp, x29
- * ldp x29, x30, [sp]
- * add sp, sp, #0x10
+ * @common: Common unwind state.
+ * @task: The task being unwound.
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
*/
+struct kunwind_state {
+ struct unwind_state common;
+ struct task_struct *task;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+};
+
+static __always_inline void
+kunwind_init(struct kunwind_state *state,
+ struct task_struct *task)
+{
+ unwind_init_common(&state->common);
+ state->task = task;
+}
/*
- * Unwind from one frame record (A) to the next frame record (B).
+ * Start an unwind from a pt_regs.
*
- * We terminate early if the location of B indicates a malformed chain of frame
- * records (e.g. a cycle), determined based on the location and fp value of A
- * and the location (but not the fp value) of B.
+ * The unwind will begin at the PC within the regs.
+ *
+ * The regs must be on a stack currently owned by the calling task.
*/
-int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
+static __always_inline void
+kunwind_init_from_regs(struct kunwind_state *state,
+ struct pt_regs *regs)
{
- unsigned long fp = frame->fp;
- struct stack_info info;
+ kunwind_init(state, current);
- if (fp & 0xf)
- return -EINVAL;
+ state->common.fp = regs->regs[29];
+ state->common.pc = regs->pc;
+}
- if (!tsk)
- tsk = current;
+/*
+ * Start an unwind from a caller.
+ *
+ * The unwind will begin at the caller of whichever function this is inlined
+ * into.
+ *
+ * The function which invokes this must be noinline.
+ */
+static __always_inline void
+kunwind_init_from_caller(struct kunwind_state *state)
+{
+ kunwind_init(state, current);
- if (!on_accessible_stack(tsk, fp, &info))
- return -EINVAL;
-
- if (test_bit(info.type, frame->stacks_done))
- return -EINVAL;
-
- /*
- * As stacks grow downward, any valid record on the same stack must be
- * at a strictly higher address than the prior record.
- *
- * Stacks can nest in several valid orders, e.g.
- *
- * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
- * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
- *
- * ... but the nesting itself is strict. Once we transition from one
- * stack to another, it's never valid to unwind back to that first
- * stack.
- */
- if (info.type == frame->prev_type) {
- if (fp <= frame->prev_fp)
- return -EINVAL;
- } else {
- set_bit(frame->prev_type, frame->stacks_done);
- }
+ state->common.fp = (unsigned long)__builtin_frame_address(1);
+ state->common.pc = (unsigned long)__builtin_return_address(0);
+}
+
+/*
+ * Start an unwind from a blocked task.
+ *
+ * The unwind will begin at the blocked tasks saved PC (i.e. the caller of
+ * cpu_switch_to()).
+ *
+ * The caller should ensure the task is blocked in cpu_switch_to() for the
+ * duration of the unwind, or the unwind will be bogus. It is never valid to
+ * call this for the current task.
+ */
+static __always_inline void
+kunwind_init_from_task(struct kunwind_state *state,
+ struct task_struct *task)
+{
+ kunwind_init(state, task);
- /*
- * Record this frame record's values and location. The prev_fp and
- * prev_type are only meaningful to the next unwind_frame() invocation.
- */
- frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
- frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
- frame->prev_fp = fp;
- frame->prev_type = info.type;
+ state->common.fp = thread_saved_fp(task);
+ state->common.pc = thread_saved_pc(task);
+}
+static __always_inline int
+kunwind_recover_return_address(struct kunwind_state *state)
+{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (tsk->ret_stack &&
- (frame->pc == (unsigned long)return_to_handler)) {
- struct ftrace_ret_stack *ret_stack;
- /*
- * This is a case where function graph tracer has
- * modified a return address (LR) in a stack frame
- * to hook a function return.
- * So replace it to an original value.
- */
- ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
- if (WARN_ON_ONCE(!ret_stack))
+ if (state->task->ret_stack &&
+ (state->common.pc == (unsigned long)return_to_handler)) {
+ unsigned long orig_pc;
+ orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ state->common.pc,
+ (void *)state->common.fp);
+ if (WARN_ON_ONCE(state->common.pc == orig_pc))
return -EINVAL;
- frame->pc = ret_stack->ret;
+ state->common.pc = orig_pc;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
- /*
- * Frames created upon entry from EL0 have NULL FP and PC values, so
- * don't bother reporting these. Frames created by __noreturn functions
- * might have a valid FP even if PC is bogus, so only terminate where
- * both are NULL.
- */
- if (!frame->fp && !frame->pc)
- return -EINVAL;
+#ifdef CONFIG_KRETPROBES
+ if (is_kretprobe_trampoline(state->common.pc)) {
+ unsigned long orig_pc;
+ orig_pc = kretprobe_find_ret_addr(state->task,
+ (void *)state->common.fp,
+ &state->kr_cur);
+ state->common.pc = orig_pc;
+ }
+#endif /* CONFIG_KRETPROBES */
return 0;
}
-NOKPROBE_SYMBOL(unwind_frame);
-void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
- int (*fn)(struct stackframe *, void *), void *data)
+/*
+ * Unwind from one frame record (A) to the next frame record (B).
+ *
+ * We terminate early if the location of B indicates a malformed chain of frame
+ * records (e.g. a cycle), determined based on the location and fp value of A
+ * and the location (but not the fp value) of B.
+ */
+static __always_inline int
+kunwind_next(struct kunwind_state *state)
{
+ struct task_struct *tsk = state->task;
+ unsigned long fp = state->common.fp;
+ int err;
+
+ /* Final frame; nothing to unwind */
+ if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+
+ err = unwind_next_frame_record(&state->common);
+ if (err)
+ return err;
+
+ state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
+
+ return kunwind_recover_return_address(state);
+}
+
+typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
+
+static __always_inline void
+do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
+ void *cookie)
+{
+ if (kunwind_recover_return_address(state))
+ return;
+
while (1) {
int ret;
- if (fn(frame, data))
+ if (!consume_state(state, cookie))
break;
- ret = unwind_frame(tsk, frame);
+ ret = kunwind_next(state);
if (ret < 0)
break;
}
}
-NOKPROBE_SYMBOL(walk_stackframe);
-#ifdef CONFIG_STACKTRACE
-struct stack_trace_data {
- struct stack_trace *trace;
- unsigned int no_sched_functions;
- unsigned int skip;
-};
+/*
+ * Per-cpu stacks are only accessible when unwinding the current task in a
+ * non-preemptible context.
+ */
+#define STACKINFO_CPU(name) \
+ ({ \
+ ((task == current) && !preemptible()) \
+ ? stackinfo_get_##name() \
+ : stackinfo_get_unknown(); \
+ })
+
+/*
+ * SDEI stacks are only accessible when unwinding the current task in an NMI
+ * context.
+ */
+#define STACKINFO_SDEI(name) \
+ ({ \
+ ((task == current) && in_nmi()) \
+ ? stackinfo_get_sdei_##name() \
+ : stackinfo_get_unknown(); \
+ })
-static int save_trace(struct stackframe *frame, void *d)
+#define STACKINFO_EFI \
+ ({ \
+ ((task == current) && current_in_efi()) \
+ ? stackinfo_get_efi() \
+ : stackinfo_get_unknown(); \
+ })
+
+static __always_inline void
+kunwind_stack_walk(kunwind_consume_fn consume_state,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
- struct stack_trace_data *data = d;
- struct stack_trace *trace = data->trace;
- unsigned long addr = frame->pc;
-
- if (data->no_sched_functions && in_sched_functions(addr))
- return 0;
- if (data->skip) {
- data->skip--;
- return 0;
+ struct stack_info stacks[] = {
+ stackinfo_get_task(task),
+ STACKINFO_CPU(irq),
+#if defined(CONFIG_VMAP_STACK)
+ STACKINFO_CPU(overflow),
+#endif
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+ STACKINFO_SDEI(normal),
+ STACKINFO_SDEI(critical),
+#endif
+#ifdef CONFIG_EFI
+ STACKINFO_EFI,
+#endif
+ };
+ struct kunwind_state state = {
+ .common = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ },
+ };
+
+ if (regs) {
+ if (task != current)
+ return;
+ kunwind_init_from_regs(&state, regs);
+ } else if (task == current) {
+ kunwind_init_from_caller(&state);
+ } else {
+ kunwind_init_from_task(&state, task);
}
- trace->entries[trace->nr_entries++] = addr;
+ do_kunwind(&state, consume_state, cookie);
+}
- return trace->nr_entries >= trace->max_entries;
+struct kunwind_consume_entry_data {
+ stack_trace_consume_fn consume_entry;
+ void *cookie;
+};
+
+static bool
+arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct kunwind_consume_entry_data *data = cookie;
+ return data->consume_entry(data->cookie, state->common.pc);
}
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
- struct stack_trace_data data;
- struct stackframe frame;
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
- data.trace = trace;
- data.skip = trace->skip;
- data.no_sched_functions = 0;
+ kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
+}
- start_backtrace(&frame, regs->regs[29], regs->pc);
- walk_stackframe(current, &frame, save_trace, &data);
+static bool dump_backtrace_entry(void *arg, unsigned long where)
+{
+ char *loglvl = arg;
+ printk("%s %pSb\n", loglvl, (void *)where);
+ return true;
}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-static noinline void __save_stack_trace(struct task_struct *tsk,
- struct stack_trace *trace, unsigned int nosched)
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+ const char *loglvl)
{
- struct stack_trace_data data;
- struct stackframe frame;
+ pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
- if (!try_get_task_stack(tsk))
+ if (regs && user_mode(regs))
return;
- data.trace = trace;
- data.skip = trace->skip;
- data.no_sched_functions = nosched;
+ if (!tsk)
+ tsk = current;
- if (tsk != current) {
- start_backtrace(&frame, thread_saved_fp(tsk),
- thread_saved_pc(tsk));
- } else {
- /* We don't want this function nor the caller */
- data.skip += 2;
- start_backtrace(&frame,
- (unsigned long)__builtin_frame_address(0),
- (unsigned long)__save_stack_trace);
- }
+ if (!try_get_task_stack(tsk))
+ return;
- walk_stackframe(tsk, &frame, save_trace, &data);
+ printk("%sCall trace:\n", loglvl);
+ arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
put_task_stack(tsk);
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
{
- __save_stack_trace(tsk, trace, 1);
+ dump_backtrace(NULL, tsk, loglvl);
+ barrier();
}
-
-void save_stack_trace(struct stack_trace *trace)
-{
- __save_stack_trace(current, trace, 0);
-}
-
-EXPORT_SYMBOL_GPL(save_stack_trace);
-#endif
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 9405d1b7f4b0..eca4d0435211 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -3,13 +3,16 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/pgtable.h>
+#include <linux/cpuidle.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/cpuidle.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
-#include <asm/pgtable.h>
+#include <asm/mte.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/smp_plat.h>
@@ -41,6 +44,8 @@ void notrace __cpu_suspend_exit(void)
{
unsigned int cpu = smp_processor_id();
+ mte_suspend_exit();
+
/*
* We are resuming from reset with the idmap active in TTBR0_EL1.
* We must uninstall the idmap and restore the expected MMU
@@ -50,14 +55,15 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+ cpu_enable_swapper_cnp();
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
+ if (alternative_has_cap_unlikely(ARM64_HAS_DIT))
+ set_pstate_dit(1);
__uaccess_enable_hw_pan();
- uao_thread_switch(current);
/*
* Restore HW breakpoint registers to sane values
@@ -72,8 +78,10 @@ void notrace __cpu_suspend_exit(void)
* have turned the mitigation on. If the user has forcefully
* disabled it, make sure their wishes are obeyed.
*/
- if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- arm64_set_ssbd_mitigation(false);
+ spectre_v4_enable_mitigation(NULL);
+
+ /* Restore additional feature-specific configuration */
+ ptrauth_suspend_exit();
}
/*
@@ -88,21 +96,46 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
int ret = 0;
unsigned long flags;
struct sleep_stack_data state;
+ struct arm_cpuidle_irq_context context;
+
+ /*
+ * Some portions of CPU state (e.g. PSTATE.{PAN,DIT}) are initialized
+ * before alternatives are patched, but are only restored by
+ * __cpu_suspend_exit() after alternatives are patched. To avoid
+ * accidentally losing these bits we must not attempt to suspend until
+ * after alternatives have been patched.
+ */
+ WARN_ON(!system_capabilities_finalized());
+
+ /* Report any MTE async fault before going to suspend */
+ mte_suspend_enter();
/*
* From this point debug exceptions are disabled to prevent
* updates to mdscr register (saved and restored along with
* general purpose registers) from kernel debuggers.
+ *
+ * Strictly speaking the trace_hardirqs_off() here is superfluous,
+ * hardirqs should be firmly off by now. This really ought to use
+ * something like raw_local_daif_save().
*/
flags = local_daif_save();
/*
- * Function graph tracer state gets incosistent when the kernel
+ * Function graph tracer state gets inconsistent when the kernel
* calls functions that never return (aka suspend finishers) hence
* disable graph tracing during their execution.
*/
pause_graph_tracing();
+ /*
+ * Switch to using DAIF.IF instead of PMR in order to reliably
+ * resume if we're using pseudo-NMIs.
+ */
+ arm_cpuidle_save_irq_context(&context);
+
+ ct_cpuidle_enter();
+
if (__cpu_suspend_enter(&state)) {
/* Call the suspend finisher */
ret = fn(arg);
@@ -116,16 +149,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
*/
if (!ret)
ret = -EOPNOTSUPP;
+
+ ct_cpuidle_exit();
} else {
+ ct_cpuidle_exit();
__cpu_suspend_exit();
}
+ arm_cpuidle_restore_irq_context(&context);
+
unpause_graph_tracing();
/*
* Restore pstate flags. OS lock and mdscr have been already
* restored, so from this point onwards, debugging is fully
- * renabled if it was enabled when core started shutdown.
+ * reenabled if it was enabled when core started shutdown.
*/
local_daif_restore(flags);
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 3c18c2454089..4a609e9b65de 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -9,7 +9,6 @@
#include <linux/compat.h>
#include <linux/cpufeature.h>
-#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
@@ -32,7 +31,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
- if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/*
* The workaround requires an inner-shareable tlbi.
* We pick the reserved-ASID to minimise the impact.
@@ -41,7 +40,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
dsb(ish);
}
- ret = __flush_cache_user_range(start, start + chunk);
+ ret = caches_clean_inval_user_pou(start, start + chunk);
if (ret)
return ret;
@@ -68,7 +67,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
*/
long compat_arm_syscall(struct pt_regs *regs, int scno)
{
- void __user *addr;
+ unsigned long addr;
switch (scno) {
/*
@@ -111,10 +110,9 @@ long compat_arm_syscall(struct pt_regs *regs, int scno)
break;
}
- addr = (void __user *)instruction_pointer(regs) -
- (compat_thumb_mode(regs) ? 2 : 4);
+ addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4);
arm64_notify_die("Oops - bad compat syscall(2)", regs,
- SIGILL, ILL_ILLTRP, addr, scno);
+ SIGILL, ILL_ILLTRP, addr, 0);
return 0;
}
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 9a9d98a443fc..9a70d9746b66 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -5,10 +5,11 @@
#include <linux/errno.h>
#include <linux/nospec.h>
#include <linux/ptrace.h>
+#include <linux/randomize_kstack.h>
#include <linux/syscalls.h>
-#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
@@ -42,6 +43,8 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
{
long ret;
+ add_random_kstack_offset();
+
if (scno < sc_nr) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
@@ -50,7 +53,20 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
ret = do_ni_syscall(regs, scno);
}
- regs->regs[0] = ret;
+ syscall_set_return_value(current, regs, 0, ret);
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * but not enough for arm64 stack utilization comfort. To keep
+ * reasonable stack head room, reduce the maximum offset to 9 bits.
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
+ *
+ * The resulting 5 bits of entropy is seen in SP[8:4].
+ */
+ choose_random_kstack_offset(get_random_u16() & 0x1FF);
}
static inline bool has_syscall_work(unsigned long flags)
@@ -58,54 +74,60 @@ static inline bool has_syscall_work(unsigned long flags)
return unlikely(flags & _TIF_SYSCALL_WORK);
}
-int syscall_trace_enter(struct pt_regs *regs);
-void syscall_trace_exit(struct pt_regs *regs);
-
-#ifdef CONFIG_ARM64_ERRATUM_1463225
-DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-
-static void cortex_a76_erratum_1463225_svc_handler(void)
-{
- u32 reg, val;
-
- if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
- return;
-
- if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
- return;
-
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
- reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
- write_sysreg(val, mdscr_el1);
- asm volatile("msr daifclr, #8");
- isb();
-
- /* We will have taken a single-step exception by this point */
-
- write_sysreg(reg, mdscr_el1);
- __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
-}
-#else
-static void cortex_a76_erratum_1463225_svc_handler(void) { }
-#endif /* CONFIG_ARM64_ERRATUM_1463225 */
-
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
const syscall_fn_t syscall_table[])
{
- unsigned long flags = current_thread_info()->flags;
+ unsigned long flags = read_thread_flags();
regs->orig_x0 = regs->regs[0];
regs->syscallno = scno;
- cortex_a76_erratum_1463225_svc_handler();
- local_daif_restore(DAIF_PROCCTX);
- user_exit();
+ /*
+ * BTI note:
+ * The architecture does not guarantee that SPSR.BTYPE is zero
+ * on taking an SVC, so we could return to userspace with a
+ * non-zero BTYPE after the syscall.
+ *
+ * This shouldn't matter except when userspace is explicitly
+ * doing something stupid, such as setting PROT_BTI on a page
+ * that lacks conforming BTI/PACIxSP instructions, falling
+ * through from one executable page to another with differing
+ * PROT_BTI, or messing with BTYPE via ptrace: in such cases,
+ * userspace should not be surprised if a SIGILL occurs on
+ * syscall return.
+ *
+ * So, don't touch regs->pstate & PSR_BTYPE_MASK here.
+ * (Similarly for HVC and SMC elsewhere.)
+ */
+
+ if (flags & _TIF_MTE_ASYNC_FAULT) {
+ /*
+ * Process the asynchronous tag check fault before the actual
+ * syscall. do_notify_resume() will send a signal to userspace
+ * before the syscall is restarted.
+ */
+ syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0);
+ return;
+ }
if (has_syscall_work(flags)) {
- /* set default errno for user-issued syscall(-1) */
+ /*
+ * The de-facto standard way to skip a system call using ptrace
+ * is to set the system call to -1 (NO_SYSCALL) and set x0 to a
+ * suitable error code for consumption by userspace. However,
+ * this cannot be distinguished from a user-issued syscall(-1)
+ * and so we must set x0 to -ENOSYS here in case the tracer doesn't
+ * issue the skip and we fall into trace_exit with x0 preserved.
+ *
+ * This is slightly odd because it also means that if a tracer
+ * sets the system call number to -1 but does not initialise x0,
+ * then x0 will be preserved for all system calls apart from a
+ * user-issued syscall(-1). However, requesting a skip and not
+ * setting the return value is unlikely to do anything sensible
+ * anyway.
+ */
if (scno == NO_SYSCALL)
- regs->regs[0] = -ENOSYS;
+ syscall_set_return_value(current, regs, -ENOSYS, 0);
scno = syscall_trace_enter(regs);
if (scno == NO_SYSCALL)
goto trace_exit;
@@ -119,49 +141,22 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* exit regardless, as the old entry assembly did.
*/
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- local_daif_mask();
- flags = current_thread_info()->flags;
- if (!has_syscall_work(flags)) {
- /*
- * We're off to userspace, where interrupts are
- * always enabled after we restore the flags from
- * the SPSR.
- */
- trace_hardirqs_on();
+ flags = read_thread_flags();
+ if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
return;
- }
- local_daif_restore(DAIF_PROCCTX);
}
trace_exit:
syscall_trace_exit(regs);
}
-static inline void sve_user_discard(void)
-{
- if (!system_supports_sve())
- return;
-
- clear_thread_flag(TIF_SVE);
-
- /*
- * task_fpsimd_load() won't be called to update CPACR_EL1 in
- * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
- * happens if a context switch or kernel_neon_begin() or context
- * modification (sigreturn, ptrace) intervenes.
- * So, ensure that CPACR_EL1 is already correct for the fast-path case.
- */
- sve_user_disable();
-}
-
-void el0_svc_handler(struct pt_regs *regs)
+void do_el0_svc(struct pt_regs *regs)
{
- sve_user_discard();
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
#ifdef CONFIG_COMPAT
-void el0_svc_compat_handler(struct pt_regs *regs)
+void do_el0_svc_compat(struct pt_regs *regs)
{
el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
compat_sys_call_table);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 73f06d4b3aae..b5855eb7435d 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -18,36 +18,37 @@
#include <linux/timex.h>
#include <linux/errno.h>
#include <linux/profile.h>
+#include <linux/stacktrace.h>
#include <linux/syscore_ops.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
#include <linux/acpi.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/thread_info.h>
-#include <asm/stacktrace.h>
#include <asm/paravirt.h>
-unsigned long profile_pc(struct pt_regs *regs)
+static bool profile_pc_cb(void *arg, unsigned long pc)
{
- struct stackframe frame;
+ unsigned long *prof_pc = arg;
- if (!in_lock_functions(regs->pc))
- return regs->pc;
+ if (in_lock_functions(pc))
+ return true;
+ *prof_pc = pc;
+ return false;
+}
- start_backtrace(&frame, regs->regs[29], regs->pc);
+unsigned long profile_pc(struct pt_regs *regs)
+{
+ unsigned long prof_pc = 0;
- do {
- int ret = unwind_frame(NULL, &frame);
- if (ret < 0)
- return 0;
- } while (in_lock_functions(frame.pc));
+ arch_stack_walk(profile_pc_cb, &prof_pc, current, regs);
- return frame.pc;
+ return prof_pc;
}
EXPORT_SYMBOL(profile_pc);
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index fa9528dfd0ce..1a2c72f3e7f8 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -14,6 +14,7 @@
#include <linux/acpi.h>
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
+#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/percpu.h>
@@ -21,44 +22,6 @@
#include <asm/cputype.h>
#include <asm/topology.h>
-void store_cpu_topology(unsigned int cpuid)
-{
- struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
- u64 mpidr;
-
- if (cpuid_topo->package_id != -1)
- goto topology_populated;
-
- mpidr = read_cpuid_mpidr();
-
- /* Uniprocessor systems can rely on default topology values */
- if (mpidr & MPIDR_UP_BITMASK)
- return;
-
- /* Create cpu topology mapping based on MPIDR. */
- if (mpidr & MPIDR_MT_BITMASK) {
- /* Multiprocessor system : Multi-threads per core */
- cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
- cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
- MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
- } else {
- /* Multiprocessor system : Single-thread per core */
- cpuid_topo->thread_id = -1;
- cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
- cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
- MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
- MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
- }
-
- pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
- cpuid, cpuid_topo->package_id, cpuid_topo->core_id,
- cpuid_topo->thread_id, mpidr);
-
-topology_populated:
- update_siblings_masks(cpuid);
-}
-
#ifdef CONFIG_ACPI
static bool __init acpi_cpu_is_threaded(int cpu)
{
@@ -86,8 +49,6 @@ int __init parse_acpi_topology(void)
return 0;
for_each_possible_cpu(cpu) {
- int i, cache_id;
-
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
return topology_id;
@@ -100,24 +61,288 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].thread_id = -1;
cpu_topology[cpu].core_id = topology_id;
}
+ topology_id = find_acpi_cpu_topology_cluster(cpu);
+ cpu_topology[cpu].cluster_id = topology_id;
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
+ }
+
+ return 0;
+}
+#endif
- i = acpi_find_last_cache_level(cpu);
+#ifdef CONFIG_ARM64_AMU_EXTN
+#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
+#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
+#else
+#define read_corecnt() (0UL)
+#define read_constcnt() (0UL)
+#endif
- if (i > 0) {
- /*
- * this is the only part of cpu_topology that has
- * a direct relationship with the cache topology
- */
- cache_id = find_acpi_cpu_cache_topology(cpu, i);
- if (cache_id > 0)
- cpu_topology[cpu].llc_id = cache_id;
- }
+#undef pr_fmt
+#define pr_fmt(fmt) "AMU: " fmt
+
+/*
+ * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
+ * the CPU capacity and its associated frequency have been correctly
+ * initialized.
+ */
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
+static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
+static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+static cpumask_var_t amu_fie_cpus;
+
+void update_freq_counters_refs(void)
+{
+ this_cpu_write(arch_core_cycles_prev, read_corecnt());
+ this_cpu_write(arch_const_cycles_prev, read_constcnt());
+}
+
+static inline bool freq_counters_valid(int cpu)
+{
+ if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+ return false;
+
+ if (!cpu_has_amu_feat(cpu)) {
+ pr_debug("CPU%d: counters are not supported.\n", cpu);
+ return false;
+ }
+
+ if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
+ !per_cpu(arch_core_cycles_prev, cpu))) {
+ pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
+ return false;
}
+ return true;
+}
+
+void freq_inv_set_max_ratio(int cpu, u64 max_rate)
+{
+ u64 ratio, ref_rate = arch_timer_get_rate();
+
+ if (unlikely(!max_rate || !ref_rate)) {
+ WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n",
+ cpu);
+ return;
+ }
+
+ /*
+ * Pre-compute the fixed ratio between the frequency of the constant
+ * reference counter and the maximum frequency of the CPU.
+ *
+ * ref_rate
+ * arch_max_freq_scale = ---------- * SCHED_CAPACITY_SCALE²
+ * max_rate
+ *
+ * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+ * in order to ensure a good resolution for arch_max_freq_scale for
+ * very low reference frequencies (down to the KHz range which should
+ * be unlikely).
+ */
+ ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
+ ratio = div64_u64(ratio, max_rate);
+ if (!ratio) {
+ WARN_ONCE(1, "Reference frequency too low.\n");
+ return;
+ }
+
+ WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio);
+}
+
+static void amu_scale_freq_tick(void)
+{
+ u64 prev_core_cnt, prev_const_cnt;
+ u64 core_cnt, const_cnt, scale;
+
+ prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
+ prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+ update_freq_counters_refs();
+
+ const_cnt = this_cpu_read(arch_const_cycles_prev);
+ core_cnt = this_cpu_read(arch_core_cycles_prev);
+
+ if (unlikely(core_cnt <= prev_core_cnt ||
+ const_cnt <= prev_const_cnt))
+ return;
+
+ /*
+ * /\core arch_max_freq_scale
+ * scale = ------- * --------------------
+ * /\const SCHED_CAPACITY_SCALE
+ *
+ * See validate_cpu_freq_invariance_counters() for details on
+ * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+ */
+ scale = core_cnt - prev_core_cnt;
+ scale *= this_cpu_read(arch_max_freq_scale);
+ scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
+ const_cnt - prev_const_cnt);
+
+ scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
+ this_cpu_write(arch_freq_scale, (unsigned long)scale);
+}
+
+static struct scale_freq_data amu_sfd = {
+ .source = SCALE_FREQ_SOURCE_ARCH,
+ .set_freq_scale = amu_scale_freq_tick,
+};
+
+static void amu_fie_setup(const struct cpumask *cpus)
+{
+ int cpu;
+
+ /* We are already set since the last insmod of cpufreq driver */
+ if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ return;
+
+ for_each_cpu(cpu, cpus) {
+ if (!freq_counters_valid(cpu))
+ return;
+ }
+
+ cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
+
+ topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);
+
+ pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
+ cpumask_pr_args(cpus));
+}
+
+static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+
+ if (val == CPUFREQ_CREATE_POLICY)
+ amu_fie_setup(policy->related_cpus);
+
+ /*
+ * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
+ * counters don't have any dependency on cpufreq driver once we have
+ * initialized AMU support and enabled invariance. The AMU counters will
+ * keep on working just fine in the absence of the cpufreq driver, and
+ * for the CPUs for which there are no counters available, the last set
+ * value of arch_freq_scale will remain valid as that is the frequency
+ * those CPUs are running at.
+ */
+
return 0;
}
-#endif
+static struct notifier_block init_amu_fie_notifier = {
+ .notifier_call = init_amu_fie_callback,
+};
+
+static int __init init_amu_fie(void)
+{
+ int ret;
+
+ if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ free_cpumask_var(amu_fie_cpus);
+
+ return ret;
+}
+core_initcall(init_amu_fie);
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#include <acpi/cppc_acpi.h>
+static void cpu_read_corecnt(void *val)
+{
+ /*
+ * A value of 0 can be returned if the current CPU does not support AMUs
+ * or if the counter is disabled for this CPU. A return value of 0 at
+ * counter read is properly handled as an error case by the users of the
+ * counter.
+ */
+ *(u64 *)val = read_corecnt();
+}
+
+static void cpu_read_constcnt(void *val)
+{
+ /*
+ * Return 0 if the current CPU is affected by erratum 2457168. A value
+ * of 0 is also returned if the current CPU does not support AMUs or if
+ * the counter is disabled. A return value of 0 at counter read is
+ * properly handled as an error case by the users of the counter.
+ */
+ *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ?
+ 0UL : read_constcnt();
+}
+
+static inline
+int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val)
+{
+ /*
+ * Abort call on counterless CPU or when interrupts are
+ * disabled - can lead to deadlock in smp sync call.
+ */
+ if (!cpu_has_amu_feat(cpu))
+ return -EOPNOTSUPP;
+
+ if (WARN_ON_ONCE(irqs_disabled()))
+ return -EPERM;
+
+ smp_call_function_single(cpu, func, val, 1);
+
+ return 0;
+}
+
+/*
+ * Refer to drivers/acpi/cppc_acpi.c for the description of the functions
+ * below.
+ */
+bool cpc_ffh_supported(void)
+{
+ int cpu = get_cpu_with_amu_feat();
+
+ /*
+ * FFH is considered supported if there is at least one present CPU that
+ * supports AMUs. Using FFH to read core and reference counters for CPUs
+ * that do not support AMUs, have counters disabled or that are affected
+ * by errata, will result in a return value of 0.
+ *
+ * This is done to allow any enabled and valid counters to be read
+ * through FFH, knowing that potentially returning 0 as counter value is
+ * properly handled by the users of these counters.
+ */
+ if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
+ return false;
+
+ return true;
+}
+
+int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch ((u64)reg->address) {
+ case 0x0:
+ ret = counters_read_on_cpu(cpu, cpu_read_corecnt, val);
+ break;
+ case 0x1:
+ ret = counters_read_on_cpu(cpu, cpu_read_constcnt, val);
+ break;
+ }
+
+ if (!ret) {
+ *val &= GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+ reg->bit_offset);
+ *val >>= reg->bit_offset;
+ }
+
+ return ret;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 73caf35c2262..215e6d7f2df8 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -9,7 +9,6 @@
#include <linux/bug.h>
#include <linux/context_tracking.h>
#include <linux/signal.h>
-#include <linux/personality.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
@@ -19,6 +18,7 @@
#include <linux/module.h>
#include <linux/kexec.h>
#include <linux/delay.h>
+#include <linux/efi.h>
#include <linux/init.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
@@ -27,134 +27,167 @@
#include <linux/syscalls.h>
#include <linux/mm_types.h>
#include <linux/kasan.h>
+#include <linux/ubsan.h>
+#include <linux/cfi.h>
#include <asm/atomic.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/efi.h>
#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/extable.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
+#include <asm/patching.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
-#include <asm/exception.h>
#include <asm/system_misc.h>
#include <asm/sysreg.h>
-static const char *handler[]= {
- "Synchronous Abort",
- "IRQ",
- "FIQ",
- "Error"
-};
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) != 0;
+}
-int show_unhandled_signals = 0;
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) == 0;
+}
-static void dump_backtrace_entry(unsigned long where)
+static bool __kprobes __check_cs(unsigned long pstate)
{
- printk(" %pS\n", (void *)where);
+ return (pstate & PSR_C_BIT) != 0;
}
-static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
+static bool __kprobes __check_cc(unsigned long pstate)
{
- unsigned long addr = instruction_pointer(regs);
- char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
- int i;
+ return (pstate & PSR_C_BIT) == 0;
+}
- if (user_mode(regs))
- return;
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) != 0;
+}
- for (i = -4; i < 1; i++) {
- unsigned int val, bad;
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) == 0;
+}
- bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) != 0;
+}
- if (!bad)
- p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
- else {
- p += sprintf(p, "bad PC value");
- break;
- }
- }
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) == 0;
+}
- printk("%sCode: %s\n", lvl, str);
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) != 0;
}
-void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+static bool __kprobes __check_ls(unsigned long pstate)
{
- struct stackframe frame;
- int skip = 0;
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) == 0;
+}
- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) == 0;
+}
- if (regs) {
- if (user_mode(regs))
- return;
- skip = 1;
- }
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) != 0;
+}
- if (!tsk)
- tsk = current;
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
- if (!try_get_task_stack(tsk))
- return;
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) == 0;
+}
- if (tsk == current) {
- start_backtrace(&frame,
- (unsigned long)__builtin_frame_address(0),
- (unsigned long)dump_backtrace);
- } else {
- /*
- * task blocked in __switch_to
- */
- start_backtrace(&frame,
- thread_saved_fp(tsk),
- thread_saved_pc(tsk));
- }
+static bool __kprobes __check_le(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
- printk("Call trace:\n");
- do {
- /* skip until specified stack frame */
- if (!skip) {
- dump_backtrace_entry(frame.pc);
- } else if (frame.fp == regs->regs[29]) {
- skip = 0;
- /*
- * Mostly, this is the case where this function is
- * called in panic/abort. As exception handler's
- * stack frame does not contain the corresponding pc
- * at which an exception has taken place, use regs->pc
- * instead.
- */
- dump_backtrace_entry(regs->pc);
- }
- } while (!unwind_frame(tsk, &frame));
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) != 0;
+}
- put_task_stack(tsk);
+static bool __kprobes __check_al(unsigned long pstate)
+{
+ return true;
}
-void show_stack(struct task_struct *tsk, unsigned long *sp)
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+ __check_eq, __check_ne, __check_cs, __check_cc,
+ __check_mi, __check_pl, __check_vs, __check_vc,
+ __check_hi, __check_ls, __check_ge, __check_lt,
+ __check_gt, __check_le, __check_al, __check_al
+};
+
+int show_unhandled_signals = 0;
+
+static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
- dump_backtrace(NULL, tsk);
- barrier();
+ unsigned long addr = instruction_pointer(regs);
+ char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
+ int i;
+
+ if (user_mode(regs))
+ return;
+
+ for (i = -4; i < 1; i++) {
+ unsigned int val, bad;
+
+ bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+
+ if (!bad)
+ p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
+ else
+ p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
+ }
+
+ printk("%sCode: %s\n", lvl, str);
}
#ifdef CONFIG_PREEMPT
#define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
#else
#define S_PREEMPT ""
#endif
+
#define S_SMP " SMP"
-static int __die(const char *str, int err, struct pt_regs *regs)
+static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
- pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
+ pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n",
str, err, ++die_counter);
/* trap and error numbers are mostly meaningless on ARM */
@@ -175,7 +208,7 @@ static DEFINE_RAW_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
*/
-void die(const char *str, struct pt_regs *regs, int err)
+void die(const char *str, struct pt_regs *regs, long err)
{
int ret;
unsigned long flags;
@@ -196,14 +229,14 @@ void die(const char *str, struct pt_regs *regs, int err)
oops_exit();
if (in_interrupt())
- panic("Fatal exception in interrupt");
+ panic("%s: Fatal exception in interrupt", str);
if (panic_on_oops)
- panic("Fatal exception");
+ panic("%s: Fatal exception", str);
raw_spin_unlock_irqrestore(&die_lock, flags);
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
static void arm64_show_signal(int signo, const char *str)
@@ -211,7 +244,7 @@ static void arm64_show_signal(int signo, const char *str)
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
struct task_struct *tsk = current;
- unsigned int esr = tsk->thread.fault_code;
+ unsigned long esr = tsk->thread.fault_code;
struct pt_regs *regs = task_pt_regs(tsk);
/* Leave if the signal won't be shown */
@@ -222,7 +255,7 @@ static void arm64_show_signal(int signo, const char *str)
pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
if (esr)
- pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr);
+ pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr);
pr_cont("%s", str);
print_vma_addr(KERN_CONT " in ", regs->pc);
@@ -230,102 +263,133 @@ static void arm64_show_signal(int signo, const char *str)
__show_regs(regs);
}
-void arm64_force_sig_fault(int signo, int code, void __user *addr,
+void arm64_force_sig_fault(int signo, int code, unsigned long far,
const char *str)
{
arm64_show_signal(signo, str);
if (signo == SIGKILL)
force_sig(SIGKILL);
else
- force_sig_fault(signo, code, addr);
+ force_sig_fault(signo, code, (void __user *)far);
}
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb,
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
arm64_show_signal(SIGBUS, str);
- force_sig_mceerr(code, addr, lsb);
+ force_sig_mceerr(code, (void __user *)far, lsb);
}
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr,
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far,
const char *str)
{
arm64_show_signal(SIGTRAP, str);
- force_sig_ptrace_errno_trap(errno, addr);
+ force_sig_ptrace_errno_trap(errno, (void __user *)far);
}
void arm64_notify_die(const char *str, struct pt_regs *regs,
- int signo, int sicode, void __user *addr,
- int err)
+ int signo, int sicode, unsigned long far,
+ unsigned long err)
{
if (user_mode(regs)) {
WARN_ON(regs != current_pt_regs());
current->thread.fault_address = 0;
current->thread.fault_code = err;
- arm64_force_sig_fault(signo, sicode, addr, str);
+ arm64_force_sig_fault(signo, sicode, far, str);
} else {
die(str, regs, err);
}
}
-void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
+#ifdef CONFIG_COMPAT
+#define PSTATE_IT_1_0_SHIFT 25
+#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
+#define PSTATE_IT_7_2_SHIFT 10
+#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
+
+static u32 compat_get_it_state(struct pt_regs *regs)
{
- regs->pc += size;
+ u32 it, pstate = regs->pstate;
- /*
- * If we were single stepping, we want to get the step exception after
- * we return from the trap.
- */
- if (user_mode(regs))
- user_fastforward_single_step(current);
+ it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
+ it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
+
+ return it;
}
-static LIST_HEAD(undef_hook);
-static DEFINE_RAW_SPINLOCK(undef_lock);
+static void compat_set_it_state(struct pt_regs *regs, u32 it)
+{
+ u32 pstate_it;
+
+ pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
+ pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
+
+ regs->pstate &= ~PSR_AA32_IT_MASK;
+ regs->pstate |= pstate_it;
+}
-void register_undef_hook(struct undef_hook *hook)
+static void advance_itstate(struct pt_regs *regs)
{
- unsigned long flags;
+ u32 it;
+
+ /* ARM mode */
+ if (!(regs->pstate & PSR_AA32_T_BIT) ||
+ !(regs->pstate & PSR_AA32_IT_MASK))
+ return;
+
+ it = compat_get_it_state(regs);
- raw_spin_lock_irqsave(&undef_lock, flags);
- list_add(&hook->node, &undef_hook);
- raw_spin_unlock_irqrestore(&undef_lock, flags);
+ /*
+ * If this is the last instruction of the block, wipe the IT
+ * state. Otherwise advance it.
+ */
+ if (!(it & 7))
+ it = 0;
+ else
+ it = (it & 0xe0) | ((it << 1) & 0x1f);
+
+ compat_set_it_state(regs, it);
}
+#else
+static void advance_itstate(struct pt_regs *regs)
+{
+}
+#endif
-void unregister_undef_hook(struct undef_hook *hook)
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size)
{
- unsigned long flags;
+ regs->pc += size;
+
+ /*
+ * If we were single stepping, we want to get the step exception after
+ * we return from the trap.
+ */
+ if (user_mode(regs))
+ user_fastforward_single_step(current);
- raw_spin_lock_irqsave(&undef_lock, flags);
- list_del(&hook->node);
- raw_spin_unlock_irqrestore(&undef_lock, flags);
+ if (compat_user_mode(regs))
+ advance_itstate(regs);
+ else
+ regs->pstate &= ~PSR_BTYPE_MASK;
}
-static int call_undef_hook(struct pt_regs *regs)
+static int user_insn_read(struct pt_regs *regs, u32 *insnp)
{
- struct undef_hook *hook;
- unsigned long flags;
u32 instr;
- int (*fn)(struct pt_regs *regs, u32 instr) = NULL;
- void __user *pc = (void __user *)instruction_pointer(regs);
+ unsigned long pc = instruction_pointer(regs);
- if (!user_mode(regs)) {
- __le32 instr_le;
- if (probe_kernel_address((__force __le32 *)pc, instr_le))
- goto exit;
- instr = le32_to_cpu(instr_le);
- } else if (compat_thumb_mode(regs)) {
+ if (compat_thumb_mode(regs)) {
/* 16-bit Thumb instruction */
__le16 instr_le;
if (get_user(instr_le, (__le16 __user *)pc))
- goto exit;
+ return -EFAULT;
instr = le16_to_cpu(instr_le);
if (aarch32_insn_is_wide(instr)) {
u32 instr2;
if (get_user(instr_le, (__le16 __user *)(pc + 2)))
- goto exit;
+ return -EFAULT;
instr2 = le16_to_cpu(instr_le);
instr = (instr << 16) | instr2;
}
@@ -333,22 +397,15 @@ static int call_undef_hook(struct pt_regs *regs)
/* 32-bit ARM instruction */
__le32 instr_le;
if (get_user(instr_le, (__le32 __user *)pc))
- goto exit;
+ return -EFAULT;
instr = le32_to_cpu(instr_le);
}
- raw_spin_lock_irqsave(&undef_lock, flags);
- list_for_each_entry(hook, &undef_hook, node)
- if ((instr & hook->instr_mask) == hook->instr_val &&
- (regs->pstate & hook->pstate_mask) == hook->pstate_val)
- fn = hook->fn;
-
- raw_spin_unlock_irqrestore(&undef_lock, flags);
-exit:
- return fn ? fn(regs, instr) : 1;
+ *insnp = instr;
+ return 0;
}
-void force_signal_inject(int signal, int code, unsigned long address)
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err)
{
const char *desc;
struct pt_regs *regs = current_pt_regs();
@@ -374,7 +431,7 @@ void force_signal_inject(int signal, int code, unsigned long address)
signal = SIGKILL;
}
- arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0);
+ arm64_notify_die(desc, regs, signal, code, address, err);
}
/*
@@ -384,32 +441,92 @@ void arm64_notify_segfault(unsigned long addr)
{
int code;
- down_read(&current->mm->mmap_sem);
- if (find_vma(current->mm, addr) == NULL)
+ mmap_read_lock(current->mm);
+ if (find_vma(current->mm, untagged_addr(addr)) == NULL)
code = SEGV_MAPERR;
else
code = SEGV_ACCERR;
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
- force_signal_inject(SIGSEGV, code, addr);
+ force_signal_inject(SIGSEGV, code, addr, 0);
}
-void do_undefinstr(struct pt_regs *regs)
+void do_el0_undef(struct pt_regs *regs, unsigned long esr)
{
+ u32 insn;
+
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
return;
- if (call_undef_hook(regs) == 0)
+ if (user_insn_read(regs, &insn))
+ goto out_err;
+
+ if (try_emulate_mrs(regs, insn))
+ return;
+
+ if (try_emulate_armv8_deprecated(regs, insn))
return;
- BUG_ON(!user_mode(regs));
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+out_err:
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+}
+
+void do_el1_undef(struct pt_regs *regs, unsigned long esr)
+{
+ u32 insn;
+
+ if (aarch64_insn_read((void *)regs->pc, &insn))
+ goto out_err;
+
+ if (try_emulate_el1_ssbs(regs, insn))
+ return;
+
+out_err:
+ die("Oops - Undefined instruction", regs, esr);
+}
+
+void do_el0_bti(struct pt_regs *regs)
+{
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+}
+
+void do_el1_bti(struct pt_regs *regs, unsigned long esr)
+{
+ if (efi_runtime_fixup_exception(regs, "BTI violation")) {
+ regs->pstate &= ~PSR_BTYPE_MASK;
+ return;
+ }
+ die("Oops - BTI", regs, esr);
+}
+
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+}
+
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
+{
+ /*
+ * Unexpected FPAC exception in the kernel: kill the task before it
+ * does any more harm.
+ */
+ die("Oops - FPAC", regs, esr);
+}
+
+void do_el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_mops_reset_regs(&regs->user_regs, esr);
+
+ /*
+ * If single stepping then finish the step before executing the
+ * prologue instruction.
+ */
+ user_fastforward_single_step(current);
}
-NOKPROBE_SYMBOL(do_undefinstr);
#define __user_cache_maint(insn, address, res) \
- if (address >= user_addr_max()) { \
+ if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
} else { \
uaccess_ttbr0_enable(); \
@@ -417,25 +534,21 @@ NOKPROBE_SYMBOL(do_undefinstr);
"1: " insn ", %1\n" \
" mov %w0, #0\n" \
"2:\n" \
- " .pushsection .fixup,\"ax\"\n" \
- " .align 2\n" \
- "3: mov %w0, %w2\n" \
- " b 2b\n" \
- " .popsection\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \
: "=r" (res) \
- : "r" (address), "i" (-EFAULT)); \
+ : "r" (address)); \
uaccess_ttbr0_disable(); \
}
-static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
+static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs)
{
- unsigned long address;
+ unsigned long tagged_address, address;
int rt = ESR_ELx_SYS64_ISS_RT(esr);
int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
int ret = 0;
- address = untagged_addr(pt_regs_read_reg(regs, rt));
+ tagged_address = pt_regs_read_reg(regs, rt);
+ address = untagged_addr(tagged_address);
switch (crm) {
case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */
@@ -457,28 +570,28 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
__user_cache_maint("ic ivau", address, ret);
break;
default:
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
if (ret)
- arm64_notify_segfault(address);
+ arm64_notify_segfault(tagged_address);
else
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
- if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/* Hide DIC so that we can trap the unnecessary maintenance...*/
- val &= ~BIT(CTR_DIC_SHIFT);
+ val &= ~BIT(CTR_EL0_DIC_SHIFT);
/* ... and fake IminLine to reduce the number of traps. */
- val &= ~CTR_IMINLINE_MASK;
- val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+ val &= ~CTR_EL0_IminLine_MASK;
+ val |= (PAGE_SHIFT - 2) & CTR_EL0_IminLine_MASK;
}
pt_regs_write_reg(regs, rt, val);
@@ -486,7 +599,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -494,7 +607,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
@@ -502,7 +615,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
-static void mrs_handler(unsigned int esr, struct pt_regs *regs)
+static void mrs_handler(unsigned long esr, struct pt_regs *regs)
{
u32 sysreg, rt;
@@ -510,18 +623,18 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs)
sysreg = esr_sys64_to_sysreg(esr);
if (do_emulate_mrs(regs, sysreg, rt) != 0)
- force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
}
-static void wfi_handler(unsigned int esr, struct pt_regs *regs)
+static void wfi_handler(unsigned long esr, struct pt_regs *regs)
{
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
struct sys64_hook {
- unsigned int esr_mask;
- unsigned int esr_val;
- void (*handler)(unsigned int esr, struct pt_regs *regs);
+ unsigned long esr_mask;
+ unsigned long esr_val;
+ void (*handler)(unsigned long esr, struct pt_regs *regs);
};
static const struct sys64_hook sys64_hooks[] = {
@@ -543,6 +656,12 @@ static const struct sys64_hook sys64_hooks[] = {
.handler = cntvct_read_handler,
},
{
+ /* Trap read access to CNTVCTSS_EL0 */
+ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+ .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS,
+ .handler = cntvct_read_handler,
+ },
+ {
/* Trap read access to CNTFRQ_EL0 */
.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
.esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ,
@@ -563,35 +682,8 @@ static const struct sys64_hook sys64_hooks[] = {
{},
};
-
#ifdef CONFIG_COMPAT
-#define PSTATE_IT_1_0_SHIFT 25
-#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT)
-#define PSTATE_IT_7_2_SHIFT 10
-#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT)
-
-static u32 compat_get_it_state(struct pt_regs *regs)
-{
- u32 it, pstate = regs->pstate;
-
- it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT;
- it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2;
-
- return it;
-}
-
-static void compat_set_it_state(struct pt_regs *regs, u32 it)
-{
- u32 pstate_it;
-
- pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK;
- pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK;
-
- regs->pstate &= ~PSR_AA32_IT_MASK;
- regs->pstate |= pstate_it;
-}
-
-static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
+static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs)
{
int cond;
@@ -611,42 +703,12 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs)
return aarch32_opcode_cond_checks[cond](regs->pstate);
}
-static void advance_itstate(struct pt_regs *regs)
-{
- u32 it;
-
- /* ARM mode */
- if (!(regs->pstate & PSR_AA32_T_BIT) ||
- !(regs->pstate & PSR_AA32_IT_MASK))
- return;
-
- it = compat_get_it_state(regs);
-
- /*
- * If this is the last instruction of the block, wipe the IT
- * state. Otherwise advance it.
- */
- if (!(it & 7))
- it = 0;
- else
- it = (it & 0xe0) | ((it << 1) & 0x1f);
-
- compat_set_it_state(regs, it);
-}
-
-static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs,
- unsigned int sz)
-{
- advance_itstate(regs);
- arm64_skip_faulting_instruction(regs, sz);
-}
-
-static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT;
pt_regs_write_reg(regs, reg, arch_timer_get_rate());
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_32_hooks[] = {
@@ -658,7 +720,7 @@ static const struct sys64_hook cp15_32_hooks[] = {
{},
};
-static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
+static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
@@ -666,7 +728,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
pt_regs_write_reg(regs, rt, lower_32_bits(val));
pt_regs_write_reg(regs, rt2, upper_32_bits(val));
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
}
static const struct sys64_hook cp15_64_hooks[] = {
@@ -675,10 +737,15 @@ static const struct sys64_hook cp15_64_hooks[] = {
.esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT,
.handler = compat_cntvct_read_handler,
},
+ {
+ .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK,
+ .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS,
+ .handler = compat_cntvct_read_handler,
+ },
{},
};
-void do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_el0_cp15(unsigned long esr, struct pt_regs *regs)
{
const struct sys64_hook *hook, *hook_base;
@@ -687,7 +754,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
* There is no T16 variant of a CP access, so we
* always advance PC by 4 bytes.
*/
- arm64_compat_skip_faulting_instruction(regs, 4);
+ arm64_skip_faulting_instruction(regs, 4);
return;
}
@@ -699,7 +766,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
hook_base = cp15_64_hooks;
break;
default:
- do_undefinstr(regs);
+ do_el0_undef(regs, esr);
return;
}
@@ -714,12 +781,11 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs)
* EL0. Fall back to our usual undefined instruction handler
* so that we handle these consistently.
*/
- do_undefinstr(regs);
+ do_el0_undef(regs, esr);
}
-NOKPROBE_SYMBOL(do_cp15instr);
#endif
-void do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_el0_sys(unsigned long esr, struct pt_regs *regs)
{
const struct sys64_hook *hook;
@@ -734,9 +800,8 @@ void do_sysinstr(unsigned int esr, struct pt_regs *regs)
* back to our usual undefined instruction handler so that we handle
* these consistently.
*/
- do_undefinstr(regs);
+ do_el0_undef(regs, esr);
}
-NOKPROBE_SYMBOL(do_sysinstr);
static const char *esr_class_str[] = {
[0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC",
@@ -750,6 +815,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS",
[ESR_ELx_EC_PAC] = "PAC",
[ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC",
+ [ESR_ELx_EC_BTI] = "BTI",
[ESR_ELx_EC_ILL] = "PSTATE.IL",
[ESR_ELx_EC_SVC32] = "SVC (AArch32)",
[ESR_ELx_EC_HVC32] = "HVC (AArch32)",
@@ -760,6 +826,8 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)",
[ESR_ELx_EC_SVE] = "SVE",
[ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB",
+ [ESR_ELx_EC_FPAC] = "FPAC",
+ [ESR_ELx_EC_SME] = "SME",
[ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF",
[ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)",
[ESR_ELx_EC_IABT_CUR] = "IABT (current EL)",
@@ -767,6 +835,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)",
[ESR_ELx_EC_DABT_CUR] = "DABT (current EL)",
[ESR_ELx_EC_SP_ALIGN] = "SP Alignment",
+ [ESR_ELx_EC_MOPS] = "MOPS",
[ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
[ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
[ESR_ELx_EC_SERROR] = "SError",
@@ -781,34 +850,18 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_BRK64] = "BRK (AArch64)",
};
-const char *esr_get_class_string(u32 esr)
+const char *esr_get_class_string(unsigned long esr)
{
return esr_class_str[ESR_ELx_EC(esr)];
}
/*
- * bad_mode handles the impossible case in the exception vector. This is always
- * fatal.
- */
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
-{
- console_verbose();
-
- pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
- handler[reason], smp_processor_id(), esr,
- esr_get_class_string(esr));
-
- local_daif_mask();
- panic("bad mode");
-}
-
-/*
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
- * exceptions taken from EL0. Unlike bad_mode, this returns.
+ * exceptions taken from EL0.
*/
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
{
- void __user *pc = (void __user *)instruction_pointer(regs);
+ unsigned long pc = instruction_pointer(regs);
current->thread.fault_address = 0;
current->thread.fault_code = esr;
@@ -822,24 +875,22 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
-asmlinkage void handle_bad_stack(struct pt_regs *regs)
+void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
- unsigned int esr = read_sysreg(esr_el1);
- unsigned long far = read_sysreg(far_el1);
console_verbose();
pr_emerg("Insufficient stack space to handle exception!");
- pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr));
+ pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr));
pr_emerg("FAR: 0x%016lx\n", far);
pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
tsk_stk, tsk_stk + THREAD_SIZE);
pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
- irq_stk, irq_stk + THREAD_SIZE);
+ irq_stk, irq_stk + IRQ_STACK_SIZE);
pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
@@ -854,11 +905,11 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
}
#endif
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
console_verbose();
- pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+ pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
smp_processor_id(), esr, esr_get_class_string(esr));
if (regs)
__show_regs(regs);
@@ -866,12 +917,11 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
nmi_panic(regs, "Asynchronous SError Interrupt");
cpu_park_loop();
- unreachable();
}
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr)
{
- u32 aet = arm64_ras_serror_get_severity(esr);
+ unsigned long aet = arm64_ras_serror_get_severity(esr);
switch (aet) {
case ESR_ELx_AET_CE: /* corrected error */
@@ -901,50 +951,15 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
}
}
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned long esr)
{
- const bool was_in_nmi = in_nmi();
-
- if (!was_in_nmi)
- nmi_enter();
-
/* non-RAS errors are not containable */
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
arm64_serror_panic(regs, esr);
-
- if (!was_in_nmi)
- nmi_exit();
-}
-
-asmlinkage void enter_from_user_mode(void)
-{
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
-}
-NOKPROBE_SYMBOL(enter_from_user_mode);
-
-void __pte_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
-}
-
-void __pmd_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pmd %016lx.\n", file, line, val);
-}
-
-void __pud_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pud %016lx.\n", file, line, val);
-}
-
-void __pgd_error(const char *file, int line, unsigned long val)
-{
- pr_err("%s:%d: bad pgd %016lx.\n", file, line, val);
}
/* GENERIC_BUG traps */
-
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
/*
@@ -956,12 +971,13 @@ int is_valid_bugaddr(unsigned long addr)
*/
return 1;
}
+#endif
-static int bug_handler(struct pt_regs *regs, unsigned int esr)
+static int bug_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
- die("Oops - BUG", regs, 0);
+ die("Oops - BUG", regs, esr);
break;
case BUG_TRAP_TYPE_WARN:
@@ -982,6 +998,53 @@ static struct break_hook bug_break_hook = {
.imm = BUG_BRK_IMM,
};
+#ifdef CONFIG_CFI_CLANG
+static int cfi_handler(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long target;
+ u32 type;
+
+ target = pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TARGET, esr));
+ type = (u32)pt_regs_read_reg(regs, FIELD_GET(CFI_BRK_IMM_TYPE, esr));
+
+ switch (report_cfi_failure(regs, regs->pc, &target, type)) {
+ case BUG_TRAP_TYPE_BUG:
+ die("Oops - CFI", regs, esr);
+ break;
+
+ case BUG_TRAP_TYPE_WARN:
+ break;
+
+ default:
+ return DBG_HOOK_ERROR;
+ }
+
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook cfi_break_hook = {
+ .fn = cfi_handler,
+ .imm = CFI_BRK_IMM_BASE,
+ .mask = CFI_BRK_IMM_MASK,
+};
+#endif /* CONFIG_CFI_CLANG */
+
+static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+{
+ pr_err("%s generated an invalid instruction at %pS!\n",
+ "Kernel text patching",
+ (void *)instruction_pointer(regs));
+
+ /* We cannot handle this */
+ return DBG_HOOK_ERROR;
+}
+
+static struct break_hook fault_break_hook = {
+ .fn = reserved_fault_handler,
+ .imm = FAULT_BRK_IMM,
+};
+
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -989,12 +1052,12 @@ static struct break_hook bug_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned int esr)
+static int kasan_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
size_t size = KASAN_ESR_SIZE(esr);
- u64 addr = regs->regs[0];
+ void *addr = (void *)regs->regs[0];
u64 pc = regs->pc;
kasan_report(addr, size, write, pc);
@@ -1014,7 +1077,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr)
* This is something that might be fixed at some point in the future.
*/
if (!recover)
- die("Oops - KASAN", regs, 0);
+ die("Oops - KASAN", regs, esr);
/* If thread survives, skip over the brk instruction and continue: */
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -1028,27 +1091,56 @@ static struct break_hook kasan_break_hook = {
};
#endif
+#ifdef CONFIG_UBSAN_TRAP
+static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+{
+ die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
+ return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook ubsan_break_hook = {
+ .fn = ubsan_handler,
+ .imm = UBSAN_BRK_IMM,
+ .mask = UBSAN_BRK_MASK,
+};
+#endif
+
+#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
+
/*
* Initial handler for AArch64 BRK exceptions
* This handler only used until debug_traps_init().
*/
-int __init early_brk64(unsigned long addr, unsigned int esr,
+int __init early_brk64(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
+#ifdef CONFIG_CFI_CLANG
+ if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE)
+ return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
+#endif
#ifdef CONFIG_KASAN_SW_TAGS
- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
-
- if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
+#ifdef CONFIG_UBSAN_TRAP
+ if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
+ return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
+#endif
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}
-/* This registration must happen early, before debug_traps_init(). */
void __init trap_init(void)
{
register_kernel_break_hook(&bug_break_hook);
+#ifdef CONFIG_CFI_CLANG
+ register_kernel_break_hook(&cfi_break_hook);
+#endif
+ register_kernel_break_hook(&fault_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
register_kernel_break_hook(&kasan_break_hook);
#endif
+#ifdef CONFIG_UBSAN_TRAP
+ register_kernel_break_hook(&ubsan_break_hook);
+#endif
+ debug_traps_init();
}
diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso-wrap.S
index d1414fee5274..c4b1990bf2be 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso-wrap.S
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/const.h>
+#include <asm/assembler.h>
#include <asm/page.h>
.globl vdso_start, vdso_end
@@ -19,3 +20,5 @@ vdso_start:
vdso_end:
.previous
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 354b11e27c07..5562daf38a22 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
+#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
@@ -28,25 +29,18 @@
#include <asm/signal32.h>
#include <asm/vdso.h>
-extern char vdso_start[], vdso_end[];
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start[], vdso32_end[];
-#endif /* CONFIG_COMPAT_VDSO */
+enum vdso_abi {
+ VDSO_ABI_AA64,
+ VDSO_ABI_AA32,
+};
-/* vdso_lookup arch_index */
-enum arch_vdso_type {
- ARM64_VDSO = 0,
-#ifdef CONFIG_COMPAT_VDSO
- ARM64_VDSO32 = 1,
-#endif /* CONFIG_COMPAT_VDSO */
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
};
-#ifdef CONFIG_COMPAT_VDSO
-#define VDSO_TYPES (ARM64_VDSO32 + 1)
-#else
-#define VDSO_TYPES (ARM64_VDSO + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-struct __vdso_abi {
+struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
@@ -57,14 +51,14 @@ struct __vdso_abi {
struct vm_special_mapping *cm;
};
-static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
- {
+static struct vdso_abi_info vdso_info[] __ro_after_init = {
+ [VDSO_ABI_AA64] = {
.name = "vdso",
.vdso_code_start = vdso_start,
.vdso_code_end = vdso_end,
},
#ifdef CONFIG_COMPAT_VDSO
- {
+ [VDSO_ABI_AA32] = {
.name = "vdso32",
.vdso_code_start = vdso32_start,
.vdso_code_end = vdso32_end,
@@ -81,72 +75,130 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
-static int __vdso_remap(enum arch_vdso_type arch_index,
- const struct vm_special_mapping *sm,
- struct vm_area_struct *new_vma)
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
{
- unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start;
-
- if (vdso_size != new_size)
- return -EINVAL;
-
current->mm->context.vdso = (void *)new_vma->vm_start;
return 0;
}
-static int __vdso_init(enum arch_vdso_type arch_index)
+static int __init __vdso_init(enum vdso_abi abi)
{
int i;
struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
- vdso_lookup[arch_index].vdso_pages = (
- vdso_lookup[arch_index].vdso_code_end -
- vdso_lookup[arch_index].vdso_code_start) >>
+ vdso_info[abi].vdso_pages = (
+ vdso_info[abi].vdso_code_end -
+ vdso_info[abi].vdso_code_start) >>
PAGE_SHIFT;
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
sizeof(struct page *),
GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
+ for (i = 0; i < vdso_info[abi].vdso_pages; i++)
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
- /* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+ vdso_info[abi].cm->pages = vdso_pagelist;
+
+ return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
- for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ mmap_read_lock(mm);
- vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
- vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+ for_each_vma(vmi, vma) {
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+ zap_vma_pages(vma);
+#ifdef CONFIG_COMPAT_VDSO
+ if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
+ zap_vma_pages(vma);
+#endif
+ }
+ mmap_read_unlock(mm);
return 0;
}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
-static int __setup_additional_pages(enum arch_vdso_type arch_index,
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
int uses_interp)
{
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ unsigned long gp_flags = 0;
void *ret;
- vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
+ vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -154,18 +206,21 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
- vdso_lookup[arch_index].dm);
+ ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
+ VM_READ|VM_MAYREAD|VM_PFNMAP,
+ vdso_info[abi].dm);
if (IS_ERR(ret))
goto up_fail;
- vdso_base += PAGE_SIZE;
+ if (system_supports_bti_kernel())
+ gp_flags = VM_ARM64_BTI;
+
+ vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|gp_flags|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_lookup[arch_index].cm);
+ vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -180,52 +235,42 @@ up_fail:
/*
* Create and map the vectors page for AArch32 tasks.
*/
-#ifdef CONFIG_COMPAT_VDSO
-static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
- struct vm_area_struct *new_vma)
+enum aarch32_map {
+ AA32_MAP_VECTORS, /* kuser helpers */
+ AA32_MAP_SIGPAGE,
+ AA32_MAP_VVAR,
+ AA32_MAP_VDSO,
+};
+
+static struct page *aarch32_vectors_page __ro_after_init;
+static struct page *aarch32_sig_page __ro_after_init;
+
+static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
{
- return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+ current->mm->context.sigpage = (void *)new_vma->vm_start;
+
+ return 0;
}
-#endif /* CONFIG_COMPAT_VDSO */
-/*
- * aarch32_vdso_pages:
- * 0 - kuser helpers
- * 1 - sigreturn code
- * or (CONFIG_COMPAT_VDSO):
- * 0 - kuser helpers
- * 1 - vdso data
- * 2 - vdso code
- */
-#define C_VECTORS 0
-#ifdef CONFIG_COMPAT_VDSO
-#define C_VVAR 1
-#define C_VDSO 2
-#define C_PAGES (C_VDSO + 1)
-#else
-#define C_SIGPAGE 1
-#define C_PAGES (C_SIGPAGE + 1)
-#endif /* CONFIG_COMPAT_VDSO */
-static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
- {
+static struct vm_special_mapping aarch32_vdso_maps[] = {
+ [AA32_MAP_VECTORS] = {
.name = "[vectors]", /* ABI */
- .pages = &aarch32_vdso_pages[C_VECTORS],
+ .pages = &aarch32_vectors_page,
},
-#ifdef CONFIG_COMPAT_VDSO
- {
+ [AA32_MAP_SIGPAGE] = {
+ .name = "[sigpage]", /* ABI */
+ .pages = &aarch32_sig_page,
+ .mremap = aarch32_sigpage_mremap,
+ },
+ [AA32_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
},
- {
+ [AA32_MAP_VDSO] = {
.name = "[vdso]",
- .mremap = aarch32_vdso_mremap,
- },
-#else
- {
- .name = "[sigpage]", /* ABI */
- .pages = &aarch32_vdso_pages[C_SIGPAGE],
+ .mremap = vdso_mremap,
},
-#endif /* CONFIG_COMPAT_VDSO */
};
static int aarch32_alloc_kuser_vdso_page(void)
@@ -237,69 +282,59 @@ static int aarch32_alloc_kuser_vdso_page(void)
if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
return 0;
- vdso_page = get_zeroed_page(GFP_ATOMIC);
+ vdso_page = get_zeroed_page(GFP_KERNEL);
if (!vdso_page)
return -ENOMEM;
memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
kuser_sz);
- aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page);
- flush_dcache_page(aarch32_vdso_pages[C_VECTORS]);
+ aarch32_vectors_page = virt_to_page((void *)vdso_page);
return 0;
}
-#ifdef CONFIG_COMPAT_VDSO
-static int __aarch32_alloc_vdso_pages(void)
-{
- int ret;
-
- vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
- vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
-
- ret = __vdso_init(ARM64_VDSO32);
- if (ret)
- return ret;
-
- ret = aarch32_alloc_kuser_vdso_page();
- if (ret) {
- unsigned long c_vvar =
- (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]);
- unsigned long c_vdso =
- (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]);
-
- free_page(c_vvar);
- free_page(c_vdso);
- }
-
- return ret;
-}
-#else
-static int __aarch32_alloc_vdso_pages(void)
+#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1
+static int aarch32_alloc_sigpage(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
- unsigned long sigpage;
- int ret;
+ __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
+ void *sigpage;
- sigpage = get_zeroed_page(GFP_ATOMIC);
+ sigpage = (void *)__get_free_page(GFP_KERNEL);
if (!sigpage)
return -ENOMEM;
- memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
- aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage);
- flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]);
+ memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
+ memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
+ aarch32_sig_page = virt_to_page(sigpage);
+ return 0;
+}
- ret = aarch32_alloc_kuser_vdso_page();
- if (ret)
- free_page(sigpage);
+static int __init __aarch32_alloc_vdso_pages(void)
+{
- return ret;
+ if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
+ return 0;
+
+ vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
+
+ return __vdso_init(VDSO_ABI_AA32);
}
-#endif /* CONFIG_COMPAT_VDSO */
static int __init aarch32_alloc_vdso_pages(void)
{
- return __aarch32_alloc_vdso_pages();
+ int ret;
+
+ ret = __aarch32_alloc_vdso_pages();
+ if (ret)
+ return ret;
+
+ ret = aarch32_alloc_sigpage();
+ if (ret)
+ return ret;
+
+ return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);
@@ -317,12 +352,11 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYEXEC,
- &aarch32_vdso_spec[C_VECTORS]);
+ &aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
}
-#ifndef CONFIG_COMPAT_VDSO
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
unsigned long addr;
@@ -341,63 +375,53 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
VM_MAYWRITE | VM_MAYEXEC,
- &aarch32_vdso_spec[C_SIGPAGE]);
+ &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
- mm->context.vdso = (void *)addr;
+ mm->context.sigpage = (void *)addr;
out:
return PTR_ERR_OR_ZERO(ret);
}
-#endif /* !CONFIG_COMPAT_VDSO */
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
int ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = aarch32_kuser_helpers_setup(mm);
if (ret)
goto out;
-#ifdef CONFIG_COMPAT_VDSO
- ret = __setup_additional_pages(ARM64_VDSO32,
- mm,
- bprm,
- uses_interp);
-#else
- ret = aarch32_sigreturn_setup(mm);
-#endif /* CONFIG_COMPAT_VDSO */
+ if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
+ ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
+ uses_interp);
+ if (ret)
+ goto out;
+ }
+ ret = aarch32_sigreturn_setup(mm);
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
#endif /* CONFIG_COMPAT */
-static int vdso_mremap(const struct vm_special_mapping *sm,
- struct vm_area_struct *new_vma)
-{
- return __vdso_remap(ARM64_VDSO, sm, new_vma);
-}
+enum aarch64_map {
+ AA64_MAP_VVAR,
+ AA64_MAP_VDSO,
+};
-/*
- * aarch64_vdso_pages:
- * 0 - vvar
- * 1 - vdso
- */
-#define A_VVAR 0
-#define A_VDSO 1
-#define A_PAGES (A_VDSO + 1)
-static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
- {
+static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
+ [AA64_MAP_VVAR] = {
.name = "[vvar]",
+ .fault = vvar_fault,
},
- {
+ [AA64_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
},
@@ -405,28 +429,23 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
static int __init vdso_init(void)
{
- vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
- vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
+ vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
- return __vdso_init(ARM64_VDSO);
+ return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
int ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = __setup_additional_pages(ARM64_VDSO,
- mm,
- bprm,
- uses_interp);
-
- up_write(&mm->mmap_sem);
+ ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
+ mmap_write_unlock(mm);
return ret;
}
diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore
index f8b69d84238e..652e31d82582 100644
--- a/arch/arm64/kernel/vdso/.gitignore
+++ b/arch/arm64/kernel/vdso/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..8818287f1095 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,9 +6,7 @@
# Heavily based on the vDSO Makefiles for other archs.
#
-# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
-# the inclusion of generic Makefile.
-ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso := vgettimeofday.o note.o sigreturn.o
@@ -17,44 +15,49 @@ obj-vdso := vgettimeofday.o note.o sigreturn.o
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
- --build-id -n -T
+btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
-ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+# -Bsymbolic has been added for consistency with arm, the compat vDSO and
+# potential future proofing if we end up with internal calls to the exported
+# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
+# preparation in build-time C")).
+ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ -Bsymbolic --build-id=sha1 -n $(btildflags-y)
+
+ifdef CONFIG_LD_ORPHAN_WARN
+ ldflags-y += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+endif
-VDSO_LDFLAGS := -Bsymbolic
+ldflags-y += -T
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
-KBUILD_CFLAGS += $(DISABLE_LTO)
+ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+
+# -Wmissing-prototypes and -Wmissing-declarations are removed from
+# the CFLAGS of vgettimeofday.c to make possible to build the
+# kernel with CONFIG_WERROR enabled.
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ -Wmissing-prototypes -Wmissing-declarations
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
KCOV_INSTRUMENT := n
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
-# Clang versions less than 8 do not support -mcmodel=tiny
-ifeq ($(CONFIG_CC_IS_CLANG), y)
- ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
- CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
- endif
-endif
-
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
-obj-y += vdso.o
-extra-y += vdso.lds
+targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-# Force dependency (incbin is bad)
-$(obj)/vdso.o : $(obj)/vdso.so
-
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_vdso_check)
@@ -75,13 +78,3 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
# Actual build commands
quiet_cmd_vdsold_and_vdso_check = LD $@
cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
-
-# Install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso.so: $(obj)/vdso.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
index 0664acaf61ff..0387209d65b1 100755
--- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
+++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh
@@ -8,9 +8,9 @@
# Doing this inside the Makefile will break the $(filter-out) function,
# causing Kbuild to rebuild the vdso-offsets header file every time.
#
-# Author: Will Deacon <will.deacon@arm.com
+# Author: Will Deacon <will.deacon@arm.com>
#
LC_ALL=C
sed -n -e 's/^00*/0/' -e \
-'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p'
diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S
index 0ce6ec75a525..3d4e82290c80 100644
--- a/arch/arm64/kernel/vdso/note.S
+++ b/arch/arm64/kernel/vdso/note.S
@@ -12,9 +12,12 @@
#include <linux/version.h>
#include <linux/elfnote.h>
#include <linux/build-salt.h>
+#include <asm/assembler.h>
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END
BUILD_SALT
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S
index 0723aa398d6e..0e18729abc3b 100644
--- a/arch/arm64/kernel/vdso/sigreturn.S
+++ b/arch/arm64/kernel/vdso/sigreturn.S
@@ -1,7 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Sigreturn trampoline for returning from a signal when the SA_RESTORER
- * flag is not set.
+ * flag is not set. It serves primarily as a hall of shame for crappy
+ * unwinders and features an exciting but mysterious NOP instruction.
+ *
+ * It's also fragile as hell, so please think twice before changing anything
+ * in here.
*
* Copyright (C) 2012 ARM Limited
*
@@ -9,18 +13,68 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/unistd.h>
.text
- nop
-ENTRY(__kernel_rt_sigreturn)
- .cfi_startproc
- .cfi_signal_frame
- .cfi_def_cfa x29, 0
- .cfi_offset x29, 0 * 8
- .cfi_offset x30, 1 * 8
+/*
+ * NOTE!!! You may notice that all of the .cfi directives in this file have
+ * been commented out. This is because they have been shown to trigger segfaults
+ * in libgcc when unwinding out of a SIGCANCEL handler to invoke pthread
+ * cleanup handlers during the thread cancellation dance. By omitting the
+ * directives, we trigger an arm64-specific fallback path in the unwinder which
+ * recognises the signal frame and restores many of the registers directly from
+ * the sigcontext. Re-enabling the cfi directives here therefore needs to be
+ * much more comprehensive to reduce the risk of further regressions.
+ */
+
+/* Ensure that the mysterious NOP can be associated with a function. */
+// .cfi_startproc
+
+/*
+ * .cfi_signal_frame causes the corresponding Frame Description Entry (FDE) in
+ * the .eh_frame section to be annotated as a signal frame. This allows DWARF
+ * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo() and identify
+ * the next frame using the unmodified return address instead of subtracting 1,
+ * which may yield the wrong FDE.
+ */
+// .cfi_signal_frame
+
+/*
+ * Tell the unwinder where to locate the frame record linking back to the
+ * interrupted context. We don't provide unwind info for registers other than
+ * the frame pointer and the link register here; in practice, this is likely to
+ * be insufficient for unwinding in C/C++ based runtimes, especially without a
+ * means to restore the stack pointer. Thankfully, unwinders and debuggers
+ * already have baked-in strategies for attempting to unwind out of signals.
+ */
+// .cfi_def_cfa x29, 0
+// .cfi_offset x29, 0 * 8
+// .cfi_offset x30, 1 * 8
+
+/*
+ * This mysterious NOP is required for some unwinders (e.g. libc++) that
+ * unconditionally subtract one from the result of _Unwind_GetIP() in order to
+ * identify the calling function.
+ * Hack borrowed from arch/powerpc/kernel/vdso64/sigtramp.S.
+ */
+ nop // Mysterious NOP
+
+/*
+ * GDB, libgcc and libunwind rely on being able to identify the sigreturn
+ * instruction sequence to unwind from signal handlers. We cannot, therefore,
+ * use SYM_FUNC_START() here, as it will emit a BTI C instruction and break the
+ * unwinder. Thankfully, this function is only ever called from a RET and so
+ * omitting the landing pad is perfectly fine.
+ */
+SYM_CODE_START(__kernel_rt_sigreturn)
+// PLEASE DO NOT MODIFY
mov x8, #__NR_rt_sigreturn
+// PLEASE DO NOT MODIFY
svc #0
- .cfi_endproc
-ENDPROC(__kernel_rt_sigreturn)
+// PLEASE DO NOT MODIFY
+// .cfi_endproc
+SYM_CODE_END(__kernel_rt_sigreturn)
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 7ad2d3a0cd48..45354f2ddf70 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -11,13 +11,17 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
SECTIONS
{
- PROVIDE(_vdso_data = . - PAGE_SIZE);
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -28,6 +32,13 @@ SECTIONS
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
+ /*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from vDSO note sections.
+ */
+ /DISCARD/ : {
+ *(.note.GNU-stack .note.gnu.property)
+ }
.note : { *(.note.*) } :text :note
. = ALIGN(16);
@@ -37,20 +48,35 @@ SECTIONS
PROVIDE (_etext = .);
PROVIDE (etext = .);
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
+ . = ALIGN(4);
+ .altinstructions : {
+ *(.altinstructions)
+ }
.dynamic : { *(.dynamic) } :text :dynamic
- .rodata : { *(.rodata*) } :text
+ .rela.dyn : ALIGN(8) { *(.rela .rela*) }
+
+ .rodata : {
+ *(.rodata*)
+ *(.got)
+ *(.got.plt)
+ *(.plt)
+ *(.plt.*)
+ *(.iplt)
+ *(.igot .igot.plt)
+ } :text
_end = .;
PROVIDE(end = .);
+ DWARF_DEBUG
+ ELF_DETAILS
+
/DISCARD/ : {
- *(.note.GNU-stack)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
+ *(.eh_frame .eh_frame_hdr)
}
}
@@ -63,7 +89,6 @@ PHDRS
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr PT_GNU_EH_FRAME;
}
/*
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
index 747635501a14..9941c5b04f15 100644
--- a/arch/arm64/kernel/vdso/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -5,8 +5,10 @@
* Copyright (C) 2018 ARM Limited
*
*/
-#include <linux/time.h>
-#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res);
int __kernel_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32-wrap.S
index e72ac7bc4c04..e72ac7bc4c04 100644
--- a/arch/arm64/kernel/vdso32/vdso.S
+++ b/arch/arm64/kernel/vdso32-wrap.S
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
index 4fea950fa5ed..3542fa24e26b 100644
--- a/arch/arm64/kernel/vdso32/.gitignore
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 76b327f88fbb..2266fcdff78a 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -3,26 +3,26 @@
# Makefile for vdso32
#
-# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
-# the inclusion of generic Makefile.
-ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
include $(srctree)/lib/vdso/Makefile
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
CC_COMPAT ?= $(CC)
+CC_COMPAT += --target=arm-linux-gnueabi
else
CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
endif
+ifeq ($(CONFIG_LD_IS_LLD), y)
+LD_COMPAT ?= $(LD)
+else
+LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld
+endif
+
cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-disable-warning = $(call try-run,\
$(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
-cc32-ldoption = $(call try-run,\
- $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
-cc32-as-instr = $(call try-run,\
- printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -32,16 +32,13 @@ cc32-as-instr = $(call try-run,\
# As a result we set our own flags here.
# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc
+VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
# Common C and assembly flags
# From top-level Makefile
VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
-ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
-VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
-endif
-
VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
ifdef CONFIG_DEBUG_INFO
VDSO_CAFLAGS += -g
@@ -59,13 +56,7 @@ endif
# From arm vDSO Makefile
VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
-
-
-# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
-# fall back to v7. There is no easy way to check for what architecture the code
-# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
-VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
- -march=armv7-a -D__LINUX_ARM_ARCH__=7)
+VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
@@ -74,14 +65,13 @@ VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -std=gnu89
+ -std=gnu11
VDSO_CFLAGS += -O2
# Some useful compiler-dependent flags from top-level Makefile
-VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
-VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += -fno-strict-overflow
VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
-VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += -Werror=date-time
VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
# The 32-bit compiler does not provide 128-bit integers, which are used in
@@ -94,34 +84,32 @@ VDSO_CFLAGS += -D__uint128_t='void*'
VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
VDSO_CFLAGS += -Wno-int-to-pointer-cast
+# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
+# unreliable.
+ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y)
+VDSO_CFLAGS += -mthumb -fomit-frame-pointer
+else
+VDSO_CFLAGS += -marm
+endif
+
VDSO_AFLAGS := $(VDSO_CAFLAGS)
VDSO_AFLAGS += -D__ASSEMBLY__
-# Check for binutils support for dmb ishld
-dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)
-
-VDSO_CFLAGS += $(dmbinstr)
-VDSO_AFLAGS += $(dmbinstr)
-
-VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
# From arm vDSO Makefile
-VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
-VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
-VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
-VDSO_LDFLAGS += -Wl,--hash-style=sysv
-VDSO_LDFLAGS += -Wl,--build-id
-VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
+VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
+VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
# Borrow vdsomunge.c from the arm vDSO
# We have to use a relative path because scripts/Makefile.host prefixes
-# $(hostprogs-y) with $(obj)
+# $(hostprogs) with $(obj)
munge := ../../../arm/vdso/vdsomunge
-hostprogs-y := $(munge)
+hostprogs := $(munge)
c-obj-vdso := note.o
c-obj-vdso-gettimeofday := vgettimeofday.o
-asm-obj-vdso := sigreturn.o
ifneq ($(c-gettimeofday-y),)
VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
@@ -130,28 +118,24 @@ endif
VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
# Build rules
-targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw
c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
-obj-y += vdso.o
-extra-y += vdso.lds
+targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-# Force dependency (vdso.s includes vdso.so through incbin)
-$(obj)/vdso.o: $(obj)/vdso.so
-
-include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,vdsosym)
# Strip rule for vdso.so
$(obj)/vdso.so: OBJCOPYFLAGS := -S
-$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,objcopy)
-$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
$(call if_changed,vdsomunge)
# Link rule for the .so file, .lds has to be first
@@ -171,8 +155,8 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@
cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
quiet_cmd_vdsold = LD32 $@
- cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
- -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+ cmd_vdsold = $(LD_COMPAT) $(VDSO_LDFLAGS) \
+ -T $(filter %.lds,$^) $(filter %.o,$^) -o $@
quiet_cmd_vdsocc = CC32 $@
cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
quiet_cmd_vdsocc_gettimeofday = CC32 $@
@@ -188,13 +172,3 @@ gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
# The AArch64 nm should be able to read an AArch32 binary
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-# Install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
-
-vdso.so: $(obj)/vdso.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
deleted file mode 100644
index 1a81277c2d09..000000000000
--- a/arch/arm64/kernel/vdso32/sigreturn.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This file provides both A32 and T32 versions, in accordance with the
- * arm sigreturn code.
- *
- * Copyright (C) 2018 ARM Limited
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define ARM_ENTRY(name) \
- ENTRY(name)
-
-#define ARM_ENDPROC(name) \
- .type name, %function; \
- END(name)
-
- .text
-
- .arm
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_sigreturn_arm)
- mov r7, #__NR_compat_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_sigreturn_arm)
-
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_rt_sigreturn_arm)
- mov r7, #__NR_compat_rt_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_rt_sigreturn_arm)
-
- .thumb
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_sigreturn_thumb)
- mov r7, #__NR_compat_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_sigreturn_thumb)
-
- .fnstart
- .save {r0-r15}
- .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
- nop
-ARM_ENTRY(__kernel_rt_sigreturn_thumb)
- mov r7, #__NR_compat_rt_sigreturn
- svc #0
- .fnend
-ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index a3944927eaeb..8d95d7d35057 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -11,13 +11,17 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
OUTPUT_ARCH(arm)
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = VDSO_LBASE + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -32,12 +36,30 @@ SECTIONS
.dynamic : { *(.dynamic) } :text :dynamic
- .rodata : { *(.rodata*) } :text
+ .rodata : {
+ *(.rodata*)
+ *(.got)
+ *(.got.plt)
+ *(.plt)
+ *(.rel.iplt)
+ *(.iplt)
+ *(.igot.plt)
+ } :text
- .text : { *(.text*) } :text =0xe7f001f2
+ .text : {
+ *(.text*)
+ *(.glue_7)
+ *(.glue_7t)
+ *(.vfp11_veneer)
+ *(.v4_bx)
+ } :text =0xe7f001f2
- .got : { *(.got) }
- .rel.plt : { *(.rel.plt) }
+ .rel.dyn : { *(.rel*) }
+
+ .ARM.exidx : { *(.ARM.exidx*) }
+ DWARF_DEBUG
+ ELF_DETAILS
+ .ARM.attributes 0 : { *(.ARM.attributes) }
/DISCARD/ : {
*(.note.GNU-stack)
@@ -64,19 +86,7 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_clock_getres;
- __kernel_sigreturn_arm;
- __kernel_sigreturn_thumb;
- __kernel_rt_sigreturn_arm;
- __kernel_rt_sigreturn_thumb;
__vdso_clock_gettime64;
local: *;
};
}
-
-/*
- * Make the sigreturn code visible to the kernel.
- */
-VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
-VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
-VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
-VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 54fc1c2ce93f..29b4d8f61e39 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -5,26 +5,18 @@
* Copyright (C) 2018 ARM Limited
*
*/
-#include <linux/time.h>
-#include <linux/types.h>
+#define BUILD_VDSO32_64
+#include <vdso/gettime.h>
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)ts >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_gettime32(clock, ts);
}
int __vdso_clock_gettime64(clockid_t clock,
struct __kernel_timespec *ts)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)ts >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_gettime(clock, ts);
}
@@ -37,10 +29,6 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
- /* The checks below are required for ABI consistency with arm */
- if ((u32)res >= TASK_SIZE_32)
- return -EFAULT;
-
return __cvdso_clock_getres_time32(clock_id, res);
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 497f9675071d..3cd7e76cc562 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,42 +5,84 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
*/
-#define RO_EXCEPTION_TABLE_ALIGN 8
+#include <asm/hyp_image.h>
+#ifdef CONFIG_KVM
+#define HYPERVISOR_EXTABLE \
+ . = ALIGN(SZ_8); \
+ __start___kvm_ex_table = .; \
+ *(__kvm_ex_table) \
+ __stop___kvm_ex_table = .;
+
+#define HYPERVISOR_DATA_SECTIONS \
+ HYP_SECTION_NAME(.rodata) : { \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_rodata_start = .; \
+ *(HYP_SECTION_NAME(.data..ro_after_init)) \
+ *(HYP_SECTION_NAME(.rodata)) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_rodata_end = .; \
+ }
+
+#define HYPERVISOR_PERCPU_SECTION \
+ . = ALIGN(PAGE_SIZE); \
+ HYP_SECTION_NAME(.data..percpu) : { \
+ *(HYP_SECTION_NAME(.data..percpu)) \
+ }
+
+#define HYPERVISOR_RELOC_SECTION \
+ .hyp.reloc : ALIGN(4) { \
+ __hyp_reloc_begin = .; \
+ *(.hyp.reloc) \
+ __hyp_reloc_end = .; \
+ }
+
+#define BSS_FIRST_SECTIONS \
+ __hyp_bss_start = .; \
+ *(HYP_SECTION_NAME(.bss)) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_bss_end = .;
+
+/*
+ * We require that __hyp_bss_start and __bss_start are aligned, and enforce it
+ * with an assertion. But the BSS_SECTION macro places an empty .sbss section
+ * between them, which can in some cases cause the linker to misalign them. To
+ * work around the issue, force a page alignment for __bss_start.
+ */
+#define SBSS_ALIGN PAGE_SIZE
+#else /* CONFIG_KVM */
+#define HYPERVISOR_EXTABLE
+#define HYPERVISOR_DATA_SECTIONS
+#define HYPERVISOR_PERCPU_SECTION
+#define HYPERVISOR_RELOC_SECTION
+#define SBSS_ALIGN 0
+#endif
+
+#define RO_EXCEPTION_TABLE_ALIGN 4
+#define RUNTIME_DISCARD_EXIT
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
-#include <asm/thread_info.h>
+#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include "image.h"
-/* .exit.text needed in case of alternative patching */
-#define ARM_EXIT_KEEP(x) x
-#define ARM_EXIT_DISCARD(x)
-
OUTPUT_ARCH(aarch64)
ENTRY(_text)
jiffies = jiffies_64;
#define HYPERVISOR_TEXT \
- /* \
- * Align to 4 KB so that \
- * a) the HYP vector table is at its minimum \
- * alignment of 2048 bytes \
- * b) the HYP init code will not cross a page \
- * boundary if its size does not exceed \
- * 4 KB (see related ASSERT() below) \
- */ \
- . = ALIGN(SZ_4K); \
+ . = ALIGN(PAGE_SIZE); \
__hyp_idmap_text_start = .; \
*(.hyp.idmap.text) \
__hyp_idmap_text_end = .; \
__hyp_text_start = .; \
*(.hyp.text) \
+ HYPERVISOR_EXTABLE \
+ . = ALIGN(PAGE_SIZE); \
__hyp_text_end = .;
#define IDMAP_TEXT \
@@ -51,7 +93,7 @@ jiffies = jiffies_64;
#ifdef CONFIG_HIBERNATION
#define HIBERNATE_TEXT \
- . = ALIGN(SZ_4K); \
+ ALIGN_FUNCTION(); \
__hibernate_exit_text_start = .; \
*(.hibernate_exit.text) \
__hibernate_exit_text_end = .;
@@ -59,21 +101,43 @@ jiffies = jiffies_64;
#define HIBERNATE_TEXT
#endif
+#ifdef CONFIG_KEXEC_CORE
+#define KEXEC_TEXT \
+ ALIGN_FUNCTION(); \
+ __relocate_new_kernel_start = .; \
+ *(.kexec_relocate.text) \
+ __relocate_new_kernel_end = .;
+#else
+#define KEXEC_TEXT
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
__entry_tramp_text_start = .; \
*(.entry.tramp.text) \
. = ALIGN(PAGE_SIZE); \
- __entry_tramp_text_end = .;
+ __entry_tramp_text_end = .; \
+ *(.entry.tramp.rodata)
#else
#define TRAMP_TEXT
#endif
+#ifdef CONFIG_UNWIND_TABLES
+#define UNWIND_DATA_SECTIONS \
+ .eh_frame : { \
+ __eh_frame_start = .; \
+ *(.eh_frame) \
+ __eh_frame_end = .; \
+ }
+#else
+#define UNWIND_DATA_SECTIONS
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
- * runs from stext to _edata, must be a round multiple of the PE/COFF
- * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * runs from _stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. '_stext'
* itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
* boundary should be sufficient.
*/
@@ -94,41 +158,29 @@ SECTIONS
* matching the same input section name. There is no documented
* order of matching.
*/
+ DISCARDS
/DISCARD/ : {
- ARM_EXIT_DISCARD(EXIT_TEXT)
- ARM_EXIT_DISCARD(EXIT_DATA)
- EXIT_CALL
- *(.discard)
- *(.discard.*)
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
- *(.eh_frame)
}
- . = KIMAGE_VADDR + TEXT_OFFSET;
+ . = KIMAGE_VADDR;
.head.text : {
_text = .;
HEAD_TEXT
}
- .text : { /* Real text segment */
+ .text : ALIGN(SEGMENT_ALIGN) { /* Real text segment */
_stext = .; /* Text and read-only data */
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
- IDMAP_TEXT
- HIBERNATE_TEXT
- TRAMP_TEXT
- *(.fixup)
*(.gnu.warning)
- . = ALIGN(16);
- *(.got) /* Global offset table */
}
. = ALIGN(SEGMENT_ALIGN);
@@ -137,21 +189,39 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
+ HYPERVISOR_DATA_SECTIONS
+
+ .got : { *(.got) }
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the lazy dispatch entries.
+ */
+ .got.plt : { *(.got.plt) }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18,
+ "Unexpected GOT/PLT entries detected!")
+
+ /* code sections that are never executed via the kernel mapping */
+ .rodata.text : {
+ TRAMP_TEXT
+ HIBERNATE_TEXT
+ KEXEC_TEXT
+ IDMAP_TEXT
+ . = ALIGN(PAGE_SIZE);
+ }
+
idmap_pg_dir = .;
- . += IDMAP_DIR_SIZE;
+ . += PAGE_SIZE;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
. += PAGE_SIZE;
#endif
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
- reserved_ttbr0 = .;
- . += RESERVED_TTBR0_SIZE;
-#endif
+ reserved_pg_dir = .;
+ . += PAGE_SIZE;
+
swapper_pg_dir = .;
. += PAGE_SIZE;
- swapper_pg_end = .;
. = ALIGN(SEGMENT_ALIGN);
__init_begin = .;
@@ -161,7 +231,7 @@ SECTIONS
__exittext_begin = .;
.exit.text : {
- ARM_EXIT_KEEP(EXIT_TEXT)
+ EXIT_TEXT
}
__exittext_end = .;
@@ -171,44 +241,46 @@ SECTIONS
*(.altinstructions)
__alt_instructions_end = .;
}
- .altinstr_replacement : {
- *(.altinstr_replacement)
- }
- . = ALIGN(PAGE_SIZE);
+ UNWIND_DATA_SECTIONS
+
+ . = ALIGN(SEGMENT_ALIGN);
__inittext_end = .;
__initdata_begin = .;
+ init_idmap_pg_dir = .;
+ . += INIT_IDMAP_DIR_SIZE;
+ init_idmap_pg_end = .;
+
.init.data : {
INIT_DATA
INIT_SETUP(16)
INIT_CALLS
CON_INITCALL
INIT_RAM_FS
- *(.init.rodata.* .init.bss) /* from the EFI stub */
+ *(.init.altinstructions .init.bss) /* from the EFI stub */
}
.exit.data : {
- ARM_EXIT_KEEP(EXIT_DATA)
+ EXIT_DATA
}
PERCPU_SECTION(L1_CACHE_BYTES)
+ HYPERVISOR_PERCPU_SECTION
+
+ HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
+ __rela_start = .;
*(.rela .rela*)
+ __rela_end = .;
}
- __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
- __rela_size = SIZEOF(.rela.dyn);
-
-#ifdef CONFIG_RELR
.relr.dyn : ALIGN(8) {
+ __relr_start = .;
*(.relr.dyn)
+ __relr_end = .;
}
- __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
- __relr_size = SIZEOF(.relr.dyn);
-#endif
-
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
@@ -239,40 +311,78 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
- BSS_SECTION(0, 0, 0)
+ BSS_SECTION(SBSS_ALIGN, 0, 0)
. = ALIGN(PAGE_SIZE);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
+ . = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
HEAD_SYMBOLS
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"
/*
- * The HYP init code and ID map text can't be longer than a page each,
- * and should not cross a page boundary.
+ * The HYP init code and ID map text can't be longer than a page each. The
+ * former is page-aligned, but the latter may not be with 16K or 64K pages, so
+ * it should also not cross a page boundary.
*/
-ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
- "HYP init code too big or misaligned")
+ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE,
+ "HYP init code too big")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
#ifdef CONFIG_HIBERNATION
-ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
- <= SZ_4K, "Hibernate exit text too big or misaligned")
+ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
+ "Hibernate exit text is bigger than 4 KiB")
+ASSERT(__hibernate_exit_text_start == swsusp_arch_suspend_exit,
+ "Hibernate exit text does not start with swsusp_arch_suspend_exit")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
"Entry trampoline text too big")
#endif
+#ifdef CONFIG_KVM
+ASSERT(__hyp_bss_start == __bss_start, "HYP and Host BSS are misaligned")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
-ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
+
+ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET,
+ "RESERVED_SWAPPER_OFFSET is wrong!")
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET,
+ "TRAMP_SWAPPER_OFFSET is wrong!")
+#endif
+
+#ifdef CONFIG_KEXEC_CORE
+/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */
+ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
+ "kexec relocation code is bigger than 4 KiB")
+ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
+ASSERT(__relocate_new_kernel_start == arm64_relocate_new_kernel,
+ "kexec control page does not start with arm64_relocate_new_kernel")
+#endif
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
new file mode 100644
index 000000000000..dcd25322127c
--- /dev/null
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/nmi.h>
+#include <linux/cpufreq.h>
+#include <linux/perf/arm_pmu.h>
+
+/*
+ * Safe maximum CPU frequency in case a particular platform doesn't implement
+ * cpufreq driver. Although, architecture doesn't put any restrictions on
+ * maximum frequency but 5 GHz seems to be safe maximum given the available
+ * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
+ * hand, we can't make it much higher as it would lead to a large hard-lockup
+ * detection timeout on parts which are running slower (eg. 1GHz on
+ * Developerbox) and doesn't possess a cpufreq driver.
+ */
+#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ unsigned int cpu = smp_processor_id();
+ unsigned long max_cpu_freq;
+
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ max_cpu_freq = SAFE_MAX_CPU_FREQ;
+
+ return (u64)max_cpu_freq * watchdog_thresh;
+}
+
+bool __init arch_perf_nmi_is_available(void)
+{
+ /*
+ * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off,
+ * however, the pmu interrupts will act like a normal interrupt instead of
+ * NMI and the hardlockup detector would be broken.
+ */
+ return arm_pmu_irq_is_nmi();
+}