Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/Makefile | 9
-rw-r--r--  arch/arm64/kernel/Makefile.syscalls | 6
-rw-r--r--  arch/arm64/kernel/acpi.c | 135
-rw-r--r--  arch/arm64/kernel/acpi_numa.c | 13
-rw-r--r--  arch/arm64/kernel/alternative.c | 19
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c | 2
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 66
-rw-r--r--  arch/arm64/kernel/cacheinfo.c | 12
-rw-r--r--  arch/arm64/kernel/compat_alignment.c | 2
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 182
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 782
-rw-r--r--  arch/arm64/kernel/cpuidle.c | 74
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 148
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 271
-rw-r--r--  arch/arm64/kernel/efi-header.S | 6
-rw-r--r--  arch/arm64/kernel/efi.c | 68
-rw-r--r--  arch/arm64/kernel/elfcore.c | 3
-rw-r--r--  arch/arm64/kernel/entry-common.c | 620
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 34
-rw-r--r--  arch/arm64/kernel/entry.S | 30
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 498
-rw-r--r--  arch/arm64/kernel/ftrace.c | 110
-rw-r--r--  arch/arm64/kernel/head.S | 31
-rw-r--r--  arch/arm64/kernel/hibernate.c | 8
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 60
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 11
-rw-r--r--  arch/arm64/kernel/image-vars.h | 98
-rw-r--r--  arch/arm64/kernel/io.c | 87
-rw-r--r--  arch/arm64/kernel/irq.c | 15
-rw-r--r--  arch/arm64/kernel/jump_label.c | 13
-rw-r--r--  arch/arm64/kernel/kaslr.c | 2
-rw-r--r--  arch/arm64/kernel/kgdb.c | 41
-rw-r--r--  arch/arm64/kernel/machine_kexec.c | 33
-rw-r--r--  arch/arm64/kernel/machine_kexec_file.c | 2
-rw-r--r--  arch/arm64/kernel/module-plts.c | 12
-rw-r--r--  arch/arm64/kernel/module.c | 139
-rw-r--r--  arch/arm64/kernel/mte.c | 69
-rw-r--r--  arch/arm64/kernel/patching.c | 23
-rw-r--r--  arch/arm64/kernel/pci.c | 191
-rw-r--r--  arch/arm64/kernel/perf_callchain.c | 28
-rw-r--r--  arch/arm64/kernel/pi/Makefile | 4
-rw-r--r--  arch/arm64/kernel/pi/idreg-override.c | 24
-rw-r--r--  arch/arm64/kernel/pi/kaslr_early.c | 4
-rw-r--r--  arch/arm64/kernel/pi/map_kernel.c | 86
-rw-r--r--  arch/arm64/kernel/pi/map_range.c | 28
-rw-r--r--  arch/arm64/kernel/pi/patch-scs.c | 97
-rw-r--r--  arch/arm64/kernel/pi/pi.h | 12
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.c | 45
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.h | 2
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 87
-rw-r--r--  arch/arm64/kernel/probes/kprobes_trampoline.S | 2
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.c | 74
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.h | 4
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 75
-rw-r--r--  arch/arm64/kernel/process.c | 341
-rw-r--r--  arch/arm64/kernel/proton-pack.c | 276
-rw-r--r--  arch/arm64/kernel/psci.c | 2
-rw-r--r--  arch/arm64/kernel/ptrace.c | 367
-rw-r--r--  arch/arm64/kernel/reloc_test_core.c | 1
-rw-r--r--  arch/arm64/kernel/rsi.c | 175
-rw-r--r--  arch/arm64/kernel/sdei.c | 12
-rw-r--r--  arch/arm64/kernel/setup.c | 30
-rw-r--r--  arch/arm64/kernel/signal.c | 557
-rw-r--r--  arch/arm64/kernel/signal32.c | 15
-rw-r--r--  arch/arm64/kernel/sigreturn32.S | 18
-rw-r--r--  arch/arm64/kernel/smccc-call.S | 35
-rw-r--r--  arch/arm64/kernel/smp.c | 374
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 223
-rw-r--r--  arch/arm64/kernel/sys.c | 6
-rw-r--r--  arch/arm64/kernel/sys32.c | 17
-rw-r--r--  arch/arm64/kernel/syscall.c | 7
-rw-r--r--  arch/arm64/kernel/topology.c | 201
-rw-r--r--  arch/arm64/kernel/traps.c | 165
-rw-r--r--  arch/arm64/kernel/vdso.c | 131
-rw-r--r--  arch/arm64/kernel/vdso/Makefile | 30
-rw-r--r--  arch/arm64/kernel/vdso/vdso.lds.S | 12
-rw-r--r--  arch/arm64/kernel/vdso/vgetrandom-chacha.S | 172
-rw-r--r--  arch/arm64/kernel/vdso/vgetrandom.c | 15
-rw-r--r--  arch/arm64/kernel/vdso32/Makefile | 21
-rw-r--r--  arch/arm64/kernel/vdso32/vdso.lds.S | 9
-rw-r--r--  arch/arm64/kernel/vmcore_info.c | 2
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 37
-rw-r--r--  arch/arm64/kernel/watchdog_hld.c | 58
83 files changed, 4777 insertions, 3029 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 763824963ed1..76f32e424065 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,7 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o idle.o patching.o pi/
+ syscall.o proton-pack.o idle.o patching.o pi/ \
+ rsi.o jump_label.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -46,8 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
@@ -78,10 +77,10 @@ $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
obj-y += probes/
obj-y += head.o
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
-AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\""
endif
# for cleaning
diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls
new file mode 100644
index 000000000000..0542a718871a
--- /dev/null
+++ b/arch/arm64/kernel/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 +=
+syscall_abis_64 += renameat rlimit memfd_secret
+
+syscalltbl = arch/arm64/tools/syscall_%.tbl
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index e0e7b93c16cc..af90128cfed5 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -30,6 +30,7 @@
#include <linux/pgtable.h>
#include <acpi/ghes.h>
+#include <acpi/processor.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
@@ -45,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
+static bool param_acpi_nospcr __initdata;
static int __init parse_acpi(char *arg)
{
@@ -58,6 +60,8 @@ static int __init parse_acpi(char *arg)
param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
+ else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */
+ param_acpi_nospcr = true;
else
return -EINVAL; /* Core will print when we return error */
@@ -129,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void)
/*
* FADT is required on arm64; retrieve it to check its presence
- * and carry out revision and ACPI HW reduced compliancy tests
+ * and carry out revision and ACPI HW reduced compliance tests
*/
status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
if (ACPI_FAILURE(status)) {
@@ -237,7 +241,18 @@ done:
acpi_put_table(facs);
}
#endif
- acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+
+ /*
+ * For varying privacy and security reasons, sometimes need
+ * to completely silence the serial console output, and only
+ * enable it when needed.
+ * But there are many existing systems that depend on this
+ * behaviour, use acpi=nospcr to disable console in ACPI SPCR
+ * table as default serial console.
+ */
+ acpi_parse_spcr(earlycon_acpi_spcr_enable,
+ !param_acpi_nospcr);
+
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
}
@@ -362,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return ioremap_prot(phys, size, pgprot_val(prot));
+ return ioremap_prot(phys, size, prot);
}
/*
@@ -386,7 +401,7 @@ int apei_claim_sea(struct pt_regs *regs)
return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags());
if (regs)
- return_to_irqs_enabled = interrupts_enabled(regs);
+ return_to_irqs_enabled = !regs_irqs_disabled(regs);
/*
* SEA can interrupt SError, mask it and describe this as an NMI so
@@ -408,7 +423,7 @@ int apei_claim_sea(struct pt_regs *regs)
irq_work_run();
__irq_exit();
} else {
- pr_warn_ratelimited("APEI work queued but not completed");
+ pr_warn_ratelimited("APEI work queued but not completed\n");
err = -EINPROGRESS;
}
}
@@ -423,107 +438,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
memblock_mark_nomap(addr, size);
}
-#ifdef CONFIG_ACPI_FFH
-/*
- * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
- * specified in https://developer.arm.com/docs/den0048/latest
- */
-struct acpi_ffh_data {
- struct acpi_ffh_info info;
- void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- unsigned long a4, unsigned long a5,
- unsigned long a6, unsigned long a7,
- struct arm_smccc_res *args,
- struct arm_smccc_quirk *res);
- void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
- struct arm_smccc_1_2_regs *res);
-};
-
-int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id,
+ int *pcpu)
{
- enum arm_smccc_conduit conduit;
- struct acpi_ffh_data *ffh_ctxt;
-
- if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
- return -EOPNOTSUPP;
-
- conduit = arm_smccc_1_1_get_conduit();
- if (conduit == SMCCC_CONDUIT_NONE) {
- pr_err("%s: invalid SMCCC conduit\n", __func__);
- return -EOPNOTSUPP;
+ /* If an error code is passed in this stub can't fix it */
+ if (*pcpu < 0) {
+ pr_warn_once("Unable to map CPU to valid ID\n");
+ return *pcpu;
}
- ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
- if (!ffh_ctxt)
- return -ENOMEM;
-
- if (conduit == SMCCC_CONDUIT_SMC) {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
- } else {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
- }
-
- memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
-
- *region_ctxt = ffh_ctxt;
- return AE_OK;
-}
-
-static bool acpi_ffh_smccc_owner_allowed(u32 fid)
-{
- int owner = ARM_SMCCC_OWNER_NUM(fid);
-
- if (owner == ARM_SMCCC_OWNER_STANDARD ||
- owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
- return true;
-
- return false;
+ return 0;
}
+EXPORT_SYMBOL(acpi_map_cpu);
-int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+int acpi_unmap_cpu(int cpu)
{
- int ret = 0;
- struct acpi_ffh_data *ffh_ctxt = region_context;
-
- if (ffh_ctxt->info.offset == 0) {
- /* SMC/HVC 32bit call */
- struct arm_smccc_res res;
- u32 a[8] = { 0 }, *ptr = (u32 *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
- !acpi_ffh_smccc_owner_allowed(*ptr) ||
- ffh_ctxt->info.length > 32) {
- ret = AE_ERROR;
- } else {
- int idx, len = ffh_ctxt->info.length >> 2;
-
- for (idx = 0; idx < len; idx++)
- a[idx] = *(ptr + idx);
-
- ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
- a[5], a[6], a[7], &res, NULL);
- memcpy(value, &res, sizeof(res));
- }
-
- } else if (ffh_ctxt->info.offset == 1) {
- /* SMC/HVC 64bit call */
- struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
- !acpi_ffh_smccc_owner_allowed(r->a0) ||
- ffh_ctxt->info.length > sizeof(*r)) {
- ret = AE_ERROR;
- } else {
- ffh_ctxt->invoke_ffh64_fn(r, r);
- memcpy(value, r, ffh_ctxt->info.length);
- }
- } else {
- ret = AE_ERROR;
- }
-
- return ret;
+ return 0;
}
-#endif /* CONFIG_ACPI_FFH */
+EXPORT_SYMBOL(acpi_unmap_cpu);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index e51535a5f939..2465f291c7e1 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -27,24 +27,13 @@
#include <asm/numa.h>
-static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
+static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
int __init acpi_numa_get_nid(unsigned int cpu)
{
return acpi_early_node_map[cpu];
}
-static inline int get_cpu_for_acpi_id(u32 uid)
-{
- int cpu;
-
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- if (uid == get_acpi_id_for_cpu(cpu))
- return cpu;
-
- return -EINVAL;
-}
-
static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af496..f5ec7e7c1d3f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
-static void __apply_alternatives(const struct alt_region *region,
- bool is_module,
- unsigned long *cpucap_mask)
+static int __apply_alternatives(const struct alt_region *region,
+ bool is_module,
+ unsigned long *cpucap_mask)
{
struct alt_instr *alt;
__le32 *origptr, *updptr;
@@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region,
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- if (ALT_HAS_CB(alt))
+ if (ALT_HAS_CB(alt)) {
alt_cb = ALT_REPL_PTR(alt);
- else
+ if (is_module && !core_kernel_text((unsigned long)alt_cb))
+ return -ENOEXEC;
+ } else {
alt_cb = patch_alternative;
+ }
alt_cb(alt, origptr, updptr, nr_inst);
@@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region,
bitmap_and(applied_alternatives, applied_alternatives,
system_cpucaps, ARM64_NCAPS);
}
+
+ return 0;
}
static void __init apply_alternatives_vdso(void)
@@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void)
}
#ifdef CONFIG_MODULES
-void apply_alternatives_module(void *start, size_t length)
+int apply_alternatives_module(void *start, size_t length)
{
struct alt_region region = {
.begin = start,
@@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
- __apply_alternatives(&region, true, &all_capabilities[0]);
+ return __apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index b776e7424fe9..e737c6295ec7 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -507,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
return ret;
}
-static int emulation_proc_handler(struct ctl_table *table, int write,
+static int emulation_proc_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 81496083c041..b6367ff3a49c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -6,21 +6,19 @@
* 2001-2002 Keith Owens
* Copyright (C) 2012 ARM Ltd.
*/
+#define COMPILE_OFFSETS
#include <linux/arm_sdei.h>
#include <linux/sched.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/mm.h>
-#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
-#include <linux/preempt.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
-#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <linux/kbuild.h>
@@ -28,8 +26,6 @@
int main(void)
{
- DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
- BLANK();
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
@@ -79,45 +75,27 @@ int main(void)
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
- DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
+ DEFINE(S_PMR, offsetof(struct pt_regs, pmr));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
+ DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
- DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0]));
- DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2]));
- DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4]));
- DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6]));
- DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8]));
- DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp));
- DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr));
- DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp));
- DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc));
+ DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0]));
+ DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2]));
+ DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4]));
+ DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6]));
+ DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8]));
+ DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp));
+ DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr));
+ DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
+ DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc));
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
- DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp));
-#endif
- DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
- BLANK();
+ DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp));
#endif
-#ifdef CONFIG_COMPAT
- DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
- DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
+ DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs));
BLANK();
#endif
- DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
- BLANK();
- DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
- DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
- BLANK();
- DEFINE(VM_EXEC, VM_EXEC);
- BLANK();
- DEFINE(PAGE_SZ, PAGE_SIZE);
- BLANK();
- DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
- DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
- BLANK();
- DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
- BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
@@ -128,6 +106,7 @@ int main(void)
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_ELR_EL2, offsetof(struct kvm_cpu_context, sys_regs[ELR_EL2]));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
@@ -145,6 +124,7 @@ int main(void)
DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
+ DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
@@ -200,20 +180,10 @@ int main(void)
DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
#endif
BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0]));
- DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1]));
- DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2]));
- DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3]));
- DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4]));
- DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5]));
- DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6]));
- DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7]));
- DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp));
- DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
#endif
+ DEFINE(PIE_E0_ASM, PIE_E0);
+ DEFINE(PIE_E1_ASM, PIE_E1);
return 0;
}
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index d9c9218fa1fd..309942b06c5b 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu)
unsigned int level, idx;
enum cache_type type;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ struct cacheinfo *infos = this_cpu_ci->info_list;
for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
- idx < this_cpu_ci->num_leaves; idx++, level++) {
+ idx < this_cpu_ci->num_leaves; level++) {
type = get_cache_type(level);
if (type == CACHE_TYPE_SEPARATE) {
- ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
- ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ if (idx + 1 >= this_cpu_ci->num_leaves)
+ break;
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level);
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level);
} else {
- ci_leaf_init(this_leaf++, type, level);
+ ci_leaf_init(&infos[idx++], type, level);
}
}
return 0;
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
index deff21bfa680..b68e1d328d4c 100644
--- a/arch/arm64/kernel/compat_alignment.c
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
return 1;
}
+ if (!handler)
+ return 1;
type = handler(addr, instr, regs);
if (type == TYPE_ERROR || type == TYPE_FAULT)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 828be635e7e1..8cb3b575a031 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -14,31 +14,85 @@
#include <asm/kvm_asm.h>
#include <asm/smp_plat.h>
+static u64 target_impl_cpu_num;
+static struct target_impl_cpu *target_impl_cpus;
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus)
+{
+ if (target_impl_cpu_num || !num || !impl_cpus)
+ return false;
+
+ target_impl_cpu_num = num;
+ target_impl_cpus = impl_cpus;
+ return true;
+}
+
+static inline bool is_midr_in_range(struct midr_range const *range)
+{
+ int i;
+
+ if (!target_impl_cpu_num)
+ return midr_is_cpu_model_range(read_cpuid_id(), range->model,
+ range->rv_min, range->rv_max);
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (midr_is_cpu_model_range(target_impl_cpus[i].midr,
+ range->model,
+ range->rv_min, range->rv_max))
+ return true;
+ }
+ return false;
+}
+
+bool is_midr_in_range_list(struct midr_range const *ranges)
+{
+ while (ranges->model)
+ if (is_midr_in_range(ranges++))
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(is_midr_in_range_list);
+
static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+__is_affected_midr_range(const struct arm64_cpu_capabilities *entry,
+ u32 midr, u32 revidr)
{
const struct arm64_midr_revidr *fix;
- u32 midr = read_cpuid_id(), revidr;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- if (!is_midr_in_range(midr, &entry->midr_range))
+ if (!is_midr_in_range(&entry->midr_range))
return false;
midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
- revidr = read_cpuid(REVIDR_EL1);
for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++)
if (midr == fix->midr_rv && (revidr & fix->revidr_mask))
return false;
-
return true;
}
static bool __maybe_unused
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ int i;
+
+ if (!target_impl_cpu_num) {
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return __is_affected_midr_range(entry, read_cpuid_id(),
+ read_cpuid(REVIDR_EL1));
+ }
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (__is_affected_midr_range(entry, target_impl_cpus[i].midr,
+ target_impl_cpus[i].midr))
+ return true;
+ }
+ return false;
+}
+
+static bool __maybe_unused
is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
int scope)
{
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
+ return is_midr_in_range_list(entry->midr_range_list);
}
static bool __maybe_unused
@@ -186,12 +240,48 @@ static bool __maybe_unused
has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u32 midr = read_cpuid_id();
bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range(midr, &range) && has_dic;
+ return is_midr_in_range(&range) && has_dic;
+}
+
+static const struct midr_range impdef_pmuv3_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+};
+
+static bool has_impdef_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
+
+ if (!is_kernel_in_hyp_mode())
+ return false;
+
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
+
+ return is_midr_in_range_list(impdef_pmuv3_cpus);
+}
+
+static void cpu_enable_impdef_pmuv3_traps(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set_s(SYS_HACR_EL2, 0, BIT(56));
}
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
@@ -245,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
-const struct midr_range cavium_erratum_27456_cpus[] = {
+static const struct midr_range cavium_erratum_27456_cpus[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
/* Cavium ThunderX, T81 pass 1.0 */
@@ -432,18 +522,50 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = {
};
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS
-static const struct midr_range erratum_spec_ssbs_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_3194386
+static const struct midr_range erratum_spec_ssbs_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
-#endif
-#ifdef CONFIG_ARM64_ERRATUM_3312417
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
-#endif
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE),
{}
};
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+static const struct midr_range erratum_ac04_cpu_23_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -741,9 +863,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
},
#endif
-#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS
+#ifdef CONFIG_ARM64_ERRATUM_3194386
{
- .desc = "ARM errata 3194386, 3312417",
+ .desc = "SSBS not fully self-synchronizing",
.capability = ARM64_WORKAROUND_SPECULATIVE_SSBS,
ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
},
@@ -760,9 +882,31 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+ {
+ .desc = "AmpereOne erratum AC04_CPU_23",
+ .capability = ARM64_WORKAROUND_AMPERE_AC04_CPU_23,
+ ERRATA_MIDR_RANGE_LIST(erratum_ac04_cpu_23_list),
+ },
+#endif
+ {
+ .desc = "Broken CNTVOFF_EL2",
+ .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+ ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+ MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+ {}
+ })),
+ },
+ {
+ .desc = "Apple IMPDEF PMUv3 Traps",
+ .capability = ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_impdef_pmuv3,
+ .cpu_enable = cpu_enable_impdef_pmuv3_traps,
+ },
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 48e7029f1054..c840a93b9ef9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@
#include <linux/cpu.h>
#include <linux/kasan.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
@@ -83,8 +84,10 @@
#include <asm/hwcap.h>
#include <asm/insn.h>
#include <asm/kvm_host.h>
+#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
+#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
@@ -92,6 +95,7 @@
#include <asm/vectors.h>
#include <asm/virt.h>
+#include <asm/spectre.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
@@ -103,6 +107,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
+unsigned int compat_elf_hwcap3 __read_mostly;
#endif
DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
@@ -111,7 +116,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC
DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
-bool arm64_use_ng_mappings = false;
+/*
+ * arm64_use_ng_mappings must be placed in the .data section, otherwise it
+ * ends up in the .bss section where it is initialized in early_map_kernel()
+ * after the MMU (with the idmap) was enabled. create_init_idmap() - which
+ * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG -
+ * may end up generating an incorrect idmap page table attributes.
+ */
+bool arm64_use_ng_mappings __read_mostly = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
@@ -228,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
@@ -266,6 +279,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_LSFE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -285,12 +300,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
@@ -305,6 +324,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI),
ARM64_FTR_END,
};
@@ -314,6 +335,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F16MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
@@ -326,6 +349,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_EltPerm_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
@@ -369,6 +394,16 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SBitPerm_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_AES_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SFEXPA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_STMOP_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMOP4_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -377,6 +412,8 @@ static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM8_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM4_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0),
ARM64_FTR_END,
@@ -429,6 +466,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
@@ -465,13 +503,17 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_NV_frac_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -681,6 +723,14 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_mpamidr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PMG_MAX_SHIFT, MPAMIDR_EL1_PMG_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_VPMR_MAX_SHIFT, MPAMIDR_EL1_VPMR_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_HAS_HCR_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PARTID_MAX_SHIFT, MPAMIDR_EL1_PARTID_MAX_WIDTH, 0),
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@@ -723,17 +773,17 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
-struct arm64_ftr_override id_aa64mmfr0_override;
-struct arm64_ftr_override id_aa64mmfr1_override;
-struct arm64_ftr_override id_aa64mmfr2_override;
-struct arm64_ftr_override id_aa64pfr0_override;
-struct arm64_ftr_override id_aa64pfr1_override;
-struct arm64_ftr_override id_aa64zfr0_override;
-struct arm64_ftr_override id_aa64smfr0_override;
-struct arm64_ftr_override id_aa64isar1_override;
-struct arm64_ftr_override id_aa64isar2_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr2_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64zfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64smfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64isar1_override;
+struct arm64_ftr_override __read_mostly id_aa64isar2_override;
-struct arm64_ftr_override arm64_sw_feature_override;
+struct arm64_ftr_override __read_mostly arm64_sw_feature_override;
static const struct __ftr_reg_entry {
u32 sys_id;
@@ -801,6 +851,9 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
+ /* Op1 = 0, CRn = 10, CRm = 4 */
+ ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr),
+
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -950,7 +1003,7 @@ static void __init sort_ftr_regs(void)
/*
* Initialise the CPU feature register from Boot CPU values.
- * Also initiliases the strict_mask for the register.
+ * Also initialises the strict_mask for the register.
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
@@ -986,17 +1039,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
/* Override was valid */
ftr_new = tmp;
str = "forced";
- } else if (ftr_ovr == tmp) {
+ } else {
/* Override was the safe value */
str = "already set";
}
- if (str)
- pr_warn("%s[%d:%d]: %s to %llx\n",
- reg->name,
- ftrp->shift + ftrp->width - 1,
- ftrp->shift, str,
- tmp & (BIT(ftrp->width) - 1));
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str,
+ tmp & (BIT(ftrp->width) - 1));
} else if ((ftr_mask & reg->override->val) == ftr_mask) {
reg->override->val &= ~ftr_mask;
pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1149,17 +1201,16 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
unsigned long cpacr = cpacr_save_enable_kernel_sme();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
vec_init_vq_map(ARM64_VEC_SME);
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+ }
+
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
}
@@ -1369,6 +1420,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR4_EL1, cpu,
+ info->reg_id_aa64mmfr4, boot->reg_id_aa64mmfr4);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
@@ -1402,13 +1455,6 @@ void update_cpu_features(int cpu,
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
unsigned long cpacr = cpacr_save_enable_kernel_sme();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
-
/* Probe vector lengths */
if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SME);
@@ -1416,6 +1462,12 @@ void update_cpu_features(int cpu,
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
+ info->reg_mpamidr, boot->reg_mpamidr);
+ }
+
/*
* The kernel uses the LDGM/STGM instructions and the number of tags
* they read/write depends on the GMID_EL1.BS field. Check that the
@@ -1615,6 +1667,11 @@ const struct cpumask *system_32bit_el0_cpumask(void)
return cpu_possible_mask;
}
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
+}
+
static int __init parse_32bit_el0_param(char *str)
{
allow_mismatched_32bit_el0 = true;
@@ -1757,7 +1814,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
char const *str = "kpti command line option";
bool meltdown_safe;
- meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list);
+ meltdown_safe = is_midr_in_range_list(kpti_safe_list);
/* Defer to CPU feature registers */
if (has_cpuid_feature(entry, scope))
@@ -1827,7 +1884,7 @@ static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
!(has_cpuid_feature(entry, scope) ||
- is_midr_in_range_list(read_cpuid_id(), nv1_ni_list)));
+ is_midr_in_range_list(nv1_ni_list)));
}
#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
@@ -1863,102 +1920,27 @@ static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
}
#endif
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
-
-extern
-void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
- phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags);
-
-static phys_addr_t __initdata kpti_ng_temp_alloc;
-
-static phys_addr_t __init kpti_ng_pgd_alloc(int shift)
-{
- kpti_ng_temp_alloc -= PAGE_SIZE;
- return kpti_ng_temp_alloc;
-}
-
-static int __init __kpti_install_ng_mappings(void *__unused)
-{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
- extern kpti_remap_fn idmap_kpti_install_ng_mappings;
- kpti_remap_fn *remap_fn;
-
- int cpu = smp_processor_id();
- int levels = CONFIG_PGTABLE_LEVELS;
- int order = order_base_2(levels);
- u64 kpti_ng_temp_pgd_pa = 0;
- pgd_t *kpti_ng_temp_pgd;
- u64 alloc = 0;
-
- if (levels == 5 && !pgtable_l5_enabled())
- levels = 4;
- else if (levels == 4 && !pgtable_l4_enabled())
- levels = 3;
-
- remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
-
- if (!cpu) {
- alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
- kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
- kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
-
- //
- // Create a minimal page table hierarchy that permits us to map
- // the swapper page tables temporarily as we traverse them.
- //
- // The physical pages are laid out as follows:
- //
- // +--------+-/-------+-/------ +-/------ +-\\\--------+
- // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
- // +--------+-\-------+-\------ +-\------ +-///--------+
- // ^
- // The first page is mapped into this hierarchy at a PMD_SHIFT
- // aligned virtual address, so that we can manipulate the PTE
- // level entries while the mapping is active. The first entry
- // covers the PTE[] page itself, the remaining entries are free
- // to be used as a ad-hoc fixmap.
- //
- create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
- KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
- kpti_ng_pgd_alloc, 0);
- }
-
- cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
- cpu_uninstall_idmap();
-
- if (!cpu) {
- free_pages(alloc, order);
- arm64_use_ng_mappings = true;
- }
-
- return 0;
-}
-
-static void __init kpti_install_ng_mappings(void)
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
{
- /* Check whether KPTI is going to be used */
- if (!arm64_kernel_unmapped_at_el0())
- return;
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
/*
- * We don't need to rewrite the page-tables if either we've done
- * it already or we have KASLR enabled and therefore have not
- * created any global mappings at all.
+ * PMUVer follows the standard ID scheme for an unsigned field with the
+ * exception of 0xF (IMP_DEF) which is treated specially and implies
+ * FEAT_PMUv3 is not implemented.
+ *
+ * See DDI0487L.a D24.1.3.2 for more details.
*/
- if (arm64_use_ng_mappings)
- return;
-
- stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
-}
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
-#else
-static inline void kpti_install_ng_mappings(void)
-{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP;
}
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+#endif
static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
{
@@ -1988,7 +1970,7 @@ static struct cpumask dbm_cpus __read_mostly;
static inline void __cpu_enable_hw_dbm(void)
{
- u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD;
write_sysreg(tcr, tcr_el1);
isb();
@@ -2010,7 +1992,7 @@ static bool cpu_has_broken_dbm(void)
{},
};
- return is_midr_in_range_list(read_cpuid_id(), cpus);
+ return is_midr_in_range_list(cpus);
}
static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
@@ -2127,7 +2109,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
if (kvm_get_mode() != KVM_MODE_NV)
return false;
- if (!has_cpuid_feature(cap, scope)) {
+ if (!cpucap_multi_entry_cap_matches(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
@@ -2141,6 +2123,47 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
+bool cpu_supports_bbml2_noabort(void)
+{
+ /*
+ * We want to allow usage of BBML2 in as wide a range of kernel contexts
+ * as possible. This list is therefore an allow-list of known-good
+ * implementations that both support BBML2 and additionally, fulfill the
+ * extra constraint of never generating TLB conflict aborts when using
+ * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain
+ * kernel contexts difficult to prove safe against recursive aborts).
+ *
+ * Note that implementations can only be considered "known-good" if their
+ * implementors attest to the fact that the implementation never raises
+ * TLB conflict aborts for BBML2 mapping granularity changes.
+ */
+ static const struct midr_range supports_bbml2_noabort_list[] = {
+ MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3AE, 0, 2, 0xf),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {}
+ };
+
+ /* Does our cpu guarantee to never raise TLB conflict aborts? */
+ if (!is_midr_in_range_list(supports_bbml2_noabort_list))
+ return false;
+
+ /*
+ * We currently ignore the ID_AA64MMFR2_EL1 register, and only care
+ * about whether the MIDR check passes.
+ */
+
+ return true;
+}
+
+static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
+{
+ return cpu_supports_bbml2_noabort();
+}
+
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@@ -2161,6 +2184,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
/* Firmware may have left a deferred SError in this register. */
write_sysreg_s(0, SYS_DISR_EL1);
}
+static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope)
+{
+ const struct arm64_cpu_capabilities rasv1p1_caps[] = {
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1)
+ },
+ };
+
+ return (has_cpuid_feature(&rasv1p1_caps[0], scope) ||
+ (has_cpuid_feature(&rasv1p1_caps[1], scope) &&
+ has_cpuid_feature(&rasv1p1_caps[2], scope)));
+}
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -2215,7 +2256,7 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
{
if (this_cpu_has_cap(ARM64_HAS_E0PD))
- sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+ sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1);
}
#endif /* CONFIG_ARM64_E0PD */
@@ -2224,11 +2265,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2263,6 +2304,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
}
#endif
+static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ static const struct midr_range has_vgic_v3[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+ };
+ struct arm_smccc_res res = {};
+
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
+ if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
+ !is_midr_in_range_list(has_vgic_v3))
+ return false;
+
+ if (!is_hyp_mode_available())
+ return false;
+
+ if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
+ return true;
+
+ if (is_kernel_in_hyp_mode())
+ res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
+ else
+ arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
+
+ if (res.a0 == HVC_STUB_ERR)
+ return false;
+
+ return res.a1 & ICH_VTR_EL2_TDS;
+}
+
#ifdef CONFIG_ARM64_BTI
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
{
@@ -2281,17 +2365,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
@@ -2347,6 +2435,22 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE);
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused)
+{
+ /* GCSPR_EL0 is always readable */
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2366,6 +2470,45 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
+static bool
+test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!has_cpuid_feature(entry, scope))
+ return false;
+
+ /* Check firmware actually enabled MPAM on this cpu. */
+ return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM1_EL1_MPAMEN);
+}
+
+static void
+cpu_enable_mpam(const struct arm64_cpu_capabilities *entry)
+{
+ /*
+ * Access by the kernel (at EL1) should use the reserved PARTID
+ * which is configured unrestricted. This avoids priority-inversion
+ * where latency sensitive tasks have to wait for a task that has
+ * been throttled to release the lock.
+ */
+ write_sysreg_s(0, SYS_MPAM1_EL1);
+}
+
+static bool
+test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+
+ return idr & MPAMIDR_EL1_HAS_HCR;
+}
+
+static bool
+test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF))
+ return false;
+
+ return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY);
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
@@ -2378,8 +2521,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -2438,7 +2581,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
- ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
+ .match_list = (const struct arm64_cpu_capabilities []){
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
+ },
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)
+ },
+ { /* Sentinel */ }
+ },
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
@@ -2523,6 +2676,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_clear_disr,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
},
+ {
+ .desc = "RASv1p1 Extension Support",
+ .capability = ARM64_HAS_RASV1P1_EXTN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_rasv1p1,
+ },
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
@@ -2580,6 +2739,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
+#ifdef CONFIG_ARM64_HAFT
+ {
+ .desc = "Hardware managed Access Flag for Table Descriptors",
+ /*
+ * Contrary to the page/block access flag, the table access flag
+ * cannot be emulated in software (no access fault will occur).
+	 * Therefore this should be used only if it's supported
+	 * system-wide.
+ */
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAFT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
+ },
+#endif
{
.desc = "CRC32 instructions",
.capability = ARM64_HAS_CRC32,
@@ -2684,6 +2858,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_gic_prio_relaxed_sync,
},
#endif
+ {
+ /*
+		 * Depends on having a GICv3 CPU interface
+ */
+ .desc = "ICV_DIR_EL1 trapping",
+ .capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = can_trap_icv_dir_el1,
+ },
#ifdef CONFIG_ARM64_E0PD
{
.desc = "E0PD",
@@ -2731,6 +2914,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3)
},
+ {
+ .desc = "FAR on MTE Tag Check Fault",
+ .capability = ARM64_MTE_FAR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP)
+ },
+ {
+ .desc = "Store Only MTE Tag Check",
+ .capability = ARM64_MTE_STORE_ONLY,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTESTOREONLY, IMP)
+ },
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
@@ -2746,6 +2943,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
},
+ {
+ .desc = "Fine Grained Traps 2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT2,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, FGT2)
+ },
#ifdef CONFIG_ARM64_SME
{
.desc = "Scalable Matrix Extension",
@@ -2831,6 +3035,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
{
+ .desc = "BBM Level 2 without TLB conflict abort",
+ .capability = ARM64_HAS_BBML2_NOABORT,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = has_bbml2_noabort,
+ },
+ {
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
.capability = ARM64_HAS_LPA2,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
@@ -2863,12 +3073,81 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#endif
{
+ .desc = "Memory Partitioning And Monitoring",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM,
+ .matches = test_has_mpam,
+ .cpu_enable = cpu_enable_mpam,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1)
+ },
+ {
+ .desc = "Memory Partitioning And Monitoring Virtualisation",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM_HCR,
+ .matches = test_has_mpam_hcr,
+ },
+ {
.desc = "NV1",
.capability = ARM64_HAS_HCR_NV1,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nv1,
ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
},
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ {
+ .desc = "Guarded Control Stack (GCS)",
+ .capability = ARM64_HAS_GCS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .cpu_enable = cpu_enable_gcs,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
+ },
+#endif
+#ifdef CONFIG_HW_PERF_EVENTS
+ {
+ .desc = "PMUv3",
+ .capability = ARM64_HAS_PMUV3,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_pmuv3,
+ },
+#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
+ {
+ .desc = "GICv5 Legacy vCPU interface",
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_LEGACY,
+ .matches = test_has_gicv5_legacy,
+ },
+ {
+ .desc = "XNX",
+ .capability = ARM64_HAS_XNX,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
+ },
{},
};
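The arm64_features[] table above is sentinel-terminated and every entry is probed through its .matches callback. A small stand-alone sketch of that walk, with made-up names rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct example_cap {
	const char *desc;
	bool (*matches)(const struct example_cap *cap);
};

static bool always(const struct example_cap *cap) { return true; }
static bool never(const struct example_cap *cap)  { return false; }

static const struct example_cap example_caps[] = {
	{ .desc = "feature A", .matches = always },
	{ .desc = "feature B", .matches = never },
	{},	/* sentinel: .matches == NULL ends the walk */
};

int main(void)
{
	for (const struct example_cap *cap = example_caps; cap->matches; cap++)
		printf("%s: %s\n", cap->desc,
		       cap->matches(cap) ? "detected" : "absent");
	return 0;
}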
@@ -2901,6 +3180,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = match, \
}
+#define HWCAP_CAP_MATCH_ID(match, reg, field, min_value, cap_type, cap) \
+ { \
+ __HWCAP_CAP(#cap, cap_type, cap) \
+ HWCAP_CPUID_MATCH(reg, field, min_value) \
+ .matches = match, \
+ }
+
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
@@ -2929,6 +3215,20 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
};
#endif
+#ifdef CONFIG_ARM64_SVE
+static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sve() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
+#ifdef CONFIG_ARM64_SME
+static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sme() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -2947,6 +3247,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -2968,22 +3269,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT),
HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE),
HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p2, CAP_HWCAP, KERNEL_HWCAP_SVE2P2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, AES2, CAP_HWCAP, KERNEL_HWCAP_SVE_AES2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, BFSCALE, CAP_HWCAP, KERNEL_HWCAP_SVE_BFSCALE),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F16MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_F16MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, EltPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_ELTPERM),
+#endif
+#ifdef CONFIG_ARM64_GCS
+ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
#endif
HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
@@ -2996,10 +3306,13 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_MTE
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR),
+	HWCAP_CAP(ID_AA64PFR2_EL1, MTESTOREONLY, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_STORE_ONLY),
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, CMPBR, CAP_HWCAP, KERNEL_HWCAP_CMPBR),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
@@ -3007,32 +3320,43 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
@@ -3143,18 +3467,49 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
+ bool match_all = false;
+ bool caps_set = false;
+ bool boot_cpu = false;
+
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask) ||
- cpus_have_cap(caps->capability) ||
- !caps->matches(caps, cpucap_default_scope(caps)))
+ if (!caps || !(caps->type & scope_mask))
continue;
- if (caps->desc && !caps->cpus)
+ match_all = cpucap_match_all_early_cpus(caps);
+ caps_set = cpus_have_cap(caps->capability);
+ boot_cpu = scope_mask & SCOPE_BOOT_CPU;
+
+ /*
+ * Unless it's a match-all CPUs feature, avoid probing if
+ * already detected.
+ */
+ if (!match_all && caps_set)
+ continue;
+
+ /*
+ * A match-all CPUs capability is only set when probing the
+ * boot CPU. It may be cleared subsequently if not detected on
+ * secondary ones.
+ */
+ if (match_all && !caps_set && !boot_cpu)
+ continue;
+
+ if (!caps->matches(caps, cpucap_default_scope(caps))) {
+ if (match_all)
+ __clear_bit(caps->capability, system_cpucaps);
+ continue;
+ }
+
+ /*
+ * Match-all CPUs capabilities are logged later when the
+ * system capabilities are finalised.
+ */
+ if (!match_all && caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
- if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
+ if (boot_cpu && (caps->type & SCOPE_BOOT_CPU))
set_bit(caps->capability, boot_cpucaps);
}
}
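The match-all handling added to update_cpu_capabilities() above sets the capability while probing the boot CPU and clears it again if any secondary CPU fails the match. A toy user-space model of that policy, assuming nothing about the real system_cpucaps bitmap layout:

#include <stdbool.h>
#include <stdio.h>

static bool cap_bit;

static void probe_cpu(bool boot_cpu, bool cpu_has_feature)
{
	if (boot_cpu) {
		cap_bit = cpu_has_feature;	/* set only from the boot CPU */
		return;
	}

	if (cap_bit && !cpu_has_feature)
		cap_bit = false;		/* clear if a secondary lacks it */
}

int main(void)
{
	probe_cpu(true, true);		/* boot CPU has the feature */
	probe_cpu(false, true);		/* secondary 1 has it */
	probe_cpu(false, false);	/* secondary 2 does not: capability dropped */

	printf("capability %s\n", cap_bit ? "set" : "clear");
	return 0;
}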
@@ -3351,7 +3706,7 @@ static void verify_hyp_capabilities(void)
return;
safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
/* Verify VMID bits */
@@ -3372,6 +3727,36 @@ static void verify_hyp_capabilities(void)
}
}
+static void verify_mpam_capabilities(void)
+{
+ u64 cpu_idr = read_cpuid(ID_AA64PFR0_EL1);
+ u64 sys_idr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u16 cpu_partid_max, cpu_pmg_max, sys_partid_max, sys_pmg_max;
+
+ if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, cpu_idr) !=
+ FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, sys_idr)) {
+ pr_crit("CPU%d: MPAM version mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_idr = read_cpuid(MPAMIDR_EL1);
+ sys_idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+ if (FIELD_GET(MPAMIDR_EL1_HAS_HCR, cpu_idr) !=
+ FIELD_GET(MPAMIDR_EL1_HAS_HCR, sys_idr)) {
+ pr_crit("CPU%d: Missing MPAM HCR\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, cpu_idr);
+ cpu_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, cpu_idr);
+ sys_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, sys_idr);
+ sys_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, sys_idr);
+ if (cpu_partid_max < sys_partid_max || cpu_pmg_max < sys_pmg_max) {
+ pr_crit("CPU%d: MPAM PARTID/PMG max values are mismatched\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
+
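verify_mpam_capabilities() compares individual register fields with FIELD_GET() and rejects a CPU whose limits fall below the sanitised system-wide values. A stand-alone sketch of the same comparison; the masks and register values below are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PARTID_MAX_MASK	UINT64_C(0x000000000000ffff)	/* assumed layout */
#define EXAMPLE_PMG_MAX_MASK	UINT64_C(0x0000000000ff0000)	/* assumed layout */

static uint64_t field_get(uint64_t mask, uint64_t reg)
{
	return (reg & mask) >> __builtin_ctzll(mask);
}

int main(void)
{
	uint64_t cpu_idr = 0x0003001f;	/* pretend MPAMIDR_EL1 for this CPU */
	uint64_t sys_idr = 0x0002001f;	/* pretend sanitised system value */

	int ok = field_get(EXAMPLE_PARTID_MAX_MASK, cpu_idr) >=
		 field_get(EXAMPLE_PARTID_MAX_MASK, sys_idr) &&
		 field_get(EXAMPLE_PMG_MAX_MASK, cpu_idr) >=
		 field_get(EXAMPLE_PMG_MAX_MASK, sys_idr);

	printf("MPAM PARTID/PMG limits %s\n", ok ? "compatible" : "mismatched");
	return 0;
}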
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@@ -3398,6 +3783,9 @@ static void verify_local_cpu_capabilities(void)
if (is_hyp_mode_available())
verify_hyp_capabilities();
+
+ if (system_supports_mpam())
+ verify_mpam_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -3475,8 +3863,14 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
+unsigned long cpu_get_elf_hwcap3(void)
+{
+ return elf_hwcap[2];
+}
+
static void __init setup_boot_cpu_capabilities(void)
{
+ kvm_arm_target_impl_cpu_init();
/*
* The boot CPU's feature register values have been recorded. Detect
* boot cpucaps and local cpucaps for the boot CPU, then enable and
@@ -3516,17 +3910,24 @@ static void __init setup_system_capabilities(void)
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
apply_alternatives_all();
- /*
- * Log any cpucaps with a cpumask as these aren't logged by
- * update_cpu_capabilities().
- */
for (int i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
- if (caps && caps->cpus && caps->desc &&
- cpumask_any(caps->cpus) < nr_cpu_ids)
+ if (!caps || !caps->desc)
+ continue;
+
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
+
+ /* Log match-all CPUs capabilities */
+ if (cpucap_match_all_early_cpus(caps) &&
+ cpus_have_cap(caps->capability))
+ pr_info("detected: %s\n", caps->desc);
}
/*
@@ -3534,12 +3935,18 @@ static void __init setup_system_capabilities(void)
*/
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+
+ /*
+ * Report Spectre mitigations status.
+ */
+ spectre_print_disabled_mitigations();
}
void __init setup_system_features(void)
{
setup_system_capabilities();
+ linear_map_maybe_split_to_ptes();
kpti_install_ng_mappings();
sve_setup();
@@ -3577,7 +3984,14 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
static int lucky_winner = -1;
struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
- bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+ bool cpu_32bit = false;
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
+ pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
+ else
+ cpu_32bit = true;
+ }
if (cpu_32bit) {
cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
deleted file mode 100644
index f372295207fb..000000000000
--- a/arch/arm64/kernel/cpuidle.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ARM64 CPU idle arch support
- *
- * Copyright (C) 2014 ARM Ltd.
- * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/cpuidle.h>
-#include <linux/cpu_pm.h>
-#include <linux/psci.h>
-
-#ifdef CONFIG_ACPI_PROCESSOR_IDLE
-
-#include <acpi/processor.h>
-
-#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
-
-static int psci_acpi_cpu_init_idle(unsigned int cpu)
-{
- int i, count;
- struct acpi_lpi_state *lpi;
- struct acpi_processor *pr = per_cpu(processors, cpu);
-
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- count = pr->power.count - 1;
- if (count <= 0)
- return -ENODEV;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- lpi = &pr->power.lpi_states[i + 1];
- /*
- * Only bits[31:0] represent a PSCI power_state while
- * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
- */
- state = lpi->address;
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-int acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
- return psci_acpi_cpu_init_idle(cpu);
-}
-
-__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
-{
- u32 state = lpi->address;
-
- if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
- return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
- else
- return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
-}
-#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 09eeaa24d456..c44e6d94f5de 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -80,6 +80,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SB] = "sb",
[KERNEL_HWCAP_PACA] = "paca",
[KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_GCS] = "gcs",
[KERNEL_HWCAP_DCPODP] = "dcpodp",
[KERNEL_HWCAP_SVE2] = "sve2",
[KERNEL_HWCAP_SVEAES] = "sveaes",
@@ -143,6 +144,25 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
+ [KERNEL_HWCAP_CMPBR] = "cmpbr",
+ [KERNEL_HWCAP_FPRCVT] = "fprcvt",
+ [KERNEL_HWCAP_F8MM8] = "f8mm8",
+ [KERNEL_HWCAP_F8MM4] = "f8mm4",
+ [KERNEL_HWCAP_SVE_F16MM] = "svef16mm",
+ [KERNEL_HWCAP_SVE_ELTPERM] = "sveeltperm",
+ [KERNEL_HWCAP_SVE_AES2] = "sveaes2",
+ [KERNEL_HWCAP_SVE_BFSCALE] = "svebfscale",
+ [KERNEL_HWCAP_SVE2P2] = "sve2p2",
+ [KERNEL_HWCAP_SME2P2] = "sme2p2",
+ [KERNEL_HWCAP_SME_SBITPERM] = "smesbitperm",
+ [KERNEL_HWCAP_SME_AES] = "smeaes",
+ [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa",
+ [KERNEL_HWCAP_SME_STMOP] = "smestmop",
+ [KERNEL_HWCAP_SME_SMOP4] = "smesmop4",
+ [KERNEL_HWCAP_MTE_FAR] = "mtefar",
+ [KERNEL_HWCAP_MTE_STORE_ONLY] = "mtestoreonly",
+ [KERNEL_HWCAP_LSFE] = "lsfe",
};
#ifdef CONFIG_COMPAT
@@ -192,80 +212,79 @@ static const char *const compat_hwcap2_str[] = {
static int c_show(struct seq_file *m, void *v)
{
- int i, j;
+ int j;
+ int cpu = m->index;
bool compat = personality(current->personality) == PER_LINUX32;
+ struct cpuinfo_arm64 *cpuinfo = v;
+ u32 midr = cpuinfo->reg_midr;
- for_each_online_cpu(i) {
- struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
- u32 midr = cpuinfo->reg_midr;
-
- /*
- * glibc reads /proc/cpuinfo to determine the number of
- * online processors, looking for lines beginning with
- * "processor". Give glibc what it expects.
- */
- seq_printf(m, "processor\t: %d\n", i);
- if (compat)
- seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
- MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
+ /*
+ * glibc reads /proc/cpuinfo to determine the number of
+ * online processors, looking for lines beginning with
+ * "processor". Give glibc what it expects.
+ */
+ seq_printf(m, "processor\t: %d\n", cpu);
+ if (compat)
+ seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
+ MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
- seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
- loops_per_jiffy / (500000UL/HZ),
- loops_per_jiffy / (5000UL/HZ) % 100);
+ seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000UL/HZ),
+ loops_per_jiffy / (5000UL/HZ) % 100);
- /*
- * Dump out the common processor features in a single line.
- * Userspace should read the hwcaps with getauxval(AT_HWCAP)
- * rather than attempting to parse this, but there's a body of
- * software which does already (at least for 32-bit).
- */
- seq_puts(m, "Features\t:");
- if (compat) {
+ /*
+ * Dump out the common processor features in a single line.
+ * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+ * rather than attempting to parse this, but there's a body of
+ * software which does already (at least for 32-bit).
+ */
+ seq_puts(m, "Features\t:");
+ if (compat) {
#ifdef CONFIG_COMPAT
- for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
- if (compat_elf_hwcap & (1 << j)) {
- /*
- * Warn once if any feature should not
- * have been present on arm64 platform.
- */
- if (WARN_ON_ONCE(!compat_hwcap_str[j]))
- continue;
-
- seq_printf(m, " %s", compat_hwcap_str[j]);
- }
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+ if (compat_elf_hwcap & (1 << j)) {
+ /*
+ * Warn once if any feature should not
+			 * have been present on the arm64 platform.
+ */
+ if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+ continue;
+
+ seq_printf(m, " %s", compat_hwcap_str[j]);
}
+ }
- for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
- if (compat_elf_hwcap2 & (1 << j))
- seq_printf(m, " %s", compat_hwcap2_str[j]);
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
+ if (compat_elf_hwcap2 & (1 << j))
+ seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
- } else {
- for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
- if (cpu_have_feature(j))
- seq_printf(m, " %s", hwcap_str[j]);
- }
- seq_puts(m, "\n");
-
- seq_printf(m, "CPU implementer\t: 0x%02x\n",
- MIDR_IMPLEMENTOR(midr));
- seq_printf(m, "CPU architecture: 8\n");
- seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
- seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
- seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+ } else {
+ for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
+ if (cpu_have_feature(j))
+ seq_printf(m, " %s", hwcap_str[j]);
}
+ seq_puts(m, "\n");
+
+ seq_printf(m, "CPU implementer\t: 0x%02x\n",
+ MIDR_IMPLEMENTOR(midr));
+ seq_puts(m, "CPU architecture: 8\n");
+ seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+ seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+ seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
return 0;
}
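As the comment in c_show() notes, user space should obtain hwcaps from getauxval(AT_HWCAP) rather than parsing this file. A minimal example; the fallback define is only a guard for headers that lack HWCAP_SVE:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_SVE
#define HWCAP_SVE	(1 << 22)	/* arm64 value; define only if the header lacks it */
#endif

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("SVE %s\n", (hwcap & HWCAP_SVE) ? "present" : "absent");
	return 0;
}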
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < 1 ? (void *)1 : NULL;
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ return *pos < nr_cpu_ids ? &per_cpu(cpu_data, *pos) : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
- return NULL;
+ return c_start(m, pos);
}
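The reworked c_start()/c_next() pair above walks the online CPU mask one entry per iteration instead of dumping every CPU in a single pass. A rough user-space analogue of that iteration, with an invented online mask:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

static const bool cpu_online[NR_CPUS] = { true, true, false, true };

static int next_online(int cpu)		/* ~ cpumask_next(cpu - 1, ...) */
{
	for (; cpu < NR_CPUS; cpu++)
		if (cpu_online[cpu])
			return cpu;
	return NR_CPUS;			/* ~ nr_cpu_ids: iteration ends */
}

int main(void)
{
	for (int cpu = next_online(0); cpu < NR_CPUS; cpu = next_online(cpu + 1))
		printf("processor\t: %d\n", cpu);	/* prints 0, 1, 3 */
	return 0;
}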
static void c_stop(struct seq_file *m, void *v)
@@ -280,7 +299,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
@@ -311,11 +330,13 @@ static struct kobj_type cpuregs_kobj_type = {
CPUREGS_ATTR_RO(midr_el1, midr);
CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(aidr_el1, aidr);
CPUREGS_ATTR_RO(smidr_el1, smidr);
static struct attribute *cpuregs_id_attrs[] = {
&cpuregs_attr_midr_el1.attr,
&cpuregs_attr_revidr_el1.attr,
+ &cpuregs_attr_aidr_el1.attr,
NULL
};
@@ -452,6 +473,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
info->reg_revidr = read_cpuid(REVIDR_EL1);
+ info->reg_aidr = read_cpuid(AIDR_EL1);
info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
@@ -477,6 +499,22 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
+ /*
+ * info->reg_mpamidr deferred to {init,update}_cpu_features because we
+ * don't want to read it (and trigger a trap on buggy firmware) if
+ * using an aa64pfr0_el1 override to unconditionally disable MPAM.
+ */
+
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ /*
+		 * We mask out SMPS since, even if the hardware
+		 * supports priorities, the kernel does not at present
+		 * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+ }
+
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 64f2ecbdfe5c..29307642f4c9 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -21,8 +21,12 @@
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
+#include <asm/kgdb.h>
+#include <asm/kprobes.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
+#include <asm/uprobes.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
@@ -34,7 +38,7 @@ u8 debug_monitors_arch(void)
/*
* MDSCR access routines.
*/
-static void mdscr_write(u32 mdscr)
+static void mdscr_write(u64 mdscr)
{
unsigned long flags;
flags = local_daif_save();
@@ -43,7 +47,7 @@ static void mdscr_write(u32 mdscr)
}
NOKPROBE_SYMBOL(mdscr_write);
-static u32 mdscr_read(void)
+static u64 mdscr_read(void)
{
return read_sysreg(mdscr_el1);
}
@@ -79,16 +83,16 @@ static DEFINE_PER_CPU(int, kde_ref_count);
void enable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, enable = 0;
+ u64 mdscr, enable = 0;
WARN_ON(preemptible());
if (this_cpu_inc_return(mde_ref_count) == 1)
- enable = DBG_MDSCR_MDE;
+ enable = MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_inc_return(kde_ref_count) == 1)
- enable |= DBG_MDSCR_KDE;
+ enable |= MDSCR_EL1_KDE;
if (enable && debug_enabled) {
mdscr = mdscr_read();
@@ -100,16 +104,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, disable = 0;
+ u64 mdscr, disable = 0;
WARN_ON(preemptible());
if (this_cpu_dec_return(mde_ref_count) == 0)
- disable = ~DBG_MDSCR_MDE;
+ disable = ~MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_dec_return(kde_ref_count) == 0)
- disable &= ~DBG_MDSCR_KDE;
+ disable &= ~MDSCR_EL1_KDE;
if (disable) {
mdscr = mdscr_read();
@@ -156,190 +160,124 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
-static DEFINE_SPINLOCK(debug_hook_lock);
-static LIST_HEAD(user_step_hook);
-static LIST_HEAD(kernel_step_hook);
-
-static void register_debug_hook(struct list_head *node, struct list_head *list)
+static void send_user_sigtrap(int si_code)
{
- spin_lock(&debug_hook_lock);
- list_add_rcu(node, list);
- spin_unlock(&debug_hook_lock);
-
-}
+ struct pt_regs *regs = current_pt_regs();
-static void unregister_debug_hook(struct list_head *node)
-{
- spin_lock(&debug_hook_lock);
- list_del_rcu(node);
- spin_unlock(&debug_hook_lock);
- synchronize_rcu();
-}
+ if (WARN_ON(!user_mode(regs)))
+ return;
-void register_user_step_hook(struct step_hook *hook)
-{
- register_debug_hook(&hook->node, &user_step_hook);
-}
+ if (!regs_irqs_disabled(regs))
+ local_irq_enable();
-void unregister_user_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+ "User debug trap");
}
-void register_kernel_step_hook(struct step_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_softstep() from entry-common.c.
+ */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_step_hook);
-}
+ if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_TRACE);
+ /*
+ * ptrace will disable single step unless explicitly
+ * asked to re-enable it. For other clients, it makes
+ * sense to leave it enabled (i.e. rewind the controls
+ * to the active-not-pending state).
+ */
+ user_rewind_single_step(current);
}
-/*
- * Call registered single step handlers
- * There is no Syndrome info to check for determining the handler.
- * So we call all the registered handlers, until the right handler is
- * found which returns zero.
- */
-static int call_step_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs)
{
- struct step_hook *hook;
- struct list_head *list;
- int retval = DBG_HOOK_ERROR;
-
- list = user_mode(regs) ? &user_step_hook : &kernel_step_hook;
+ if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
/*
- * Since single-step exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
+ * Re-enable stepping since we know that we will be
+ * returning to regs.
*/
- list_for_each_entry_rcu(hook, list, node) {
- retval = hook->fn(regs, esr);
- if (retval == DBG_HOOK_HANDLED)
- break;
- }
-
- return retval;
+ set_regs_spsr_ss(regs);
}
-NOKPROBE_SYMBOL(call_step_hook);
+NOKPROBE_SYMBOL(do_el1_softstep);
-static void send_user_sigtrap(int si_code)
+static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr)
{
- struct pt_regs *regs = current_pt_regs();
+ if (esr_brk_comment(esr) == BUG_BRK_IMM)
+ return bug_brk_handler(regs, esr);
- if (WARN_ON(!user_mode(regs)))
- return;
+ if (IS_ENABLED(CONFIG_CFI) && esr_is_cfi_brk(esr))
+ return cfi_brk_handler(regs, esr);
- if (interrupts_enabled(regs))
- local_irq_enable();
+ if (esr_brk_comment(esr) == FAULT_BRK_IMM)
+ return reserved_fault_brk_handler(regs, esr);
- arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
- "User debug trap");
-}
+ if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
+ (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ return kasan_brk_handler(regs, esr);
-static int single_step_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
-{
- bool handler_found = false;
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr))
+ return ubsan_brk_handler(regs, esr);
- /*
- * If we are stepping a pending breakpoint, call the hw_breakpoint
- * handler first.
- */
- if (!reinstall_suspended_bps(regs))
- return 0;
-
- if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
-
- if (!handler_found && user_mode(regs)) {
- send_user_sigtrap(TRAP_TRACE);
-
- /*
- * ptrace will disable single step unless explicitly
- * asked to re-enable it. For other clients, it makes
- * sense to leave it enabled (i.e. rewind the controls
- * to the active-not-pending state).
- */
- user_rewind_single_step(current);
- } else if (!handler_found) {
- pr_warn("Unexpected kernel single-step exception at EL1\n");
- /*
- * Re-enable stepping since we know that we will be
- * returning to regs.
- */
- set_regs_spsr_ss(regs);
+ if (IS_ENABLED(CONFIG_KGDB)) {
+ if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM)
+ return kgdb_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM)
+ return kgdb_compiled_brk_handler(regs, esr);
}
- return 0;
-}
-NOKPROBE_SYMBOL(single_step_handler);
-
-static LIST_HEAD(user_break_hook);
-static LIST_HEAD(kernel_break_hook);
+ if (IS_ENABLED(CONFIG_KPROBES)) {
+ if (esr_brk_comment(esr) == KPROBES_BRK_IMM)
+ return kprobe_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM)
+ return kprobe_ss_brk_handler(regs, esr);
+ }
-void register_user_break_hook(struct break_hook *hook)
-{
- register_debug_hook(&hook->node, &user_break_hook);
-}
+ if (IS_ENABLED(CONFIG_KRETPROBES) &&
+ esr_brk_comment(esr) == KRETPROBES_BRK_IMM)
+ return kretprobe_brk_handler(regs, esr);
-void unregister_user_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ return DBG_HOOK_ERROR;
}
+NOKPROBE_SYMBOL(call_el1_break_hook);
-void register_kernel_break_hook(struct break_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_brk64() from entry-common.c.
+ */
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_break_hook);
-}
+ if (IS_ENABLED(CONFIG_UPROBES) &&
+ esr_brk_comment(esr) == UPROBES_BRK_IMM &&
+ uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_BRKPT);
}
-static int call_break_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs)
{
- struct break_hook *hook;
- struct list_head *list;
- int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
-
- list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
-
- /*
- * Since brk exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
- */
- list_for_each_entry_rcu(hook, list, node) {
- unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
-
- if ((comment & ~hook->mask) == hook->imm)
- fn = hook->fn;
- }
+ if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return;
- return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
+ die("Oops - BRK", regs, esr);
}
-NOKPROBE_SYMBOL(call_break_hook);
+NOKPROBE_SYMBOL(do_el1_brk64);
-static int brk_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+#ifdef CONFIG_COMPAT
+void do_bkpt32(unsigned long esr, struct pt_regs *regs)
{
- if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- } else {
- pr_warn("Unexpected kernel BRK exception at EL1\n");
- return -EFAULT;
- }
-
- return 0;
+ arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr);
}
-NOKPROBE_SYMBOL(brk_handler);
+#endif /* CONFIG_COMPAT */
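With the break-hook lists gone, EL1 BRK handling now dispatches directly on the 16-bit immediate (the ESR "comment"), as in call_el1_break_hook() above. A stand-alone sketch of that style of dispatch; the mask and immediates below are illustrative only, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_COMMENT_MASK	0xffffu	/* assumed width of the ESR comment field */
#define EXAMPLE_BUG_IMM		0x800u	/* made-up immediate */
#define EXAMPLE_KPROBES_IMM	0x004u	/* made-up immediate */

static unsigned int brk_comment(uint64_t esr)
{
	return esr & EXAMPLE_COMMENT_MASK;
}

static const char *dispatch(uint64_t esr)
{
	switch (brk_comment(esr)) {
	case EXAMPLE_BUG_IMM:		return "bug handler";
	case EXAMPLE_KPROBES_IMM:	return "kprobe handler";
	default:			return "unhandled";
	}
}

int main(void)
{
	printf("%s\n", dispatch(EXAMPLE_BUG_IMM));	/* bug handler */
	printf("%s\n", dispatch(0x123));		/* unhandled */
	return 0;
}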
-int aarch32_break_handler(struct pt_regs *regs)
+bool try_handle_aarch32_break(struct pt_regs *regs)
{
u32 arm_instr;
u16 thumb_instr;
@@ -347,7 +285,7 @@ int aarch32_break_handler(struct pt_regs *regs)
void __user *pc = (void __user *)instruction_pointer(regs);
if (!compat_user_mode(regs))
- return -EFAULT;
+ return false;
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
@@ -371,20 +309,12 @@ int aarch32_break_handler(struct pt_regs *regs)
}
if (!bp)
- return -EFAULT;
+ return false;
send_user_sigtrap(TRAP_BRKPT);
- return 0;
-}
-NOKPROBE_SYMBOL(aarch32_break_handler);
-
-void __init debug_traps_init(void)
-{
- hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
- TRAP_TRACE, "single-step handler");
- hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "BRK handler");
+ return true;
}
+NOKPROBE_SYMBOL(try_handle_aarch32_break);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
@@ -418,7 +348,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
{
WARN_ON(!irqs_disabled());
set_regs_spsr_ss(regs);
- mdscr_write(mdscr_read() | DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() | MDSCR_EL1_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_enable_single_step);
@@ -426,7 +356,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
WARN_ON(!irqs_disabled());
- mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() & ~MDSCR_EL1_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_disable_single_step);
@@ -434,7 +364,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
- return mdscr_read() & DBG_MDSCR_SS;
+ return mdscr_read() & MDSCR_EL1_SS;
}
NOKPROBE_SYMBOL(kernel_active_single_step);
@@ -443,6 +373,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
set_regs_spsr_ss(regs);
}
+void kernel_fastforward_single_step(struct pt_regs *regs)
+{
+ clear_regs_spsr_ss(regs);
+}
+
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index 11d7f7de202d..329e8df9215f 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -28,7 +28,7 @@
.macro __EFI_PE_HEADER
#ifdef CONFIG_EFI
.set .Lpe_header_offset, . - .L_head
- .long PE_MAGIC
+ .long IMAGE_NT_SIGNATURE
.short IMAGE_FILE_MACHINE_ARM64 // Machine
.short .Lsection_count // NumberOfSections
.long 0 // TimeDateStamp
@@ -40,7 +40,7 @@
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
.Loptional_header:
- .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+ .short IMAGE_NT_OPTIONAL_HDR64_MAGIC // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __initdata_begin - .Lefi_header_end // SizeOfCode
@@ -66,7 +66,7 @@
.long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
+ .short IMAGE_DLLCHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 712718aed5dd..a81cb4aa4738 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -10,11 +10,13 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/kmemleak.h>
+#include <linux/kthread.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
+#include <asm/vmap_stack.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -29,13 +31,21 @@ static bool region_is_misaligned(const efi_memory_desc_t *md)
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
-static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
- if (type == EFI_MEMORY_MAPPED_IO)
- return PROT_DEVICE_nGnRE;
+ if (type == EFI_MEMORY_MAPPED_IO) {
+ pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE);
+
+ if (arm64_is_protected_mmio(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT))
+ prot = pgprot_encrypted(prot);
+ else
+ prot = pgprot_decrypted(prot);
+ return pgprot_val(prot);
+ }
if (region_is_misaligned(md)) {
static bool __initdata code_is_misaligned;
@@ -75,7 +85,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- pteval_t prot_val = create_mapping_protection(md);
+ ptdesc_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
@@ -156,20 +166,53 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
return s;
}
-static DEFINE_RAW_SPINLOCK(efi_rt_lock);
-
void arch_efi_call_virt_setup(void)
{
- efi_virtmap_load();
+ efi_runtime_assert_lock_held();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ /*
+ * Disable migration to ensure that a preempted EFI runtime
+ * service call will be resumed on the same CPU. This avoids
+ * potential issues with EFI runtime calls that are preempted
+ * while polling for an asynchronous completion of a secure
+ * firmware call, which may not permit the CPU to change.
+ */
+ migrate_disable();
+ kthread_use_mm(&efi_mm);
+ } else {
+ efi_virtmap_load();
+ }
+
+ /*
+ * Enable access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from exception when
+ * invoking the EFI run-time services.
+ */
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
+
__efi_fpsimd_begin();
- raw_spin_lock(&efi_rt_lock);
}
void arch_efi_call_virt_teardown(void)
{
- raw_spin_unlock(&efi_rt_lock);
__efi_fpsimd_end();
- efi_virtmap_unload();
+
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1 until
+ * uaccess_enable(). Do so before efi_virtmap_unload() updates the
+ * saved TTBR0 value, so the userland page tables are not activated
+ * inadvertently over the back of an exception.
+ */
+ uaccess_ttbr0_disable();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ kthread_unuse_mm(&efi_mm);
+ migrate_enable();
+ } else {
+ efi_virtmap_unload();
+ }
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
@@ -206,9 +249,8 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
-l: if (!p) {
+ p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
+ if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 2e94d20c4ac7..b735f4c2fe5e 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
int ret = 1;
unsigned long addr;
void *tags = NULL;
+ int locked = 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
- struct page *page = get_dump_page(addr);
+ struct page *page = get_dump_page(addr, &locked);
/*
* get_dump_page() returns NULL when encountering an empty
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index b77a15955f28..3625797e9ee8 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,8 +6,10 @@
*/
#include <linux/context_tracking.h>
+#include <linux/irq-entry-common.h>
#include <linux/kasan.h>
#include <linux/linkage.h>
+#include <linux/livepatch.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/resume_user_mode.h>
@@ -32,67 +34,28 @@
* Handle IRQ/context state management when entering from kernel mode.
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_enter(), handling the kernel
- * mode transitions only.
*/
-static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
+static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs)
{
- regs->exit_rcu = false;
-
- if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
- lockdep_hardirqs_off(CALLER_ADDR0);
- ct_irq_enter();
- trace_hardirqs_off_finish();
+ irqentry_state_t state;
- regs->exit_rcu = true;
- return;
- }
-
- lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter_check_tick();
- trace_hardirqs_off_finish();
-}
-
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
-{
- __enter_from_kernel_mode(regs);
+ state = irqentry_enter(regs);
mte_check_tfsr_entry();
mte_disable_tco_entry(current);
+
+ return state;
}
/*
* Handle IRQ/context state management when exiting to kernel mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_exit(), handling the kernel
- * mode transitions only, and with preemption handled elsewhere.
*/
-static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
-{
- lockdep_assert_irqs_disabled();
-
- if (interrupts_enabled(regs)) {
- if (regs->exit_rcu) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- ct_irq_exit();
- lockdep_hardirqs_on(CALLER_ADDR0);
- return;
- }
-
- trace_hardirqs_on();
- } else {
- if (regs->exit_rcu)
- ct_irq_exit();
- }
-}
-
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs,
+ irqentry_state_t state)
{
mte_check_tfsr_exit();
- __exit_to_kernel_mode(regs);
+ irqentry_exit(regs, state);
}
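The conversion above replaces state previously stashed in pt_regs (exit_rcu, lockdep_hardirqs) with an irqentry_state_t value returned by the enter path and handed back to the exit path. A small sketch of that pairing style, with invented names:

#include <stdbool.h>
#include <stdio.h>

struct entry_state {		/* ~ irqentry_state_t */
	bool irqs_were_on;
};

static bool irqs_on = true;	/* pretend interrupt state */

static struct entry_state region_enter(void)
{
	struct entry_state state = { .irqs_were_on = irqs_on };

	irqs_on = false;	/* "mask interrupts" on entry */
	return state;
}

static void region_exit(struct entry_state state)
{
	irqs_on = state.irqs_were_on;	/* restore what entry saw */
}

int main(void)
{
	struct entry_state s = region_enter();

	/* ... handle the exception ... */
	region_exit(s);
	printf("irqs restored: %s\n", irqs_on ? "on" : "off");
	return 0;
}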
/*
@@ -100,129 +63,30 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __enter_from_user_mode(void)
+static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
{
- lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
- trace_hardirqs_off_finish();
+ enter_from_user_mode(regs);
mte_disable_tco_entry(current);
}
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
- __enter_from_user_mode();
-}
-
/*
* Handle IRQ/context state management when exiting to user mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __exit_to_user_mode(void)
-{
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- user_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
-{
- do {
- local_irq_enable();
- if (thread_flags & _TIF_NEED_RESCHED)
- schedule();
-
- if (thread_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
- clear_thread_flag(TIF_MTE_ASYNC_FAULT);
- send_sig_fault(SIGSEGV, SEGV_MTEAERR,
- (void __user *)NULL, current);
- }
-
- if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- do_signal(regs);
-
- if (thread_flags & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
-
- local_irq_disable();
- thread_flags = read_thread_flags();
- } while (thread_flags & _TIF_WORK_MASK);
-}
-
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
- unsigned long flags;
-
local_irq_disable();
-
- flags = read_thread_flags();
- if (unlikely(flags & _TIF_WORK_MASK))
- do_notify_resume(regs, flags);
-
+ exit_to_user_mode_prepare_legacy(regs);
local_daif_mask();
-
- lockdep_sys_exit();
-}
-
-static __always_inline void exit_to_user_mode(struct pt_regs *regs)
-{
- exit_to_user_mode_prepare(regs);
mte_check_tfsr_exit();
- __exit_to_user_mode();
+ exit_to_user_mode();
}
asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
{
- exit_to_user_mode(regs);
-}
-
-/*
- * Handle IRQ/context state management when entering an NMI from user/kernel
- * mode. Before this function is called it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_enter_nmi(struct pt_regs *regs)
-{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
- __nmi_enter();
- lockdep_hardirqs_off(CALLER_ADDR0);
- lockdep_hardirq_enter();
- ct_nmi_enter();
-
- trace_hardirqs_off_finish();
- ftrace_nmi_enter();
-}
-
-/*
- * Handle IRQ/context state management when exiting an NMI from user/kernel
- * mode. After this function returns it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_exit_nmi(struct pt_regs *regs)
-{
- bool restore = regs->lockdep_hardirqs;
-
- ftrace_nmi_exit();
- if (restore) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- }
-
- ct_nmi_exit();
- lockdep_hardirq_exit();
- if (restore)
- lockdep_hardirqs_on(CALLER_ADDR0);
- __nmi_exit();
+ arm64_exit_to_user_mode(regs);
}
/*
@@ -230,14 +94,18 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
* kernel mode. Before this function is called it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+static noinstr irqentry_state_t arm64_enter_el1_dbg(struct pt_regs *regs)
{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+ irqentry_state_t state;
+
+ state.lockdep = lockdep_hardirqs_enabled();
lockdep_hardirqs_off(CALLER_ADDR0);
ct_nmi_enter();
trace_hardirqs_off_finish();
+
+ return state;
}
/*
@@ -245,62 +113,19 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
* kernel mode. After this function returns it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs,
+ irqentry_state_t state)
{
- bool restore = regs->lockdep_hardirqs;
-
- if (restore) {
+ if (state.lockdep) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
}
ct_nmi_exit();
- if (restore)
+ if (state.lockdep)
lockdep_hardirqs_on(CALLER_ADDR0);
}
-#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#define need_irq_preemption() \
- (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
-#else
-#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
-#endif
-
-static void __sched arm64_preempt_schedule_irq(void)
-{
- if (!need_irq_preemption())
- return;
-
- /*
- * Note: thread_info::preempt_count includes both thread_info::count
- * and thread_info::need_resched, and is not equivalent to
- * preempt_count().
- */
- if (READ_ONCE(current_thread_info()->preempt_count) != 0)
- return;
-
- /*
- * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
- * priority masking is used the GIC irqchip driver will clear DAIF.IF
- * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
- * DAIF we must have handled an NMI, so skip preemption.
- */
- if (system_uses_irq_prio_masking() && read_sysreg(daif))
- return;
-
- /*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
- */
- if (system_capabilities_finalized())
- preempt_schedule_irq();
-}
-
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
@@ -320,7 +145,7 @@ extern void (*handle_arch_fiq)(struct pt_regs *);
static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
unsigned long esr)
{
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
console_verbose();
@@ -344,7 +169,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
- u32 reg, val;
+ u64 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
@@ -354,7 +179,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
@@ -393,20 +218,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
* As per the ABI exit SME streaming mode and clear the SVE state not
* shared with FPSIMD on syscall entry.
*/
-static inline void fp_user_discard(void)
+static inline void fpsimd_syscall_enter(void)
{
- /*
- * If SME is active then exit streaming mode. If ZA is active
- * then flush the SVE registers but leave userspace access to
- * both SVE and SME enabled, otherwise disable SME for the
- * task and fall through to disabling SVE too. This means
- * that after a syscall we never have any streaming mode
- * register state to track, if this changes the KVM code will
- * need updating.
- */
+ /* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
if (system_supports_sme())
sme_smstop_sm();
+ /*
+ * The CPU is not in streaming mode. If non-streaming SVE is not
+ * supported, there is no SVE state that needs to be discarded.
+ */
if (!system_supports_sve())
return;
@@ -416,8 +237,57 @@ static inline void fp_user_discard(void)
sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_flush_live(true, sve_vq_minus_one);
}
+
+ /*
+ * Any live non-FPSIMD SVE state has been zeroed. Allow
+ * fpsimd_save_user_state() to lazily discard SVE state until either
+ * the live state is unbound or fpsimd_syscall_exit() is called.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
+}
+
+static __always_inline void fpsimd_syscall_exit(void)
+{
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The current task's user FPSIMD/SVE/SME state is now bound to this
+ * CPU. The fpsimd_last_state.to_save value is either:
+ *
+ * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
+ * since fpsimd_syscall_enter().
+ *
+ * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
+ * any point.
+ *
+ * Reset this to FP_STATE_CURRENT to stop lazy discarding.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
}
+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+ preempt_disable();
+
+ /* This code is a bit fragile. Test it. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+ preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
UNHANDLED(el1t, 64, sync)
UNHANDLED(el1t, 64, irq)
UNHANDLED(el1t, 64, fiq)
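
To make the to_save handshake from the fpsimd_syscall_enter()/fpsimd_syscall_exit() comments earlier in this hunk concrete, here is a tiny self-contained C sketch of the lazy-discard window; the enum values only mirror the kernel's FP_STATE_* names and nothing below is kernel code.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's FP_STATE_* constants. */
enum fp_state { FP_STATE_CURRENT, FP_STATE_FPSIMD, FP_STATE_SVE };

/* Stands in for the per-CPU fpsimd_last_state.to_save field. */
static enum fp_state to_save = FP_STATE_CURRENT;

static void syscall_enter(void)  { to_save = FP_STATE_FPSIMD;  } /* SVE state discarded */
static void state_reloaded(void) { to_save = FP_STATE_CURRENT; } /* window closed early */
static void syscall_exit(void)   { to_save = FP_STATE_CURRENT; } /* window closed */

int main(void)
{
	syscall_enter();
	printf("inside syscall: save %s\n",
	       to_save == FP_STATE_FPSIMD ? "FPSIMD only" : "full state");
	state_reloaded();	/* e.g. the task's state was rebound to this CPU */
	syscall_exit();
	printf("after syscall:  save %s\n",
	       to_save == FP_STATE_CURRENT ? "whatever is live" : "FPSIMD only");
	return 0;
}
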
@@ -426,60 +296,135 @@ UNHANDLED(el1t, 64, error)
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_mem_abort(far, esr, regs);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_sp_pc_abort(far, esr, regs);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_undef(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_bti(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_gcs(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_mops(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
+}
+
+static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs)) {
+ debug_exception_enter(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+		 * If we are stepping a suspended breakpoint, there's nothing
+		 * more to do: the single-step is complete.
+ */
+ if (!try_step_suspended_breakpoints(regs))
+ do_el1_softstep(esr, regs);
+ debug_exception_exit(regs);
+ }
+ arm64_exit_el1_dbg(regs, state);
}
-static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr)
{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- arm64_enter_el1_dbg(regs);
- if (!cortex_a76_erratum_1463225_debug_handler(regs))
- do_debug_exception(far, esr, regs);
- arm64_exit_el1_dbg(regs);
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
+}
+
+static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_el1_brk64(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
}
static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_fpac(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
@@ -505,11 +450,23 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BTI:
el1_bti(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el1_gcs(regs, esr);
+ break;
+ case ESR_ELx_EC_MOPS:
+ el1_mops(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_CUR:
+ el1_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_CUR:
+ el1_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_CUR:
+ el1_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el1_dbg(regs, esr);
+ el1_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el1_fpac(regs, esr);
@@ -522,30 +479,32 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
static __always_inline void __el1_pnmi(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- arm64_enter_nmi(regs);
+ irqentry_state_t state;
+
+ state = irqentry_nmi_enter(regs);
do_interrupt_handler(regs, handler);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static __always_inline void __el1_irq(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
irq_enter_rcu();
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- arm64_preempt_schedule_irq();
-
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
- if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && regs_irqs_disabled(regs))
__el1_pnmi(regs, handler);
else
__el1_irq(regs, handler);
@@ -564,21 +523,22 @@ asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -593,50 +553,50 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sme_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_sys(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -646,79 +606,132 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_undef(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_bti(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_bti(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_mops(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_gcs(regs, esr);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
+{
+ bool step_done;
+
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+	 * If we are stepping a suspended breakpoint, there's nothing
+	 * more to do: the single-step is complete.
+ */
+ step_done = try_step_suspended_breakpoints(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ if (!step_done)
+ do_el0_softstep(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
-static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr)
{
- /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
- do_debug_exception(far, esr, regs);
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_brk64(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
- fp_user_discard();
+ fpsimd_syscall_enter();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+ fpsimd_syscall_exit();
}
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_fpac(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -766,11 +779,20 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_MOPS:
el0_mops(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el0_gcs(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el0_dbg(regs, esr);
+ el0_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el0_fpac(regs, esr);
@@ -783,7 +805,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
static void noinstr el0_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
@@ -794,7 +816,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -820,14 +842,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
static void noinstr __el0_error_handler_common(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -838,19 +861,27 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_cp15(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc_compat(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_bkpt32(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
@@ -887,10 +918,16 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
el0_cp15(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BKPT32:
- el0_dbg(regs, esr);
+ el0_bkpt32(regs, esr);
break;
default:
el0_inv(regs, esr);
@@ -918,21 +955,20 @@ UNHANDLED(el0t, 32, fiq)
UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
-#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
panic_bad_stack(regs, esr, far);
}
-#endif /* CONFIG_VMAP_STACK */
#ifdef CONFIG_ARM_SDE_INTERFACE
asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
+ irqentry_state_t state;
unsigned long ret;
/*
@@ -957,9 +993,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
else if (cpu_has_pan())
set_pstate_pan(0);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
ret = do_sdei_event(regs, arg);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
return ret;
}
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index f0c16640ef21..025140caafe7 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller)
stp x29, x30, [sp, #FREGS_SIZE]
add x29, sp, #FREGS_SIZE
- /* Prepare arguments for the the tracer func */
+ /* Prepare arguments for the tracer func */
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
mov x1, x9 // parent_ip (callsite's LR)
mov x3, sp // regs
@@ -329,24 +329,28 @@ SYM_FUNC_END(ftrace_stub_graph)
* @fp is checked against the value passed by ftrace_graph_caller().
*/
SYM_CODE_START(return_to_handler)
- /* save return value regs */
- sub sp, sp, #FGRET_REGS_SIZE
- stp x0, x1, [sp, #FGRET_REGS_X0]
- stp x2, x3, [sp, #FGRET_REGS_X2]
- stp x4, x5, [sp, #FGRET_REGS_X4]
- stp x6, x7, [sp, #FGRET_REGS_X6]
- str x29, [sp, #FGRET_REGS_FP] // parent's fp
+ /* Make room for ftrace_regs */
+ sub sp, sp, #FREGS_SIZE
+
+ /* Save return value regs */
+ stp x0, x1, [sp, #FREGS_X0]
+ stp x2, x3, [sp, #FREGS_X2]
+ stp x4, x5, [sp, #FREGS_X4]
+ stp x6, x7, [sp, #FREGS_X6]
+
+ /* Save the callsite's FP */
+ str x29, [sp, #FREGS_FP]
mov x0, sp
- bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs);
+	bl	ftrace_return_to_handler	// addr = ftrace_return_to_handler(fregs);
mov x30, x0 // restore the original return address
- /* restore return value regs */
- ldp x0, x1, [sp, #FGRET_REGS_X0]
- ldp x2, x3, [sp, #FGRET_REGS_X2]
- ldp x4, x5, [sp, #FGRET_REGS_X4]
- ldp x6, x7, [sp, #FGRET_REGS_X6]
- add sp, sp, #FGRET_REGS_SIZE
+ /* Restore return value regs */
+ ldp x0, x1, [sp, #FREGS_X0]
+ ldp x2, x3, [sp, #FREGS_X2]
+ ldp x4, x5, [sp, #FREGS_X4]
+ ldp x6, x7, [sp, #FREGS_X6]
+ add sp, sp, #FREGS_SIZE
ret
SYM_CODE_END(return_to_handler)
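
The FGRET_REGS_* constants above are replaced by the FREGS_* offsets that ftrace_caller already uses, and those come out of asm-offsets generated from struct __arch_ftrace_regs. A hedged sketch of how such offsets are typically emitted in asm-offsets.c; fp and lr are visible in the ftrace.c hunks later in this patch, the regs[] member name is an assumption for illustration only.

/* asm-offsets.c style sketch (lines live inside the arch's offsets function). */
DEFINE(FREGS_X0,   offsetof(struct __arch_ftrace_regs, regs[0]));	/* assumed member */
DEFINE(FREGS_FP,   offsetof(struct __arch_ftrace_regs, fp));
DEFINE(FREGS_LR,   offsetof(struct __arch_ftrace_regs, lr));
DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs));
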
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7ef0e127b149..f8018b5c1f9a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,6 +25,7 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -54,7 +55,6 @@
.endif
sub sp, sp, #PT_REGS_SIZE
-#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
@@ -96,7 +96,6 @@
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
-#endif
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
@@ -284,15 +283,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, create a final frame record.
- * For exceptions from EL1, create a synthetic frame record so the
- * interrupted code shows up in the backtrace.
+ * Create a metadata frame record. The unwinder will use this to
+ * identify and unwind exception boundaries.
*/
- .if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
+ .if \el == 0
+ mov x0, #FRAME_META_TYPE_FINAL
.else
- stp x29, x22, [sp, #S_STACKFRAME]
+ mov x0, #FRAME_META_TYPE_PT_REGS
.endif
+ str x0, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
@@ -315,7 +315,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
alternative_else_nop_endif
mrs_s x20, SYS_ICC_PMR_EL1
- str x20, [sp, #S_PMR_SAVE]
+ str w20, [sp, #S_PMR]
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
@@ -342,7 +342,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
b .Lskip_pmr_restore\@
alternative_else_nop_endif
- ldr x20, [sp, #S_PMR_SAVE]
+ ldr w20, [sp, #S_PMR]
msr_s SYS_ICC_PMR_EL1, x20
/* Ensure priority change is seen by redistributor */
@@ -538,7 +538,6 @@ SYM_CODE_START(vectors)
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
-#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
@@ -566,7 +565,6 @@ SYM_CODE_START_LOCAL(__bad_stack)
bl handle_bad_stack
ASM_BUG()
SYM_CODE_END(__bad_stack)
-#endif /* CONFIG_VMAP_STACK */
.macro entry_handler el:req, ht:req, regsize:req, label:req
@@ -612,7 +610,7 @@ SYM_CODE_END(ret_to_kernel)
SYM_CODE_START_LOCAL(ret_to_user)
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
enable_step_tsk x19, x2
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
bl stackleak_erase_on_task_stack
#endif
kernel_exit 0
@@ -823,6 +821,7 @@ SYM_CODE_END(__bp_harden_el1_vectors)
*
*/
SYM_FUNC_START(cpu_switch_to)
+ save_and_disable_daif x11
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -846,6 +845,7 @@ SYM_FUNC_START(cpu_switch_to)
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
scs_load_current
+ restore_irq x11
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -872,6 +872,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
scs_save x16
@@ -886,8 +887,10 @@ SYM_FUNC_START(call_on_irq_stack)
/* Move to the new stack and call the function there */
add sp, x16, #IRQ_STACK_SIZE
+ restore_irq x9
blr x1
+ save_and_disable_daif x9
/*
* Restore the SP from the FP, and restore the FP and LR from the frame
* record.
@@ -895,6 +898,7 @@ SYM_FUNC_START(call_on_irq_stack)
mov sp, x29
ldp x29, x30, [sp], #16
scs_load_current
+ restore_irq x9
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
@@ -1001,7 +1005,6 @@ SYM_CODE_START(__sdei_asm_handler)
1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
2: str x19, [x5]
-#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
@@ -1014,7 +1017,6 @@ SYM_CODE_START(__sdei_asm_handler)
2: mov x6, #SDEI_STACK_SIZE
add x5, x5, x6
mov sp, x5
-#endif
#ifdef CONFIG_SHADOW_CALL_STACK
/* Use a separate shadow call stack for normal and critical events */
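
For reference, the overflow test that kernel_ventry now performs unconditionally (with the CONFIG_VMAP_STACK guards gone) relies purely on stack alignment; a self-contained C illustration of the bit test, where THREAD_SHIFT and the addresses are made-up example values:

#include <stdint.h>
#include <stdio.h>

#define THREAD_SHIFT	14UL			/* example value only */
#define THREAD_SIZE	(1UL << THREAD_SHIFT)

/*
 * If the stack base is aligned so that bit THREAD_SHIFT is clear for every
 * in-bounds SP, a single test-bit check is enough to spot an overflow.
 */
static int sp_overflowed(uintptr_t sp)
{
	return (sp & (1UL << THREAD_SHIFT)) != 0;
}

int main(void)
{
	uintptr_t base = 0x40000000UL;		/* made-up, suitably aligned base */

	printf("in bounds:  %d\n", sp_overflowed(base + THREAD_SIZE - 16));
	printf("overflowed: %d\n", sp_overflowed(base - 16));
	return 0;
}
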
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 82e8a6017382..c154f72634e0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -119,7 +119,7 @@
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
@@ -180,12 +180,12 @@ static inline void set_sve_default_vl(int val)
set_default_vl(ARM64_VEC_SVE, val);
}
-static void __percpu *efi_sve_state;
+static u8 *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
-extern void __percpu *efi_sve_state;
+extern u8 *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
@@ -225,10 +225,21 @@ static void fpsimd_bind_task_to_cpu(void);
*/
static void get_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_disable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ /*
+ * The softirq subsystem lacks a true unmask/mask API, and
+ * re-enabling softirq processing using local_bh_enable() will
+ * not only unmask softirqs, it will also result in immediate
+ * delivery of any pending softirqs.
+ * This is undesirable when running with IRQs disabled, but in
+ * that case, there is no need to mask softirqs in the first
+ * place, so only bother doing so when IRQs are enabled.
+ */
+ if (!irqs_disabled())
+ local_bh_disable();
+ } else {
preempt_disable();
+ }
}
/*
@@ -240,10 +251,12 @@ static void get_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_enable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (!irqs_disabled())
+ local_bh_enable();
+ } else {
preempt_enable();
+ }
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
@@ -359,20 +372,15 @@ static void task_fpsimd_load(void)
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
- if (system_supports_fpmr())
- write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
-
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
- if (test_and_clear_thread_flag(TIF_SVE))
- sve_user_disable();
+ clear_thread_flag(TIF_SVE);
break;
case FP_STATE_SVE:
- if (!thread_sm_enabled(&current->thread) &&
- !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
- sve_user_enable();
+ if (!thread_sm_enabled(&current->thread))
+ WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
if (test_thread_flag(TIF_SVE))
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
@@ -386,7 +394,7 @@ static void task_fpsimd_load(void)
* fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
- * memory allocated for FPSMID registers so
+ * memory allocated for FPSIMD registers so
* try that and hope for the best.
*/
WARN_ON_ONCE(1);
@@ -413,6 +421,9 @@ static void task_fpsimd_load(void)
restore_ffr = system_supports_fa64();
}
+ if (system_supports_fpmr())
+ write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
@@ -453,12 +464,15 @@ static void fpsimd_save_user_state(void)
*(last->fpmr) = read_sysreg_s(SYS_FPMR);
/*
- * If a task is in a syscall the ABI allows us to only
- * preserve the state shared with FPSIMD so don't bother
- * saving the full SVE state in that case.
+ * Save SVE state if it is live.
+ *
+ * The syscall ABI discards live SVE state at syscall entry. When
+ * entering a syscall, fpsimd_syscall_enter() sets to_save to
+ * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
+ * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
+ * called prior to a return to userspace.
*/
- if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
- !in_syscall(current_pt_regs())) ||
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
@@ -535,7 +549,7 @@ static unsigned int find_supported_vector_length(enum vec_type type,
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
-static int vec_proc_do_default_vl(struct ctl_table *table, int write,
+static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = table->extra1;
@@ -562,7 +576,7 @@ static int vec_proc_do_default_vl(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table sve_default_vl_table[] = {
+static const struct ctl_table sve_default_vl_table[] = {
{
.procname = "sve_default_vector_length",
.mode = 0644,
@@ -585,7 +599,7 @@ static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
-static struct ctl_table sme_default_vl_table[] = {
+static const struct ctl_table sme_default_vl_table[] = {
{
.procname = "sme_default_vector_length",
.mode = 0644,
@@ -651,7 +665,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
* task->thread.uw.fpsimd_state must be up to date before calling this
* function.
*/
-static void fpsimd_to_sve(struct task_struct *task)
+static inline void fpsimd_to_sve(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
@@ -675,7 +689,7 @@ static void fpsimd_to_sve(struct task_struct *task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
*/
-static void sve_to_fpsimd(struct task_struct *task)
+static inline void sve_to_fpsimd(struct task_struct *task)
{
unsigned int vq, vl;
void const *sst = task->thread.sve_state;
@@ -694,44 +708,39 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
-void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
+static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
- write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
- SYS_SCTLR_EL1);
+ memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}
-#ifdef CONFIG_ARM64_SVE
/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
+ * Simulate the effects of an SMSTOP SM instruction.
*/
-static void __sve_free(struct task_struct *task)
+void task_smstop_sm(struct task_struct *task)
{
- kfree(task->thread.sve_state);
- task->thread.sve_state = NULL;
-}
+ if (!thread_sm_enabled(&task->thread))
+ return;
-static void sve_free(struct task_struct *task)
-{
- WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+ __fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
+ task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
+ if (system_supports_fpmr())
+ task->thread.uw.fpmr = 0;
- __sve_free(task);
+ task->thread.svcr &= ~SVCR_SM_MASK;
+ task->thread.fp_type = FP_STATE_FPSIMD;
}
-/*
- * Return how many bytes of memory are required to store the full SVE
- * state for task, given task's currently configured vector length.
- */
-size_t sve_state_size(struct task_struct const *task)
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
- unsigned int vl = 0;
-
- if (system_supports_sve())
- vl = task_get_sve_vl(task);
- if (system_supports_sme())
- vl = max(vl, task_get_sme_vl(task));
+ write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+ SYS_SCTLR_EL1);
+}
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+#ifdef CONFIG_ARM64_SVE
+static void sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
}
/*
@@ -758,69 +767,34 @@ void sve_alloc(struct task_struct *task, bool flush)
kzalloc(sve_state_size(task), GFP_KERNEL);
}
-
-/*
- * Force the FPSIMD state shared with SVE to be updated in the SVE state
- * even if the SVE state is the current active state.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void fpsimd_force_sync_to_sve(struct task_struct *task)
-{
- fpsimd_to_sve(task);
-}
-
/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the user task, irrespective of when SVE is in use or not.
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
+ * task's currently effective FPSIMD/SVE state.
*
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void fpsimd_sync_to_sve(struct task_struct *task)
-{
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
- fpsimd_to_sve(task);
-}
-
-/*
- * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
- * the user task, irrespective of whether SVE is in use or not.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void sve_sync_to_fpsimd(struct task_struct *task)
+void fpsimd_sync_from_effective_state(struct task_struct *task)
{
if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.uw.fpsimd_state.
+ * Ensure that the task's currently effective FPSIMD/SVE state is up to date
+ * with respect to task->thread.uw.fpsimd_state, zeroing any effective
+ * non-FPSIMD (S)SVE state.
*
- * This should only be called by ptrace to merge new FPSIMD register
- * values into a task for which SVE is currently active.
- * task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- * task->thread.uw.fpsimd_state must already have been initialised with
- * the new FPSIMD register values to be merged in.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
+ if (task->thread.fp_type != FP_STATE_SVE)
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -829,10 +803,73 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
__fpsimd_to_sve(sst, fst, vq);
}
+static int change_live_vector_length(struct task_struct *task,
+ enum vec_type type,
+ unsigned long vl)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ void *sve_state = NULL, *sme_state = NULL;
+
+ if (type == ARM64_VEC_SME)
+ sme_vl = vl;
+ else
+ sve_vl = vl;
+
+ /*
+ * Allocate the new sve_state and sme_state before freeing the old
+ * copies so that allocation failure can be handled without needing to
+ * mutate the task's state in any way.
+ *
+ * Changes to the SVE vector length must not discard live ZA state or
+ * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
+ * ZA lazy saving scheme may attempt to change the SVE vector length
+ * while unsaved/dormant ZA state exists.
+ */
+ sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
+ if (!sve_state)
+ goto out_mem;
+
+ if (type == ARM64_VEC_SME) {
+ sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
+ if (!sme_state)
+ goto out_mem;
+ }
+
+ if (task == current)
+ fpsimd_save_and_flush_current_state();
+ else
+ fpsimd_flush_task_state(task);
+
+ /*
+ * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
+ * other SVE state.
+ */
+ fpsimd_sync_from_effective_state(task);
+ task_set_vl(task, type, vl);
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = sve_state;
+ fpsimd_sync_to_effective_state_zeropad(task);
+
+ if (type == ARM64_VEC_SME) {
+ task->thread.svcr &= ~SVCR_ZA_MASK;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = sme_state;
+ }
+
+ return 0;
+
+out_mem:
+ kfree(sve_state);
+ kfree(sme_state);
+ return -ENOMEM;
+}
+
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
- bool free_sme = false;
+ bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
+ bool inherit = flags & PR_SVE_VL_INHERIT;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
@@ -852,71 +889,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
vl = find_supported_vector_length(type, vl);
- if (flags & (PR_SVE_VL_INHERIT |
- PR_SVE_SET_VL_ONEXEC))
+ if (!onexec && vl != task_get_vl(task, type)) {
+ if (change_live_vector_length(task, type, vl))
+ return -ENOMEM;
+ }
+
+ if (onexec || inherit)
task_set_vl_onexec(task, type, vl);
else
/* Reset VL to system default on next exec: */
task_set_vl_onexec(task, type, 0);
- /* Only actually set the VL if not deferred: */
- if (flags & PR_SVE_SET_VL_ONEXEC)
- goto out;
-
- if (vl == task_get_vl(task, type))
- goto out;
-
- /*
- * To ensure the FPSIMD bits of the SVE vector registers are preserved,
- * write any live register state back to task_struct, and convert to a
- * regular FPSIMD thread.
- */
- if (task == current) {
- get_cpu_fpsimd_context();
-
- fpsimd_save_user_state();
- }
-
- fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread)) {
- sve_to_fpsimd(task);
- task->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- if (system_supports_sme()) {
- if (type == ARM64_VEC_SME ||
- !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
- /*
- * We are changing the SME VL or weren't using
- * SME anyway, discard the state and force a
- * reallocation.
- */
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_tsk_thread_flag(task, TIF_SME);
- free_sme = true;
- }
- }
-
- if (task == current)
- put_cpu_fpsimd_context();
-
- task_set_vl(task, type, vl);
-
- /*
- * Free the changed states if they are not in use, SME will be
- * reallocated to the correct size on next use and we just
- * allocate SVE now in case it is needed for use in streaming
- * mode.
- */
- sve_free(task);
- sve_alloc(task, true);
-
- if (free_sme)
- sme_free(task);
-
-out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
flags & PR_SVE_VL_INHERIT);
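
vec_set_vector_length() is what ultimately backs the PR_SVE_SET_VL and PR_SME_SET_VL prctls, so the ONEXEC/INHERIT flag handling above is user-visible. A minimal userspace example of exercising it; the constants fall back to the uapi values in case the libc headers are old, and the requested VL may be rounded to a supported length:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SVE_SET_VL
#define PR_SVE_SET_VL		50
#define PR_SVE_GET_VL		51
#define PR_SVE_VL_LEN_MASK	0xffff
#define PR_SVE_VL_INHERIT	(1 << 17)
#define PR_SVE_SET_VL_ONEXEC	(1 << 18)
#endif

int main(void)
{
	/* Ask for a 256-bit (32-byte) vector length, inherited across fork/exec. */
	int ret = prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);

	if (ret < 0) {
		perror("PR_SVE_SET_VL");
		return 1;
	}

	/* The kernel reports the VL actually granted (it may round down). */
	printf("effective SVE VL: %d bytes\n",
	       prctl(PR_SVE_GET_VL) & PR_SVE_VL_LEN_MASK);
	return 0;
}
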
@@ -1131,15 +1114,15 @@ static void __init sve_efi_setup(void)
if (!sve_vl_valid(max_vl))
goto fail;
- efi_sve_state = __alloc_percpu(
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
+ efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
+ GFP_KERNEL);
if (!efi_sve_state)
goto fail;
return;
fail:
- panic("Cannot allocate percpu memory for EFI SVE save/restore");
+ panic("Cannot allocate memory for EFI SVE save/restore");
}
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
@@ -1212,7 +1195,7 @@ void __init sve_setup(void)
*/
void fpsimd_release_task(struct task_struct *dead_task)
{
- __sve_free(dead_task);
+ sve_free(dead_task);
sme_free(dead_task);
}
@@ -1295,6 +1278,8 @@ void __init sme_setup(void)
if (!system_supports_sme())
return;
+ min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+
/*
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
@@ -1302,9 +1287,8 @@ void __init sme_setup(void)
* let's double check here. The bitmap is SVE_VQ_MAP sized for
* sharing with SVE.
*/
- WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+ WARN_ON(min_bit >= SVE_VQ_MAX);
- min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
@@ -1367,6 +1351,7 @@ static void sve_init_regs(void)
} else {
fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
+ fpsimd_flush_task_state(current);
}
}
@@ -1435,7 +1420,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
* If this not a trap due to SME being disabled then something
* is being used in the wrong mode, report as SIGILL.
*/
- if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+ if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
@@ -1459,6 +1444,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sme_set_vq(vq_minus_one);
fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_flush_task_state(current);
}
put_cpu_fpsimd_context();
@@ -1515,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1572,8 +1561,8 @@ void fpsimd_thread_switch(struct task_struct *next)
fpsimd_save_user_state();
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
- fpsimd_load_kernel_state(next);
fpsimd_flush_cpu_state();
+ fpsimd_load_kernel_state(next);
} else {
/*
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
@@ -1660,6 +1649,9 @@ void fpsimd_flush_thread(void)
current->thread.svcr = 0;
}
+ if (system_supports_fpmr())
+ current->thread.uw.fpmr = 0;
+
current->thread.fp_type = FP_STATE_FPSIMD;
put_cpu_fpsimd_context();
@@ -1682,43 +1674,6 @@ void fpsimd_preserve_current_state(void)
}
/*
- * Like fpsimd_preserve_current_state(), but ensure that
- * current->thread.uw.fpsimd_state is updated so that it can be copied to
- * the signal frame.
- */
-void fpsimd_signal_preserve_current_state(void)
-{
- fpsimd_preserve_current_state();
- if (current->thread.fp_type == FP_STATE_SVE)
- sve_to_fpsimd(current);
-}
-
-/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
- if (!system_supports_sve())
- return;
-
- /*
- * KVM does not save host SVE state since we can only enter
- * the guest from a syscall so the ABI means that only the
- * non-saved SVE state needs to be saved. If we have left
- * SVE enabled for performance reasons then update the task
- * state to be FPSIMD only.
- */
- get_cpu_fpsimd_context();
-
- if (test_and_clear_thread_flag(TIF_SVE)) {
- sve_to_fpsimd(current);
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- put_cpu_fpsimd_context();
-}
-
-/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
@@ -1810,30 +1765,14 @@ void fpsimd_restore_current_state(void)
put_cpu_fpsimd_context();
}
-/*
- * Load an updated userland FPSIMD state for 'current' from memory and set the
- * flag that indicates that the FPSIMD register contents are the most recent
- * FPSIMD state of 'current'. This is used by the signal code to restore the
- * register state when returning from a signal handler in FPSIMD only cases,
- * any SVE context will be discarded.
- */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
- get_cpu_fpsimd_context();
-
current->thread.uw.fpsimd_state = *state;
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
fpsimd_to_sve(current);
-
- task_fpsimd_load();
- fpsimd_bind_task_to_cpu();
-
- clear_thread_flag(TIF_FOREIGN_FPSTATE);
-
- put_cpu_fpsimd_context();
}
/*
@@ -1850,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1863,6 +1803,17 @@ void fpsimd_flush_task_state(struct task_struct *t)
barrier();
}
+void fpsimd_save_and_flush_current_state(void)
+{
+ if (!system_supports_fpsimd())
+ return;
+
+ get_cpu_fpsimd_context();
+ fpsimd_save_user_state();
+ fpsimd_flush_task_state(current);
+ put_cpu_fpsimd_context();
+}
+
/*
* Save the FPSIMD state to memory and invalidate cpu view.
* This function must be called with preemption disabled.
@@ -1898,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
+ WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
@@ -1911,7 +1869,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
- fpsimd_save_kernel_state(current);
+ fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@@ -1932,8 +1890,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1951,31 +1917,39 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
- test_thread_flag(TIF_KERNEL_FPSTATE))
- fpsimd_load_kernel_state(current);
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
-static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
-static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
-static DEFINE_PER_CPU(bool, efi_sve_state_used);
-static DEFINE_PER_CPU(bool, efi_sm_state);
+static struct user_fpsimd_state efi_fpsimd_state;
+static bool efi_fpsimd_state_used;
+static bool efi_sve_state_used;
+static bool efi_sm_state;
/*
* EFI runtime services support functions
@@ -1999,27 +1973,25 @@ void __efi_fpsimd_begin(void)
if (!system_supports_fpsimd())
return;
- WARN_ON(preemptible());
-
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
+ WARN_ON(preemptible());
+
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
* preserving:
*/
- if (system_supports_sve() && likely(efi_sve_state)) {
- char *sve_state = this_cpu_ptr(efi_sve_state);
+ if (system_supports_sve() && efi_sve_state != NULL) {
bool ffr = true;
u64 svcr;
- __this_cpu_write(efi_sve_state_used, true);
+ efi_sve_state_used = true;
if (system_supports_sme()) {
svcr = read_sysreg_s(SYS_SVCR);
- __this_cpu_write(efi_sm_state,
- svcr & SVCR_SM_MASK);
+ efi_sm_state = svcr & SVCR_SM_MASK;
/*
* Unless we have FA64 FFR does not
@@ -2029,19 +2001,18 @@ void __efi_fpsimd_begin(void)
ffr = !(svcr & SVCR_SM_MASK);
}
- sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
+ sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
+ &efi_fpsimd_state.fpsr, ffr);
if (system_supports_sme())
sysreg_clear_set_s(SYS_SVCR,
SVCR_SM_MASK, 0);
} else {
- fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+ fpsimd_save_state(&efi_fpsimd_state);
}
- __this_cpu_write(efi_fpsimd_state_used, true);
+ efi_fpsimd_state_used = true;
}
}
@@ -2053,12 +2024,10 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
- kernel_neon_end();
+ if (!efi_fpsimd_state_used) {
+ kernel_neon_end(&efi_fpsimd_state);
} else {
- if (system_supports_sve() &&
- likely(__this_cpu_read(efi_sve_state_used))) {
- char const *sve_state = this_cpu_ptr(efi_sve_state);
+ if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
/*
@@ -2067,7 +2036,7 @@ void __efi_fpsimd_end(void)
* streaming mode.
*/
if (system_supports_sme()) {
- if (__this_cpu_read(efi_sm_state)) {
+ if (efi_sm_state) {
sysreg_clear_set_s(SYS_SVCR,
0,
SVCR_SM_MASK);
@@ -2081,14 +2050,15 @@ void __efi_fpsimd_end(void)
}
}
- sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
+ sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
+ &efi_fpsimd_state.fpsr, ffr);
- __this_cpu_write(efi_sve_state_used, false);
+ efi_sve_state_used = false;
} else {
- fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+ fpsimd_load_state(&efi_fpsimd_state);
}
+
+ efi_fpsimd_state_used = false;
}
}
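
A hedged sketch of how a caller uses the reworked kernel-mode NEON API from this patch: the caller-provided buffer must stay live for the whole window and the same pointer is passed to both calls; do_simd_work() is a placeholder, not a real function.

/* In-kernel sketch only: preemptible task context, caller-provided save area. */
static void example_simd_user(void)
{
	struct user_fpsimd_state state;

	kernel_neon_begin(&state);	/* may be recorded in current->thread */
	do_simd_work();			/* placeholder for the FPSIMD/NEON work */
	kernel_neon_end(&state);	/* must receive the same buffer */
}
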
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..5a1554a44162 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,7 +15,7 @@
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
struct fregs_offset {
@@ -23,10 +23,10 @@ struct fregs_offset {
int offset;
};
-#define FREGS_OFFSET(n, field) \
-{ \
- .name = n, \
- .offset = offsetof(struct ftrace_regs, field), \
+#define FREGS_OFFSET(n, field) \
+{ \
+ .name = n, \
+ .offset = offsetof(struct __arch_ftrace_regs, field), \
}
static const struct fregs_offset fregs_offsets[] = {
@@ -143,6 +143,69 @@ unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/* Convert fentry_ip to the symbol address without kallsyms */
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ u32 insn;
+
+ /*
+ * When using patchable-function-entry without pre-function NOPS, ftrace
+ * entry is the address of the first NOP after the function entry point.
+ *
+ * The compiler has either generated:
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func-04: BTI C
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * The fentry_ip is the address of `BL <caller>` which is at `func + 4`
+ * bytes in either case.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /*
+ * When using patchable-function-entry with pre-function NOPs, BTI is
+ * a bit different.
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func+00: func: BTI C
+ * func+04: NOP // To be patched to MOV X9, LR
+ * func+08: NOP // To be patched to BL <caller>
+ *
+	 * The fentry_ip is the address of `BL <caller>`, which is at either
+	 * `func + 4` or `func + 8` depending on whether there is a BTI.
+ */
+
+ /* If there is no BTI, the func address should be one instruction before. */
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /* We want to be extra safe in case entry ip is on the page edge,
+ * but otherwise we need to avoid get_kernel_nofault()'s overhead.
+ */
+ if ((fentry_ip & ~PAGE_MASK) < AARCH64_INSN_SIZE * 2) {
+ if (get_kernel_nofault(insn, (u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2)))
+ return 0;
+ } else {
+ insn = *(u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2);
+ }
+
+ if (aarch64_insn_is_bti(le32_to_cpu((__le32)insn)))
+ return fentry_ip - AARCH64_INSN_SIZE * 2;
+
+ return fentry_ip - AARCH64_INSN_SIZE;
+}
+
/*
* Replace a single instruction, which may be a branch or NOP.
* If @validate == true, a replaced instruction is checked against 'old'.
@@ -195,10 +258,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}
-static struct plt_entry *get_ftrace_plt(struct module *mod)
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
{
#ifdef CONFIG_MODULES
- struct plt_entry *plt = mod->arch.ftrace_trampolines;
+ struct plt_entry *plt = NULL;
+
+ if (within_module_mem_type(addr, mod, MOD_INIT_TEXT))
+ plt = mod->arch.init_ftrace_trampolines;
+ else if (within_module_mem_type(addr, mod, MOD_TEXT))
+ plt = mod->arch.ftrace_trampolines;
+ else
+ return NULL;
return &plt[FTRACE_PLT_IDX];
#else
@@ -257,20 +327,19 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * NOTE: __module_text_address() must be called within an RCU read
+ * section, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
if (!mod) {
- preempt_disable();
+ guard(rcu)();
mod = __module_text_address(pc);
- preempt_enable();
}
if (WARN_ON(!mod))
return false;
- plt = get_ftrace_plt(mod);
+ plt = get_ftrace_plt(mod, pc);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
return false;
@@ -423,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return ret;
/*
- * When using mcount, callsites in modules may have been initalized to
+ * When using mcount, callsites in modules may have been initialized to
* call an arbitrary module PLT (which redirects to the _mcount stub)
* rather than the ftrace PLT we'll use at runtime (which redirects to
* the ftrace trampoline). We can ignore the old PLT when initializing
@@ -481,7 +550,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long frame_pointer = arch_ftrace_regs(fregs)->fp;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->lr;
+ unsigned long old;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+
+ if (!function_graph_enter_regs(old, ip, frame_pointer,
+ (void *)frame_pointer, fregs)) {
+ *parent = return_hooker;
+ }
}
#else
/*
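
The layout rules spelled out in arch_ftrace_get_symaddr() above reduce to a small piece of address arithmetic. A minimal standalone sketch of the three cases, using hypothetical addresses and flags (this is an illustration, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define INSN_SIZE 4

/* with_call_ops: pre-function NOPs in use; has_bti: BTI C at func+0 */
static uint64_t symaddr(uint64_t fentry_ip, int with_call_ops, int has_bti)
{
        if (!with_call_ops)
                return fentry_ip - INSN_SIZE;           /* BL is always at func + 4 */
        if (has_bti)
                return fentry_ip - 2 * INSN_SIZE;       /* BTI C, then the two NOPs */
        return fentry_ip - INSN_SIZE;                   /* just the two NOPs */
}

int main(void)
{
        uint64_t func = 0xffff800080010000ULL;          /* hypothetical symbol address */

        printf("%d\n", symaddr(func + 8, 1, 1) == func);        /* 1 */
        printf("%d\n", symaddr(func + 4, 1, 0) == func);        /* 1 */
        printf("%d\n", symaddr(func + 4, 0, 1) == func);        /* 1: BTI sits at func - 4 */
        return 0;
}
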
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cb68adcabe07..ca04b338cb0d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -32,6 +32,7 @@
#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -88,7 +89,7 @@ SYM_CODE_START(primary_entry)
adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
- adrp x0, init_idmap_pg_dir
+ adrp x0, __pi_init_idmap_pg_dir
mov x1, xzr
bl __pi_create_init_idmap
@@ -100,7 +101,7 @@ SYM_CODE_START(primary_entry)
cbnz x19, 0f
dmb sy
mov x1, x0 // end of used region
- adrp x0, init_idmap_pg_dir
+ adrp x0, __pi_init_idmap_pg_dir
adr_l x2, dcache_inval_poc
blr x2
b 1f
@@ -199,6 +200,8 @@ SYM_CODE_END(preserve_boot_args)
sub sp, sp, #PT_REGS_SIZE
stp xzr, xzr, [sp, #S_STACKFRAME]
+ mov \tmp1, #FRAME_META_TYPE_FINAL
+ str \tmp1, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
scs_load_current
@@ -295,25 +298,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr sctlr_el2, x0
isb
0:
- mov_q x0, HCR_HOST_NVHE_FLAGS
-
- /*
- * Compliant CPUs advertise their VHE-onlyness with
- * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
- * RES1 in that case. Publish the E2H bit early so that
- * it can be picked up by the init_el2_state macro.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
- * don't advertise it (they predate this relaxation).
- */
- mrs_s x1, SYS_ID_AA64MMFR4_EL1
- tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
- orr x0, x0, #HCR_E2H
-1:
- msr hcr_el2, x0
- isb
+ init_el2_hcr HCR_HOST_NVHE_FLAGS
init_el2_state
/* Hypervisor stub */
@@ -336,7 +322,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr sctlr_el1, x1
mov x2, xzr
3:
- __init_el2_nvhe_prepare_eret
+ mov x0, #INIT_PSTATE_EL1
+ msr spsr_el2, x0
mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
@@ -520,7 +507,7 @@ SYM_FUNC_END(__no_granule_support)
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
- adrp x2, init_idmap_pg_dir
+ adrp x2, __pi_init_idmap_pg_dir
bl __enable_mmu
adrp x1, early_init_stack
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..18749e9a6c2d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -266,9 +266,15 @@ static int swsusp_mte_save_tags(void)
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
struct page *page = pfn_to_online_page(pfn);
+ struct folio *folio;
if (!page)
continue;
+ folio = page_folio(page);
+
+ if (folio_test_hugetlb(folio) &&
+ !folio_test_hugetlb_mte_tagged(folio))
+ continue;
if (!page_mte_tagged(page))
continue;
@@ -407,7 +413,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 722ac45f9f7b..ab76b36dce82 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -22,6 +22,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@@ -618,8 +619,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+void do_breakpoint(unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
u32 ctrl_reg;
@@ -662,7 +662,7 @@ unlock:
}
if (!step)
- return 0;
+ return;
if (user_mode(regs)) {
debug_info->bps_disabled = 1;
@@ -670,7 +670,7 @@ unlock:
/* If we're already stepping a watchpoint, just return. */
if (debug_info->wps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -681,7 +681,7 @@ unlock:
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -690,10 +690,8 @@ unlock:
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(breakpoint_handler);
+NOKPROBE_SYMBOL(do_breakpoint);
/*
* Arm64 hardware does not always report a watchpoint hit address that matches
@@ -752,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
return step;
}
-static int watchpoint_handler(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
+void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
u64 min_dist = -1, dist;
@@ -808,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
rcu_read_unlock();
if (!step)
- return 0;
+ return;
/*
* We always disable EL0 watchpoints because the kernel can
@@ -821,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
/* If we're already stepping a breakpoint, just return. */
if (debug_info->bps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -832,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -841,44 +838,41 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(watchpoint_handler);
+NOKPROBE_SYMBOL(do_watchpoint);
/*
* Handle single-step exception.
*/
-int reinstall_suspended_bps(struct pt_regs *regs)
+bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
struct debug_info *debug_info = &current->thread.debug;
- int handled_exception = 0, *kernel_step;
-
- kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ int *kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ bool handled_exception = false;
/*
- * Called from single-step exception handler.
- * Return 0 if execution can resume, 1 if a SIGTRAP should be
- * reported.
+ * Called from single-step exception entry.
+ * Return true if we stepped a breakpoint and can resume execution,
+ * false if we need to handle a single-step.
*/
if (user_mode(regs)) {
if (debug_info->bps_disabled) {
debug_info->bps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (debug_info->wps_disabled) {
debug_info->wps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (handled_exception) {
if (debug_info->suspended_step) {
debug_info->suspended_step = 0;
/* Allow exception handling to fall-through. */
- handled_exception = 0;
+ handled_exception = false;
} else {
user_disable_single_step(current);
}
@@ -892,17 +886,17 @@ int reinstall_suspended_bps(struct pt_regs *regs)
if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
kernel_disable_single_step();
- handled_exception = 1;
+ handled_exception = true;
} else {
- handled_exception = 0;
+ handled_exception = false;
}
*kernel_step = ARM_KERNEL_STEP_NONE;
}
- return !handled_exception;
+ return handled_exception;
}
-NOKPROBE_SYMBOL(reinstall_suspended_bps);
+NOKPROBE_SYMBOL(try_step_suspended_breakpoints);
/*
* Context-switcher for restoring suspended breakpoints.
@@ -987,12 +981,6 @@ static int __init arch_hw_breakpoint_init(void)
pr_info("found %d breakpoint and %d watchpoint registers.\n",
core_num_brps, core_num_wrps);
- /* Register debug fault handlers. */
- hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-breakpoint handler");
- hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-watchpoint handler");
-
/*
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
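
The inverted return convention of try_step_suspended_breakpoints() is easiest to see from the caller's side. A hedged sketch of a possible single-step entry path (the function name and surroundings are assumptions for illustration, not taken from this diff):

/* Hypothetical caller: names are illustrative only. */
static void hypothetical_softstep_handler(struct pt_regs *regs, unsigned long esr)
{
        /*
         * true  -> a suspended breakpoint/watchpoint was re-armed and
         *          stepped; simply resume execution.
         * false -> this is a genuine single-step; fall through to the
         *          normal handling (kgdb, probes, SIGTRAP delivery).
         */
        if (try_step_suspended_breakpoints(regs))
                return;

        /* ... normal single-step handling goes here ... */
}
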
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 65f76064c86b..085bc9972f6b 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
+ cmp x0, #HVC_GET_ICH_VTR_EL2
+ b.ne 2f
+ mrs_s x1, SYS_ICH_VTR_EL2
+ b 9f
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -97,7 +102,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
2:
// Engage the VHE magic!
mov_q x0, HCR_HOST_VHE_FLAGS
- msr hcr_el2, x0
+ msr_hcr_el2 x0
isb
// Use the EL1 allocated stack, per-cpu offset
@@ -114,8 +119,8 @@ SYM_CODE_START_LOCAL(__finalise_el2)
// Use EL2 translations for SPE & TRBE and disable access from EL1
mrs x0, mdcr_el2
- bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ bic x0, x0, #MDCR_EL2_E2PB_MASK
+ bic x0, x0, #MDCR_EL2_E2TB_MASK
msr mdcr_el2, x0
// Transfer the MM state from EL1 to EL2
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index ba4f8f7d6a91..85bc629270bd 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,6 +10,16 @@
#error This file should only be included in vmlinux.lds.S
#endif
+#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000
+#define ASSERT(...)
+#endif
+
+#define PI_EXPORT_SYM(sym) \
+ __PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code)
+#define __PI_EXPORT_SYM(sym, pisym, msg)\
+ PROVIDE(pisym = sym); \
+ ASSERT((sym - KIMAGE_VADDR) < (__bss_start - KIMAGE_VADDR), #msg)
+
PROVIDE(__efistub_primary_entry = primary_entry);
/*
@@ -36,40 +46,30 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
-PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
-PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
-PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
-PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
-PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
-PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
-PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
-PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
-PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
-PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
-PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
-#endif
-PROVIDE(__pi__ctype = _ctype);
-PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
-
-PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
-PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
-PROVIDE(__pi_init_pg_dir = init_pg_dir);
-PROVIDE(__pi_init_pg_end = init_pg_end);
-PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
-
-PROVIDE(__pi__text = _text);
-PROVIDE(__pi__stext = _stext);
-PROVIDE(__pi__etext = _etext);
-PROVIDE(__pi___start_rodata = __start_rodata);
-PROVIDE(__pi___inittext_begin = __inittext_begin);
-PROVIDE(__pi___inittext_end = __inittext_end);
-PROVIDE(__pi___initdata_begin = __initdata_begin);
-PROVIDE(__pi___initdata_end = __initdata_end);
-PROVIDE(__pi__data = _data);
-PROVIDE(__pi___bss_start = __bss_start);
-PROVIDE(__pi__end = _end);
+PI_EXPORT_SYM(id_aa64isar1_override);
+PI_EXPORT_SYM(id_aa64isar2_override);
+PI_EXPORT_SYM(id_aa64mmfr0_override);
+PI_EXPORT_SYM(id_aa64mmfr1_override);
+PI_EXPORT_SYM(id_aa64mmfr2_override);
+PI_EXPORT_SYM(id_aa64pfr0_override);
+PI_EXPORT_SYM(id_aa64pfr1_override);
+PI_EXPORT_SYM(id_aa64smfr0_override);
+PI_EXPORT_SYM(id_aa64zfr0_override);
+PI_EXPORT_SYM(arm64_sw_feature_override);
+PI_EXPORT_SYM(arm64_use_ng_mappings);
+PI_EXPORT_SYM(_ctype);
+
+PI_EXPORT_SYM(swapper_pg_dir);
+
+PI_EXPORT_SYM(_text);
+PI_EXPORT_SYM(_stext);
+PI_EXPORT_SYM(_etext);
+PI_EXPORT_SYM(__start_rodata);
+PI_EXPORT_SYM(__inittext_begin);
+PI_EXPORT_SYM(__inittext_end);
+PI_EXPORT_SYM(__initdata_begin);
+PI_EXPORT_SYM(__initdata_end);
+PI_EXPORT_SYM(_data);
#ifdef CONFIG_KVM
@@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
KVM_NVHE_ALIAS(alt_cb_patch_nops);
+KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);
@@ -105,20 +106,16 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-/* Static key checked in GIC_PRIO_IRQOFF. */
-KVM_NVHE_ALIAS(gic_nonsecure_priorities);
-#endif
+/* Static key indicating whether GICv3 has GICv2 compatibility */
+KVM_NVHE_ALIAS(vgic_v3_has_v2_compat);
+
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
-/* PMU available static key */
-#ifdef CONFIG_HW_PERF_EVENTS
-KVM_NVHE_ALIAS(kvm_arm_pmu_available);
-#endif
-
/* Position-independent library routines */
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
@@ -137,6 +134,8 @@ KVM_NVHE_ALIAS(__hyp_text_start);
KVM_NVHE_ALIAS(__hyp_text_end);
KVM_NVHE_ALIAS(__hyp_bss_start);
KVM_NVHE_ALIAS(__hyp_bss_end);
+KVM_NVHE_ALIAS(__hyp_data_start);
+KVM_NVHE_ALIAS(__hyp_data_end);
KVM_NVHE_ALIAS(__hyp_rodata_start);
KVM_NVHE_ALIAS(__hyp_rodata_end);
@@ -149,4 +148,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
+/*
+ * LLD will occasionally error out with a '__init_end does not converge' error
+ * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a
+ * circular dependency. Counter this by dimensioning the initial IDMAP page
+ * tables based on kimage_limit, which is defined such that its value should
+ * not change as a result of the initdata segment being pushed over a 64k
+ * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its
+ * value doesn't change by more than 2M between linker passes.
+ */
+kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M);
+
+#undef ASSERT
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
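
For reference, each PI_EXPORT_SYM() use expands to one PROVIDE() plus one placement assertion; a sketch of the expansion for _ctype (with LLD older than 21 the ASSERT() expands to nothing, per the guard at the top of this file):

PROVIDE(__pi__ctype = _ctype);
ASSERT((_ctype - KIMAGE_VADDR) < (__bss_start - KIMAGE_VADDR),
       "Cannot export BSS symbol _ctype to startup code")
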
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index ef48089fbfe1..fe86ada23c7d 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -10,34 +10,6 @@
#include <linux/io.h>
/*
- * Copy data from IO memory space to "real" memory space.
- */
-void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
-{
- while (count && !IS_ALIGNED((unsigned long)from, 8)) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- *(u64 *)to = __raw_readq(from);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
-}
-EXPORT_SYMBOL(__memcpy_fromio);
-
-/*
* This generates a memcpy that works on a from/to address which is aligned to
* bits. Count is in terms of the number of bits sized quantities to copy. It
* optimizes to use the STR groupings when possible so that it is WC friendly.
@@ -78,62 +50,3 @@ void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count)
dgh();
}
EXPORT_SYMBOL(__iowrite32_copy_full);
-
-/*
- * Copy data from "real" memory space to IO memory space.
- */
-void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
-{
- while (count && !IS_ALIGNED((unsigned long)to, 8)) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(*(u64 *)from, to);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-/*
- * "memset" on IO memory space.
- */
-void __memset_io(volatile void __iomem *dst, int c, size_t count)
-{
- u64 qc = (u8)c;
-
- qc |= qc << 8;
- qc |= qc << 16;
- qc |= qc << 32;
-
- while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(qc, dst);
- dst += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
-}
-EXPORT_SYMBOL(__memset_io);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 85087e2df564..15dedb385b9e 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -51,7 +51,6 @@ static void init_irq_scs(void)
scs_alloc(early_cpu_to_node(cpu));
}
-#ifdef CONFIG_VMAP_STACK
static void __init init_irq_stacks(void)
{
int cpu;
@@ -62,20 +61,8 @@ static void __init init_irq_stacks(void)
per_cpu(irq_stack_ptr, cpu) = p;
}
}
-#else
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-static void init_irq_stacks(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
-}
-#endif
-
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static void ____do_softirq(struct pt_regs *regs)
{
__do_softirq();
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index faf88ec9c48e..b345425193d2 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -7,11 +7,12 @@
*/
#include <linux/kernel.h>
#include <linux/jump_label.h>
+#include <linux/smp.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)jump_entry_code(entry);
u32 insn;
@@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
}
aarch64_insn_patch_text_nosync(addr, insn);
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ kick_all_cpus_sync();
}
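
The split into queue/apply exists so that a whole batch of jump entries can be patched with the _nosync helper and the cross-CPU synchronisation paid once. A hedged caller-side sketch of that pattern (the real batching lives in the generic jump_label code; this only illustrates the contract):

static void patch_jump_entries(struct jump_entry *start, struct jump_entry *stop,
                               enum jump_label_type type)
{
        struct jump_entry *entry;

        for (entry = start; entry < stop; entry++)
                arch_jump_label_transform_queue(entry, type);   /* patch, no IPI yet */

        arch_jump_label_transform_apply();      /* one kick_all_cpus_sync() for the batch */
}
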
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 1da3e25f9d9e..c9503ed45a6c 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -10,8 +10,6 @@
#include <asm/cpufeature.h>
#include <asm/memory.h>
-u16 __initdata memstart_offset_seed;
-
bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 4e1f983df3d1..968324a79a89 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,7 +17,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -234,23 +234,23 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_brk_fn)
+NOKPROBE_SYMBOL(kgdb_brk_handler)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
+NOKPROBE_SYMBOL(kgdb_compiled_brk_handler);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
@@ -258,21 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_step_brk_fn);
-
-static struct break_hook kgdb_brkpt_hook = {
- .fn = kgdb_brk_fn,
- .imm = KGDB_DYN_DBG_BRK_IMM,
-};
-
-static struct break_hook kgdb_compiled_brkpt_hook = {
- .fn = kgdb_compiled_brk_fn,
- .imm = KGDB_COMPILED_DBG_BRK_IMM,
-};
-
-static struct step_hook kgdb_step_hook = {
- .fn = kgdb_step_brk_fn
-};
+NOKPROBE_SYMBOL(kgdb_single_step_handler);
static int __kgdb_notify(struct die_args *args, unsigned long cmd)
{
@@ -311,15 +297,7 @@ static struct notifier_block kgdb_notifier = {
*/
int kgdb_arch_init(void)
{
- int ret = register_die_notifier(&kgdb_notifier);
-
- if (ret != 0)
- return ret;
-
- register_kernel_break_hook(&kgdb_brkpt_hook);
- register_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- register_kernel_step_hook(&kgdb_step_hook);
- return 0;
+ return register_die_notifier(&kgdb_notifier);
}
/*
@@ -329,9 +307,6 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_kernel_break_hook(&kgdb_brkpt_hook);
- unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- unregister_kernel_step_hook(&kgdb_step_hook);
unregister_die_notifier(&kgdb_notifier);
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 82e2203d86a3..239c16e3d02f 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage)
BUG(); /* Should never get here. */
}
-static void machine_kexec_mask_interrupts(void)
-{
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
- int ret;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
/**
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
*/
@@ -282,7 +251,7 @@ void crash_post_resume(void)
* marked as Reserved as memory was allocated via memblock_reserve().
*
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
- * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * from the hibernation image. crash_is_nosave() does this check for crash
* dump kernel and will reduce the total size of hibernation image.
*/
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index af1ca875c52c..410060ebd86d 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf;
+ struct kexec_buf kbuf = {};
void *dtb = NULL;
unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index bde32979c06a..7afd370da9f4 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -283,7 +283,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
- Elf_Shdr *pltsec, *tramp = NULL;
+ Elf_Shdr *pltsec, *tramp = NULL, *init_tramp = NULL;
int i;
/*
@@ -298,6 +298,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
else if (!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name,
+ ".init.text.ftrace_trampoline"))
+ init_tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
@@ -363,5 +366,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
+ if (init_tramp) {
+ init_tramp->sh_type = SHT_NOBITS;
+ init_tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ init_tramp->sh_addralign = __alignof__(struct plt_entry);
+ init_tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
+ }
+
return 0;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 36b25af56324..24adb581af0e 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -23,6 +23,7 @@
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
+#include <asm/text-patching.h>
enum aarch64_reloc_op {
RELOC_OP_NONE,
@@ -48,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
return 0;
}
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+#define WRITE_PLACE(place, val, mod) do { \
+ __typeof__(val) __val = (val); \
+ \
+ if (mod->state == MODULE_STATE_UNFORMED) \
+ *(place) = __val; \
+ else \
+ aarch64_insn_copy(place, &(__val), sizeof(*place)); \
+} while (0)
+
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+ struct module *me)
{
s64 sval = do_reloc(op, place, val);
@@ -66,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
switch (len) {
case 16:
- *(s16 *)place = sval;
+ WRITE_PLACE((s16 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U16_MAX)
@@ -82,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 32:
- *(s32 *)place = sval;
+ WRITE_PLACE((s32 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U32_MAX)
@@ -98,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 64:
- *(s64 *)place = sval;
+ WRITE_PLACE((s64 *)place, sval, me);
break;
default:
pr_err("Invalid length (%d) for data relocation\n", len);
@@ -113,7 +124,8 @@ enum aarch64_insn_movw_imm_type {
};
static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, enum aarch64_insn_movw_imm_type imm_type)
+ int lsb, enum aarch64_insn_movw_imm_type imm_type,
+ struct module *me)
{
u64 imm;
s64 sval;
@@ -145,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
if (imm > U16_MAX)
return -ERANGE;
@@ -154,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, int len, enum aarch64_insn_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type,
+ struct module *me)
{
u64 imm, imm_mask;
s64 sval;
@@ -170,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
/*
* Extract the upper value bits (including the sign bit) and
@@ -189,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
- __le32 *place, u64 val)
+ __le32 *place, u64 val, struct module *me)
{
u32 insn;
if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
/* patch ADRP to ADR if it is in range */
if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
- AARCH64_INSN_IMM_ADR)) {
+ AARCH64_INSN_IMM_ADR, me)) {
insn = le32_to_cpu(*place);
insn &= ~BIT(31);
} else {
@@ -211,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
AARCH64_INSN_BRANCH_NOLINK);
}
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
return 0;
}
@@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Data relocations. */
case R_AARCH64_ABS64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me);
break;
case R_AARCH64_ABS32:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me);
break;
case R_AARCH64_ABS16:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me);
break;
case R_AARCH64_PREL64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me);
break;
case R_AARCH64_PREL32:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me);
break;
case R_AARCH64_PREL16:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me);
break;
/* MOVW instruction relocations. */
@@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
- ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+ ovf = reloc_insn_adrp(me, sechdrs, loc, val, me);
if (ovf && ovf != -ERANGE)
return ovf;
break;
@@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- AARCH64_INSN_IMM_14);
+ AARCH64_INSN_IMM_14, me);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- AARCH64_INSN_IMM_26);
+ AARCH64_INSN_IMM_26, me);
if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
- 26, AARCH64_INSN_IMM_26);
+ 26, AARCH64_INSN_IMM_26, me);
}
break;
@@ -453,6 +466,17 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
mod->arch.ftrace_trampolines = plts;
+
+ s = find_section(hdr, sechdrs, ".init.text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ mod->arch.init_ftrace_trampolines = plts;
+
#endif
return 0;
}
@@ -462,14 +486,33 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s;
+ int ret;
+
s = find_section(hdr, sechdrs, ".altinstructions");
- if (s)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (ret < 0) {
+ pr_err("module %s: error occurred when applying alternatives\n", me->name);
+ return ret;
+ }
+ }
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
- if (s)
- __pi_scs_patch((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ /*
+ * Because a module malformed enough for SCS patching to
+ * fail can simply be rejected, skip the dry run and patch
+ * in place. If patching fails, the module will not be
+ * loaded anyway.
+ */
+ ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true);
+ if (ret) {
+ pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
+ me->name, ret);
+ return -ENOEXEC;
+ }
+ }
}
return module_init_ftrace_plt(hdr, sechdrs, me);
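
For a formed (already loaded) module, WRITE_PLACE() above cannot store to the relocation target directly, since the module text is no longer writable by then. Roughly what the macro amounts to for one of the 32-bit cases inside reloc_data() (illustration only):

        s32 __val = sval;

        if (me->state == MODULE_STATE_UNFORMED)
                *(s32 *)place = __val;                          /* initial load: mapping still writable */
        else
                aarch64_insn_copy(place, &__val, sizeof(s32));  /* late relocation via the patching fixmap */
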
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index dcdcccd40891..32148bf09c1d 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -38,7 +38,24 @@ EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
- unsigned int i;
+ struct folio *folio = page_folio(page);
+ unsigned long i;
+
+ if (folio_test_hugetlb(folio)) {
+ unsigned long nr = folio_nr_pages(folio);
+
+ /* Hugetlb MTE flags are set for head page only */
+ if (folio_try_hugetlb_mte_tagging(folio)) {
+ for (i = 0; i < nr; i++, page++)
+ mte_clear_page_tags(page_address(page));
+ folio_set_hugetlb_mte_tagged(folio);
+ }
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
+
+ return;
+ }
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
@@ -140,6 +157,24 @@ void mte_enable_kernel_asymm(void)
mte_enable_kernel_sync();
}
}
+
+int mte_enable_kernel_store_only(void)
+{
+ /*
+ * If the CPU does not support MTE store only,
+ * the kernel checks all operations.
+ */
+ if (!cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ return -EINVAL;
+
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCSO_MASK,
+ SYS_FIELD_PREP(SCTLR_EL1, TCSO, 1));
+ isb();
+
+ pr_info_once("MTE: enabled store only mode at EL1\n");
+
+ return 0;
+}
#endif
#ifdef CONFIG_KASAN_HW_TAGS
@@ -183,7 +218,7 @@ static void mte_update_sctlr_user(struct task_struct *task)
* program requested values go with what was requested.
*/
resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
- sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ sctlr &= ~(SCTLR_EL1_TCF0_MASK | SCTLR_EL1_TCSO0_MASK);
/*
* Pick an actual setting. The order in which we check for
* set bits and map into register values determines our
@@ -195,6 +230,10 @@ static void mte_update_sctlr_user(struct task_struct *task)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
+
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ sctlr |= SYS_FIELD_PREP(SCTLR_EL1, TCSO0, 1);
+
task->thread.sctlr_user = sctlr;
}
@@ -354,6 +393,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
(arg & PR_MTE_TCF_SYNC))
mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+ if (arg & PR_MTE_STORE_ONLY)
+ mte_ctrl |= MTE_CTRL_STORE_ONLY;
+
task->thread.mte_ctrl = mte_ctrl;
if (task == current) {
preempt_disable();
@@ -381,6 +423,8 @@ long get_mte_ctrl(struct task_struct *task)
ret |= PR_MTE_TCF_ASYNC;
if (mte_ctrl & MTE_CTRL_TCF_SYNC)
ret |= PR_MTE_TCF_SYNC;
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ ret |= PR_MTE_STORE_ONLY;
return ret;
}
@@ -410,6 +454,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
void *maddr;
struct page *page = get_user_page_vma_remote(mm, addr,
gup_flags, &vma);
+ struct folio *folio;
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -428,7 +473,13 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
put_page(page);
break;
}
- WARN_ON_ONCE(!page_mte_tagged(page));
+
+ folio = page_folio(page);
+ if (folio_test_hugetlb(folio))
+ WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
+ !is_huge_zero_folio(folio));
+ else
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
@@ -582,12 +633,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl);
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
const char __user *end = uaddr + size;
- int err = 0;
char val;
- __raw_get_user(val, uaddr, err);
- if (err)
- return size;
+ __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
while (uaddr < end) {
@@ -595,12 +643,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size)
* A read is sufficient for mte, the caller should have probed
* for the pte write permission if required.
*/
- __raw_get_user(val, uaddr, err);
- if (err)
- return end - uaddr;
+ __raw_get_user(val, uaddr, efault);
uaddr += MTE_GRANULE_SIZE;
}
(void)val;
return 0;
+
+efault:
+ return end - uaddr;
}
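
The new MTE_CTRL_STORE_ONLY/PR_MTE_STORE_ONLY plumbing is driven through the existing PR_SET_TAGGED_ADDR_CTRL prctl. A hedged userspace sketch (PR_MTE_STORE_ONLY is introduced alongside this series, so the build needs matching uapi headers; everything else is existing prctl API):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MTE_STORE_ONLY
#error "uapi headers too old: PR_MTE_STORE_ONLY not defined"
#endif

int main(void)
{
        /* synchronous tag checking, restricted to stores */
        unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
                             PR_MTE_STORE_ONLY;

        if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0)) {
                perror("PR_SET_TAGGED_ADDR_CTRL");
                return 1;
        }
        return 0;
}
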
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 945df74005c7..1041bc67a3ee 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -10,7 +10,7 @@
#include <asm/fixmap.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/sections.h>
static DEFINE_RAW_SPINLOCK(patch_lock);
@@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr)
static void __kprobes *patch_map(void *addr, int fixmap)
{
- unsigned long uintaddr = (uintptr_t) addr;
- bool image = is_image_text(uintaddr);
- struct page *page;
+ phys_addr_t phys;
- if (image)
- page = phys_to_page(__pa_symbol(addr));
- else if (IS_ENABLED(CONFIG_EXECMEM))
- page = vmalloc_to_page(addr);
- else
- return addr;
+ if (is_image_text((unsigned long)addr)) {
+ phys = __pa_symbol(addr);
+ } else {
+ struct page *page = vmalloc_to_page(addr);
+ BUG_ON(!page);
+ phys = page_to_phys(page) + offset_in_page(addr);
+ }
- BUG_ON(!page);
- return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
+ return (void *)set_fixmap_offset(fixmap, phys);
}
static void __kprobes patch_unmap(int fixmap)
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index f872c57e9909..fd9a7bed83ce 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -6,28 +6,7 @@
* Copyright (C) 2014 ARM Ltd.
*/
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
-#include <linux/slab.h>
-
-#ifdef CONFIG_ACPI
-/*
- * Try to assign the IRQ number when probing a new device
- */
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
- if (!acpi_disabled)
- acpi_pci_irq_enable(dev);
-
- return 0;
-}
-#endif
/*
* raw_pci_read/write - Platform-specific PCI config space access.
@@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-
-#ifdef CONFIG_ACPI
-
-struct acpi_pci_generic_root_info {
- struct acpi_pci_root_info common;
- struct pci_config_window *cfg; /* config space mapping */
-};
-
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
-{
- struct pci_config_window *cfg = bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
-
- return root->segment;
-}
-
-int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
-{
- struct pci_config_window *cfg;
- struct acpi_device *adev;
- struct device *bus_dev;
-
- if (acpi_disabled)
- return 0;
-
- cfg = bridge->bus->sysdata;
-
- /*
- * On Hyper-V there is no corresponding ACPI device for a root bridge,
- * therefore ->parent is set as NULL by the driver. And set 'adev' as
- * NULL in this case because there is no proper ACPI device.
- */
- if (!cfg->parent)
- adev = NULL;
- else
- adev = to_acpi_device(cfg->parent);
-
- bus_dev = &bridge->bus->dev;
-
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-
- return 0;
-}
-
-static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
-{
- struct resource_entry *entry, *tmp;
- int status;
-
- status = acpi_pci_probe_root_resources(ci);
- resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
- if (!(entry->res->flags & IORESOURCE_WINDOW))
- resource_list_destroy_entry(entry);
- }
- return status;
-}
-
-/*
- * Lookup the bus range for the domain in MCFG, and set up config space
- * mapping.
- */
-static struct pci_config_window *
-pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
-{
- struct device *dev = &root->device->dev;
- struct resource *bus_res = &root->secondary;
- u16 seg = root->segment;
- const struct pci_ecam_ops *ecam_ops;
- struct resource cfgres;
- struct acpi_device *adev;
- struct pci_config_window *cfg;
- int ret;
-
- ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
- if (ret) {
- dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
- return NULL;
- }
-
- adev = acpi_resource_consumer(&cfgres);
- if (adev)
- dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
- dev_name(&adev->dev));
- else
- dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
- &cfgres);
-
- cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
- if (IS_ERR(cfg)) {
- dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
- PTR_ERR(cfg));
- return NULL;
- }
-
- return cfg;
-}
-
-/* release_info: free resources allocated by init_info */
-static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
-{
- struct acpi_pci_generic_root_info *ri;
-
- ri = container_of(ci, struct acpi_pci_generic_root_info, common);
- pci_ecam_free(ri->cfg);
- kfree(ci->ops);
- kfree(ri);
-}
-
-/* Interface called from ACPI code to setup PCI host controller */
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- struct acpi_pci_generic_root_info *ri;
- struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
- struct pci_host_bridge *host;
-
- ri = kzalloc(sizeof(*ri), GFP_KERNEL);
- if (!ri)
- return NULL;
-
- root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops) {
- kfree(ri);
- return NULL;
- }
-
- ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg) {
- kfree(ri);
- kfree(root_ops);
- return NULL;
- }
-
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
- if (!bus)
- return NULL;
-
- /* If we must preserve the resource configuration, claim now */
- host = pci_find_host_bridge(bus);
- if (host->preserve_config)
- pci_bus_claim_resources(bus);
-
- /*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
- */
- pci_assign_unassigned_root_bus_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
-
- return bus;
-}
-
-void pcibios_add_bus(struct pci_bus *bus)
-{
- acpi_pci_add_bus(bus);
-}
-
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- acpi_pci_remove_bus(bus);
-}
-
-#endif
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index e8ed5673f481..9b7f26b128b5 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -38,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
arch_stack_walk(callchain_trace, entry, current, regs);
}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
- return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
- unsigned int guest_state = perf_guest_state();
- int misc = 0;
-
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
-
- return misc;
-}
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 4d11a8c29181..be92d73c25b2 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -2,7 +2,7 @@
# Copyright 2022 Google LLC
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
- -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \
$(DISABLE_LATENT_ENTROPY_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
@@ -41,4 +41,4 @@ obj-y := idreg-override.pi.o \
obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+targets := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index 29d4b6244a6f..bc57b290e5e7 100644
--- a/arch/arm64/kernel/pi/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -38,6 +38,15 @@ struct ftr_set_desc {
#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+static const struct ftr_set_desc mmfr0 __prel64_initconst = {
+ .name = "id_aa64mmfr0",
+ .override = &id_aa64mmfr0_override,
+ .fields = {
+ FIELD("ecv", ID_AA64MMFR0_EL1_ECV_SHIFT, NULL),
+ {}
+ },
+};
+
static bool __init mmfr1_vh_filter(u64 val)
{
/*
@@ -74,6 +83,15 @@ static bool __init mmfr2_varange_filter(u64 val)
id_aa64mmfr0_override.val |=
(ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+
+ /*
+ * Override PARange to 48 bits - the override will just be
+ * ignored if the actual PARange is smaller, but this is
+ * unlikely to be the case for LPA2 capable silicon.
+ */
+ id_aa64mmfr0_override.val |=
+ ID_AA64MMFR0_EL1_PARANGE_48 << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
}
#endif
return true;
@@ -109,6 +127,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = {
.fields = {
FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
+ FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL),
{}
},
};
@@ -133,8 +152,10 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
.override = &id_aa64pfr1_override,
.fields = {
FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+ FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL),
{}
},
};
@@ -196,6 +217,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
static const
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr0 },
{ &mmfr1 },
{ &mmfr2 },
{ &pfr0 },
@@ -215,6 +237,7 @@ static const struct {
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nogcs", "id_aa64pfr1.gcs=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
"id_aa64isar1.api=0 id_aa64isar1.apa=0 "
@@ -225,6 +248,7 @@ static const struct {
{ "rodata=off", "arm64_sw.rodataoff=1" },
{ "arm64.nolva", "id_aa64mmfr2.varange=0" },
{ "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
+ { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
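
With the new fields and aliases in place, the following kernel command-line shorthands become available; each expands to the field override(s) on the right, exactly as listed in the aliases table above:

arm64.nogcs    ->  id_aa64pfr1.gcs=0
arm64.nompam   ->  id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0
id_aa64mmfr0.ecv=0   (new field; overrides ID_AA64MMFR0_EL1.ECV to zero, no shorthand alias)
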
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index 0257b43819db..e0e018046a46 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -18,8 +18,6 @@
#include "pi.h"
-extern u16 memstart_offset_seed;
-
static u64 __init get_kaslr_seed(void *fdt, int node)
{
static char const seed_str[] __initconst = "kaslr-seed";
@@ -53,8 +51,6 @@ u64 __init kaslr_early_init(void *fdt, int chosen)
return 0;
}
- memstart_offset_seed = seed & U16_MAX;
-
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index f374a3e5a5fe..a852264958c3 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -18,9 +18,9 @@
extern const u8 __eh_frame_start[], __eh_frame_end[];
-extern void idmap_cpu_replace_ttbr1(void *pgdir);
+extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
-static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
+static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
void *start, void *end, pgprot_t prot,
bool may_use_cont, int root_level)
{
@@ -40,7 +40,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
- u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+ phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_t prot;
@@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
+ /*
+ * [_text, _stext) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
@@ -90,7 +96,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
true, root_level);
dsb(ishst);
- idmap_cpu_replace_ttbr1(init_pg_dir);
+ idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);
if (twopass) {
if (IS_ENABLED(CONFIG_RELOCATABLE))
@@ -98,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
if (enable_scs) {
scs_patch(__eh_frame_start + va_offset,
- __eh_frame_end - __eh_frame_start);
+ __eh_frame_end - __eh_frame_start, false);
asm("ic ialluis");
dynamic_scs_is_enabled = true;
@@ -129,13 +135,19 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
/* Copy the root page table to its final location */
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
dsb(ishst);
- idmap_cpu_replace_ttbr1(swapper_pg_dir);
+ idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
}
-static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
u64 sctlr = read_sysreg(sctlr_el1);
- u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS;
+ u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+
+ tcr &= ~TCR_EL1_IPS_MASK;
+ tcr |= parange << TCR_EL1_IPS_SHIFT;
asm(" msr sctlr_el1, %0 ;"
" isb ;"
@@ -153,7 +165,7 @@ static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
static void __init remap_idmap_for_lpa2(void)
{
/* clear the bits that change meaning once LPA2 is turned on */
- pteval_t mask = PTE_SHARED;
+ ptdesc_t mask = PTE_SHARED;
/*
* We have to clear bits [9:8] in all block or page descriptors in the
@@ -166,30 +178,30 @@ static void __init remap_idmap_for_lpa2(void)
*/
create_init_idmap(init_pg_dir, mask);
dsb(ishst);
- set_ttbr0_for_lpa2((u64)init_pg_dir);
+ set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
/*
* Recreate the initial ID map with the same granularity as before.
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
- (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
+ (char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
/* switch back to the updated initial ID map */
- set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
+ set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);
/* wipe the temporary ID map from memory */
- memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+ memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
}
-static void __init map_fdt(u64 fdt)
+static void *__init map_fdt(phys_addr_t fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
- u64 efdt = fdt + MAX_FDT_SIZE;
- u64 ptep = (u64)ptes;
+ phys_addr_t efdt = fdt + MAX_FDT_SIZE;
+ phys_addr_t ptep = (phys_addr_t)ptes; /* We're idmapped when called */
/*
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
@@ -199,9 +211,34 @@ static void __init map_fdt(u64 fdt)
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
(pte_t *)init_idmap_pg_dir, false, 0);
dsb(ishst);
+
+ return (void *)fdt;
}
-asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
+/*
+ * PI version of the Cavium erratum 27456 detection: the erratum makes it
+ * impossible to use non-global mappings.

+ */
+static bool __init ng_mappings_allowed(void)
+{
+ static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
+ /* Cavium ThunderX, T81 pass 1.0 */
+ MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+ {},
+ };
+
+ for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
+ if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
+ r->rv_min, r->rv_max))
+ return false;
+ }
+
+ return true;
+}
+
+asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
{
static char const chosen_str[] __initconst = "/chosen";
u64 va_base, pa_base = (u64)&_text;
@@ -209,15 +246,14 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
int va_bits = VA_BITS;
int chosen;
-
- map_fdt((u64)fdt);
+ void *fdt_mapped = map_fdt(fdt);
/* Clear BSS and the initial page tables */
- memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
+ memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);
/* Parse the command line for CPU feature overrides */
- chosen = fdt_path_offset(fdt, chosen_str);
- init_feature_override(boot_status, fdt, chosen);
+ chosen = fdt_path_offset(fdt_mapped, chosen_str);
+ init_feature_override(boot_status, fdt_mapped, chosen);
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
va_bits = VA_BITS_MIN;
@@ -227,7 +263,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
}
if (va_bits > VA_BITS_MIN)
- sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
+ sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits));
/*
* The virtual KASLR displacement modulo 2MiB is decided by the
@@ -237,10 +273,10 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
* fill in the high bits from the seed.
*/
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- u64 kaslr_seed = kaslr_early_init(fdt, chosen);
+ u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);
if (kaslr_seed && kaslr_requires_kpti())
- arm64_use_ng_mappings = true;
+ arm64_use_ng_mappings = ng_mappings_allowed();
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
}
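
For illustration only (not part of the patch): ng_mappings_allowed() re-implements the MIDR range check in the position-independent early boot code because the normal cpufeature/errata framework is not up yet. The standalone C sketch below shows the shape of that check under the architectural MIDR_EL1 field layout; the struct, helper names and table values here are illustrative, not the kernel's midr_is_cpu_model_range() implementation.

/*
 * Standalone sketch of a MIDR model/revision range check: a CPU is
 * rejected if its MIDR_EL1 value falls inside an affected range.
 * Table values are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct midr_range {
	uint32_t model;			/* implementer/arch/partnum, variant+rev masked off */
	uint16_t rv_min, rv_max;	/* (variant << 4) | revision */
};

#define MIDR_MODEL_MASK	0xff0ffff0u	/* implementer, architecture, partnum */

static uint16_t midr_rv(uint32_t midr)
{
	return (uint16_t)((((midr >> 20) & 0xf) << 4) | (midr & 0xf));
}

static bool midr_in_range(uint32_t midr, const struct midr_range *r)
{
	return (midr & MIDR_MODEL_MASK) == (r->model & MIDR_MODEL_MASK) &&
	       midr_rv(midr) >= r->rv_min && midr_rv(midr) <= r->rv_max;
}

int main(void)
{
	/* Hypothetical table: one affected part, revisions r0p0..r1p1. */
	const struct midr_range affected[] = {
		{ .model = 0x430f0a10, .rv_min = 0x00, .rv_max = 0x11 },
		{ 0 },
	};
	uint32_t midr = 0x430f0a11;	/* example MIDR_EL1 value */
	bool ng_ok = true;

	for (const struct midr_range *r = affected; r->model; r++)
		if (midr_in_range(midr, r))
			ng_ok = false;

	printf("non-global mappings allowed: %s\n", ng_ok ? "yes" : "no");
	return 0;
}
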
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 5410b2cac590..de52cd85c691 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -26,12 +26,13 @@
* @va_offset: Offset between a physical page and its current mapping
* in the VA space
*/
-void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
- u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
- int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
u64 lmask = (PAGE_SIZE << lshift) - 1;
start &= PAGE_MASK;
@@ -45,12 +46,12 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
* clearing the mapping
*/
if (protval)
- protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+ protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
while (start < end) {
u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
- if (level < 3 && (start | next | pa) & lmask) {
+ if (level < 2 || (level == 2 && (start | next | pa) & lmask)) {
/*
* This chunk needs a finer grained mapping. Create a
* table mapping if necessary and recurse.
@@ -87,19 +88,22 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
}
}
-asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
+asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
{
- u64 ptep = (u64)pg_dir + PAGE_SIZE;
+ phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_val(text_prot) &= ~clrmask;
pgprot_val(data_prot) &= ~clrmask;
- map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
- text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
- map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
- data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ /* MMU is off; pointer casts to phys_addr_t are safe */
+ map_range(&ptep, (u64)_stext, (u64)__initdata_begin,
+ (phys_addr_t)_stext, text_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)__initdata_begin, (u64)_end,
+ (phys_addr_t)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
return ptep;
}
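
A quick worked example of the lshift/lmask arithmetic that map_range() now expresses via PTDESC_TABLE_SHIFT: with 4 KiB pages and 8-byte descriptors, each table level resolves PAGE_SHIFT - 3 = 9 bits, so one entry covers 4 KiB at level 3, 2 MiB at level 2, 1 GiB at level 1 and 512 GiB at level 0. A minimal standalone sketch (assuming that 4 KiB configuration):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define PTDESC_TABLE_SHIFT	(PAGE_SHIFT - 3)	/* 9 for 4 KiB pages */

int main(void)
{
	for (int level = 0; level <= 3; level++) {
		int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
		uint64_t lmask = ((uint64_t)PAGE_SIZE << lshift) - 1;

		/* Bytes covered by a single descriptor at this level. */
		printf("level %d: entry spans %#llx bytes (lmask %#llx)\n",
		       level, (unsigned long long)(lmask + 1),
		       (unsigned long long)lmask);
	}
	return 0;
}
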
diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index 49d8b40e61bc..bbe7d30ed12b 100644
--- a/arch/arm64/kernel/pi/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -50,6 +50,10 @@ bool dynamic_scs_is_enabled;
#define DW_CFA_GNU_negative_offset_extended 0x2f
#define DW_CFA_hi_user 0x3f
+#define DW_EH_PE_sdata4 0x0b
+#define DW_EH_PE_sdata8 0x0c
+#define DW_EH_PE_pcrel 0x10
+
enum {
PACIASP = 0xd503233f,
AUTIASP = 0xd50323bf,
@@ -120,7 +124,12 @@ struct eh_frame {
union {
struct { // CIE
u8 version;
- u8 augmentation_string[];
+ u8 augmentation_string[3];
+ u8 code_alignment_factor;
+ u8 data_alignment_factor;
+ u8 return_address_register;
+ u8 augmentation_data_size;
+ u8 fde_pointer_format;
};
struct { // FDE
@@ -128,29 +137,38 @@ struct eh_frame {
s32 range;
u8 opcodes[];
};
+
+ struct { // FDE
+ s64 initial_loc64;
+ s64 range64;
+ u8 opcodes64[];
+ };
};
};
static int scs_handle_fde_frame(const struct eh_frame *frame,
- bool fde_has_augmentation_data,
int code_alignment_factor,
+ bool use_sdata8,
bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
const u8 *opcode = frame->opcodes;
+ int l;
- if (fde_has_augmentation_data) {
- int l;
+ if (use_sdata8) {
+ loc = (u64)&frame->initial_loc64 + frame->initial_loc64;
+ opcode = frame->opcodes64;
+ size -= 8;
+ }
- // assume single byte uleb128_t
- if (WARN_ON(*opcode & BIT(7)))
- return -ENOEXEC;
+ // assume single byte uleb128_t for augmentation data size
+ if (*opcode & BIT(7))
+ return EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE;
- l = *opcode++;
- opcode += l;
- size -= l + 1;
- }
+ l = *opcode++;
+ opcode += l;
+ size -= l + 1;
/*
* Starting from 'loc', apply the CFA opcodes that advance the location
@@ -201,20 +219,20 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
break;
default:
- return -ENOEXEC;
+ return EDYNSCS_INVALID_CFA_OPCODE;
}
}
return 0;
}
-int scs_patch(const u8 eh_frame[], int size)
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run)
{
+ int code_alignment_factor = 1;
+ bool fde_use_sdata8 = false;
const u8 *p = eh_frame;
while (size > 4) {
const struct eh_frame *frame = (const void *)p;
- bool fde_has_augmentation_data = true;
- int code_alignment_factor = 1;
int ret;
if (frame->size == 0 ||
@@ -223,28 +241,49 @@ int scs_patch(const u8 eh_frame[], int size)
break;
if (frame->cie_id_or_pointer == 0) {
- const u8 *p = frame->augmentation_string;
-
- /* a 'z' in the augmentation string must come first */
- fde_has_augmentation_data = *p == 'z';
+ /*
+ * Require presence of augmentation data (z) with a
+ * specifier for the size of the FDE initial_loc and
+ * range fields (R), and nothing else.
+ */
+ if (strcmp(frame->augmentation_string, "zR"))
+ return EDYNSCS_INVALID_CIE_HEADER;
/*
* The code alignment factor is a uleb128 encoded field
* but given that the only sensible values are 1 or 4,
- * there is no point in decoding the whole thing.
+ * there is no point in decoding the whole thing. Also
+ * sanity check the size of the data alignment factor
+ * field, and the values of the return address register
+ * and augmentation data size fields.
*/
- p += strlen(p) + 1;
- if (!WARN_ON(*p & BIT(7)))
- code_alignment_factor = *p;
+ if ((frame->code_alignment_factor & BIT(7)) ||
+ (frame->data_alignment_factor & BIT(7)) ||
+ frame->return_address_register != 30 ||
+ frame->augmentation_data_size != 1)
+ return EDYNSCS_INVALID_CIE_HEADER;
+
+ code_alignment_factor = frame->code_alignment_factor;
+
+ switch (frame->fde_pointer_format) {
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata4:
+ fde_use_sdata8 = false;
+ break;
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata8:
+ fde_use_sdata8 = true;
+ break;
+ default:
+ return EDYNSCS_INVALID_CIE_SDATA_SIZE;
+ }
} else {
- ret = scs_handle_fde_frame(frame,
- fde_has_augmentation_data,
- code_alignment_factor,
- true);
+ ret = scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, !skip_dry_run);
if (ret)
return ret;
- scs_handle_fde_frame(frame, fde_has_augmentation_data,
- code_alignment_factor, false);
+
+ if (!skip_dry_run)
+ scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, false);
}
p += sizeof(frame->size) + frame->size;
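
With the CIE header now restricted to the "zR" augmentation, the only FDE pointer encodings accepted are pc-relative sdata4 and sdata8, i.e. a signed value relative to the address of the field itself. Below is a minimal standalone sketch of that resolution step (not the kernel parser; the helper name and values are made up):

#include <stdint.h>
#include <stdio.h>

#define DW_EH_PE_sdata4	0x0b
#define DW_EH_PE_sdata8	0x0c
#define DW_EH_PE_pcrel	0x10

static uint64_t decode_pcrel(const void *field, uint8_t encoding)
{
	if (encoding == (DW_EH_PE_pcrel | DW_EH_PE_sdata4))
		return (uint64_t)(uintptr_t)field + *(const int32_t *)field;
	if (encoding == (DW_EH_PE_pcrel | DW_EH_PE_sdata8))
		return (uint64_t)(uintptr_t)field + *(const int64_t *)field;
	return 0;	/* unsupported encoding */
}

int main(void)
{
	/* Pretend the described function starts 0x40 bytes after the field. */
	int32_t rel = 0x40;
	uint64_t loc = decode_pcrel(&rel, DW_EH_PE_pcrel | DW_EH_PE_sdata4);

	printf("field at %p -> initial_loc %#llx\n",
	       (void *)&rel, (unsigned long long)loc);
	return 0;
}
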
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index c91e5e965cd3..aec3172d4003 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -22,15 +22,17 @@ static inline void *prel64_to_pointer(const prel64_t *offset)
extern bool dynamic_scs_is_enabled;
extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
+extern pgd_t init_pg_dir[], init_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
-int scs_patch(const u8 eh_frame[], int size);
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
-void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset);
-asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
+asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
-asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
+asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 968d5fffe233..4137cc5ef031 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* Instructions which load PC relative literals are not going to work
* when executed from an XOL slot. Instructions doing an exclusive
* load/store are not going to complete successfully when single-step
- * exception handling happens in the middle of the sequence.
+ * exception handling happens in the middle of the sequence. Memory
+ * copy/set instructions require that all three instructions be placed
+ * consecutively in memory.
*/
if (aarch64_insn_uses_literal(insn) ||
- aarch64_insn_is_exclusive(insn))
+ aarch64_insn_is_exclusive(insn) ||
+ aarch64_insn_is_mops(insn))
return false;
return true;
@@ -73,9 +76,18 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api)
{
/*
+ * While the 'nop' instruction can execute in the out-of-line slot,
+ * simulating it during breakpoint handling offers better performance.
+ */
+ if (aarch64_insn_is_nop(insn)) {
+ api->handler = simulate_nop;
+ return INSN_GOOD_NO_SLOT;
+ }
+
+ /*
* Instructions reading or modifying the PC won't work from the XOL
* slot.
*/
@@ -96,13 +108,10 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
aarch64_insn_is_bl(insn)) {
api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
- aarch64_insn_is_blr(insn) ||
- aarch64_insn_is_ret(insn)) {
- api->handler = simulate_br_blr_ret;
- } else if (aarch64_insn_is_ldr_lit(insn)) {
- api->handler = simulate_ldr_literal;
- } else if (aarch64_insn_is_ldrsw_lit(insn)) {
- api->handler = simulate_ldrsw_literal;
+ aarch64_insn_is_blr(insn)) {
+ api->handler = simulate_br_blr;
+ } else if (aarch64_insn_is_ret(insn)) {
+ api->handler = simulate_ret;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -137,9 +146,20 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
enum probe_insn decoded;
- probe_opcode_t insn = le32_to_cpu(*addr);
- probe_opcode_t *scan_end = NULL;
+ u32 insn = le32_to_cpu(*addr);
+ kprobe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
+ struct arch_probe_insn *api = &asi->api;
+
+ if (aarch64_insn_is_ldr_lit(insn)) {
+ api->handler = simulate_ldr_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ api->handler = simulate_ldrsw_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else {
+ decoded = arm_probe_decode_insn(insn, &asi->api);
+ }
/*
* If there's a symbol defined in front of and near enough to
@@ -157,7 +177,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
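
The hunks above move literal-load simulation into the kprobe-only path and add NOP simulation, since a NOP never needs an out-of-line slot. A trivial standalone sketch of that decision, assuming only the architectural NOP encoding (HINT #0, 0xd503201f); the helper here is made up and is not the kernel's aarch64_insn_is_nop():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool insn_is_nop(uint32_t insn)
{
	return insn == 0xd503201f;	/* HINT #0 */
}

int main(void)
{
	uint32_t insn = 0xd503201f;

	if (insn_is_nop(insn))
		printf("simulate: just advance the PC by 4\n");
	else
		printf("consider an out-of-line single-step slot\n");
	return 0;
}
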
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index 8b758c5a2062..0e4195de8206 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -28,6 +28,6 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
#endif
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 4268678d0e86..43a0361a8bf0 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -27,7 +28,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/irq.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/system_misc.h>
@@ -41,9 +42,23 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ if (set_memory_rox((unsigned long)addr, 1)) {
+ execmem_free(addr);
+ return NULL;
+ }
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
- kprobe_opcode_t *addr = p->ainsn.api.insn;
+ kprobe_opcode_t *addr = p->ainsn.xol_insn;
/*
* Prepare insn slot, Mark Rutland points out it depends on a couple of
@@ -64,20 +79,20 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
* the BRK exception handler, so it is unnecessary to generate
* Context-Synchronization-Event via ISB again.
*/
- aarch64_insn_patch_text_nosync(addr, p->opcode);
+ aarch64_insn_patch_text_nosync(addr, le32_to_cpu(p->opcode));
aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.api.restore = (unsigned long) p->addr +
+ p->ainsn.xol_restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instruction is not executed xol. No need to adjust the PC */
- p->ainsn.api.restore = 0;
+ p->ainsn.xol_restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
@@ -85,7 +100,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (p->ainsn.api.handler)
- p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
+ p->ainsn.api.handler(le32_to_cpu(p->opcode), (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(p, kcb, regs);
@@ -99,7 +114,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
/* copy instruction */
- p->opcode = le32_to_cpu(*p->addr);
+ p->opcode = *p->addr;
if (search_exception_tables(probe_addr))
return -EINVAL;
@@ -110,18 +125,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.api.insn = NULL;
+ p->ainsn.xol_insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.api.insn = get_insn_slot();
- if (!p->ainsn.api.insn)
+ p->ainsn.xol_insn = get_insn_slot();
+ if (!p->ainsn.xol_insn)
return -ENOMEM;
break;
}
/* prepare the instruction */
- if (p->ainsn.api.insn)
+ if (p->ainsn.xol_insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -142,15 +157,16 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
void *addr = p->addr;
+ u32 insn = le32_to_cpu(p->opcode);
- aarch64_insn_patch_text(&addr, &p->opcode, 1);
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.api.insn) {
- free_insn_slot(p->ainsn.api.insn, 0);
- p->ainsn.api.insn = NULL;
+ if (p->ainsn.xol_insn) {
+ free_insn_slot(p->ainsn.xol_insn, 0);
+ p->ainsn.xol_insn = NULL;
}
}
@@ -205,9 +221,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.api.insn) {
+ if (p->ainsn.xol_insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.api.insn;
+ slot = (unsigned long)p->ainsn.xol_insn;
kprobes_save_local_irqflag(kcb, regs);
instruction_pointer_set(regs, slot);
@@ -245,8 +261,8 @@ static void __kprobes
post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
{
/* return addr restore if non-branching insn */
- if (cur->ainsn.api.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.api.restore);
+ if (cur->ainsn.xol_restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.xol_restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -291,8 +307,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
return 0;
}
-static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe *p, *cur_kprobe;
struct kprobe_ctlblk *kcb;
@@ -335,20 +351,15 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kprobes_break_hook = {
- .imm = KPROBES_BRK_IMM,
- .fn = kprobe_breakpoint_handler,
-};
-
-static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long addr = instruction_pointer(regs);
struct kprobe *cur = kprobe_running();
if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
- ((unsigned long)&cur->ainsn.api.insn[1] == addr)) {
+ ((unsigned long)&cur->ainsn.xol_insn[1] == addr)) {
kprobes_restore_local_irqflag(kcb, regs);
post_kprobe_handler(cur, kcb, regs);
@@ -359,13 +370,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook kprobes_break_ss_hook = {
- .imm = KPROBES_BRK_SS_IMM,
- .fn = kprobe_breakpoint_ss_handler,
-};
-
-static int __kprobes
-kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
if (regs->pc != (unsigned long)__kretprobe_trampoline)
return DBG_HOOK_ERROR;
@@ -374,11 +380,6 @@ kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kretprobes_break_hook = {
- .imm = KRETPROBES_BRK_IMM,
- .fn = kretprobe_breakpoint_handler,
-};
-
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@@ -421,9 +422,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
- register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_break_hook(&kprobes_break_ss_hook);
- register_kernel_break_hook(&kretprobes_break_hook);
-
return 0;
}
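
alloc_insn_page() now allocates the XOL page via execmem_alloc() and then flips it to read+execute with set_memory_rox(), so instruction slots are never simultaneously writable and executable. A userspace analogue of that flow (a sketch, not the kernel path; it omits the I-cache maintenance arm64 would need before actually executing the slot):

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned char *slot = mmap(NULL, page, PROT_READ | PROT_WRITE,
				   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (slot == MAP_FAILED)
		return 1;

	/* "Patch" the slot: A64 NOP followed by RET. */
	unsigned int insns[] = { 0xd503201f, 0xd65f03c0 };
	memcpy(slot, insns, sizeof(insns));

	/* Equivalent in spirit to set_memory_rox(): no further writes allowed. */
	if (mprotect(slot, page, PROT_READ | PROT_EXEC)) {
		munmap(slot, page);
		return 1;
	}

	printf("slot at %p is now read+execute\n", (void *)slot);
	munmap(slot, page);
	return 0;
}
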
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index a362f3dbb3d1..b60739d3983f 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -12,7 +12,7 @@
SYM_CODE_START(__kretprobe_trampoline)
/*
* Trigger a breakpoint exception. The PC will be adjusted by
- * kretprobe_breakpoint_handler(), and no subsequent instructions will
+ * kretprobe_brk_handler(), and no subsequent instructions will
* be executed from the trampoline.
*/
brk #KRETPROBES_BRK_IMM
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 22d0b3252476..89fbeb32107e 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,6 +13,7 @@
#include <asm/traps.h>
#include "simulate-insn.h"
+#include "asm/gcs.h"
#define bbl_displacement(insn) \
sign_extend32(((insn) & 0x3ffffff) << 2, 27)
@@ -49,6 +50,21 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
return lower_32_bits(pt_regs_read_reg(regs, reg));
}
+static inline int update_lr(struct pt_regs *regs, long addr)
+{
+ int err = 0;
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ push_user_gcs(addr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ return err;
+ }
+ }
+ procedure_link_pointer_set(regs, addr);
+ return err;
+}
+
static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
{
int xn = opcode & 0x1f;
@@ -107,9 +123,9 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
{
int disp = bbl_displacement(opcode);
- /* Link register is x30 */
if (opcode & (1 << 31))
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
instruction_pointer_set(regs, addr + disp);
}
@@ -126,16 +142,34 @@ simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs)
}
void __kprobes
-simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs)
+simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
{
int xn = (opcode >> 5) & 0x1f;
+ u64 b_target = get_x_reg(regs, xn);
- /* update pc first in case we're doing a "blr lr" */
- instruction_pointer_set(regs, get_x_reg(regs, xn));
-
- /* Link register is x30 */
if (((opcode >> 21) & 0x3) == 1)
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
+
+ instruction_pointer_set(regs, b_target);
+}
+
+void __kprobes
+simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
+{
+ u64 ret_addr;
+ int err = 0;
+ int xn = (opcode >> 5) & 0x1f;
+ u64 r_target = get_x_reg(regs, xn);
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ ret_addr = pop_user_gcs(&err);
+ if (err || ret_addr != r_target) {
+ force_sig(SIGSEGV);
+ return;
+ }
+ }
+ instruction_pointer_set(regs, r_target);
}
void __kprobes
@@ -171,17 +205,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- u64 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (u64 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
if (opcode & (1 << 30)) /* x0-x30 */
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr));
else /* w0-w30 */
- set_w_reg(regs, xn, *load_addr);
+ set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
@@ -189,14 +221,18 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- s32 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (s32 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
+
+void __kprobes
+simulate_nop(u32 opcode, long addr, struct pt_regs *regs)
+{
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+}
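
The GCS-aware simulation above maintains one invariant: a simulated BL/BLR pushes the return address onto the shadow stack before updating the LR, and a simulated RET only proceeds if the value popped from the shadow stack matches the return target, otherwise the task gets SIGSEGV. A toy model of that invariant in plain C (no GCS hardware; names and addresses are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t shadow[16];
static int sp;

static void sim_bl(uint64_t *lr, uint64_t ret_addr)
{
	shadow[sp++] = ret_addr;	/* push_user_gcs() analogue */
	*lr = ret_addr;			/* procedure_link_pointer_set() analogue */
}

static bool sim_ret(uint64_t target)
{
	uint64_t popped = shadow[--sp];	/* pop_user_gcs() analogue */

	return popped == target;	/* a mismatch would raise SIGSEGV */
}

int main(void)
{
	uint64_t lr = 0;

	sim_bl(&lr, 0x1004);			/* BL at 0x1000 */
	printf("ret ok:  %d\n", sim_ret(lr));	/* returns to LR: allowed */

	sim_bl(&lr, 0x2004);
	printf("ret bad: %d\n", sim_ret(0xdead));	/* tampered target: rejected */
	return 0;
}
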
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
index e065dc92218e..9e772a292d56 100644
--- a/arch/arm64/kernel/probes/simulate-insn.h
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -11,10 +11,12 @@
void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs);
-void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ret(u32 opcode, long addr, struct pt_regs *regs);
void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_nop(u32 opcode, long addr, struct pt_regs *regs);
#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index d49aef2657cd..941668800aea 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -6,6 +6,7 @@
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include "decode-insn.h"
@@ -17,12 +18,20 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *xol_page_kaddr = kmap_atomic(page);
void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+ /*
+ * Initial cache maintenance of the xol page is done via set_pte_at().
+ * Subsequent CMOs are only needed if the xol slot changes.
+ */
+ if (!memcmp(dst, src, len))
+ goto done;
+
/* Initialize the slot */
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
+done:
kunmap_atomic(xol_page_kaddr);
}
@@ -34,7 +43,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long addr)
{
- probe_opcode_t insn;
+ u32 insn;
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
@@ -42,7 +51,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
switch (arm_probe_decode_insn(insn, &auprobe->api)) {
case INSN_REJECTED:
@@ -102,13 +111,13 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- probe_opcode_t insn;
+ u32 insn;
unsigned long addr;
if (!auprobe->simulate)
return false;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
addr = instruction_pointer(regs);
if (auprobe->api.handler)
@@ -122,7 +131,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
@@ -151,11 +160,43 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
struct pt_regs *regs)
{
unsigned long orig_ret_vaddr;
+ unsigned long gcs_ret_vaddr;
+ int err = 0;
+ u64 gcspr;
orig_ret_vaddr = procedure_link_pointer(regs);
+
+ if (task_gcs_el0_enabled(current)) {
+ gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+ gcs_ret_vaddr = get_user_gcs((__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+
+ /*
+ * If the LR and the GCS return address don't match, some kind of PAC
+ * signing or control-flow change occurred since entering the probed
+ * function, likely because the user is attempting to retprobe an
+ * instruction that isn't a function boundary or is inside a leaf
+ * function. Explicitly abort this retprobe, since hijacking the return
+ * address would generate a GCS exception.
+ */
+ if (gcs_ret_vaddr != orig_ret_vaddr) {
+ orig_ret_vaddr = -1;
+ goto out;
+ }
+
+ put_user_gcs(trampoline_vaddr, (__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+ }
+
/* Replace the return addr with trampoline addr */
procedure_link_pointer_set(regs, trampoline_vaddr);
+out:
return orig_ret_vaddr;
}
@@ -165,7 +206,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
return NOTIFY_DONE;
}
-static int uprobe_breakpoint_handler(struct pt_regs *regs,
+int uprobe_brk_handler(struct pt_regs *regs,
unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
@@ -174,7 +215,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-static int uprobe_single_step_handler(struct pt_regs *regs,
+int uprobe_single_step_handler(struct pt_regs *regs,
unsigned long esr)
{
struct uprobe_task *utask = current->utask;
@@ -186,23 +227,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-/* uprobe breakpoint handler hook */
-static struct break_hook uprobes_break_hook = {
- .imm = UPROBES_BRK_IMM,
- .fn = uprobe_breakpoint_handler,
-};
-
-/* uprobe single step handler hook */
-static struct step_hook uprobes_step_hook = {
- .fn = uprobe_single_step_handler,
-};
-
-static int __init arch_init_uprobes(void)
-{
- register_user_break_hook(&uprobes_break_hook);
- register_user_step_hook(&uprobes_step_hook);
-
- return 0;
-}
-
-device_initcall(arch_init_uprobes);
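
The uretprobe hunk above only hijacks the return address when the top shadow-stack entry agrees with the LR; on a mismatch it returns -1 so the retprobe is abandoned rather than letting the later RET trip a GCS fault, and on success it rewrites both the LR and the GCS entry so they stay in sync. A compact sketch of that decision with toy values (not kernel code):

#include <stdint.h>
#include <stdio.h>

static uint64_t hijack(uint64_t *lr, uint64_t *gcs_top, uint64_t trampoline,
		       int gcs_enabled)
{
	uint64_t orig = *lr;

	if (gcs_enabled) {
		if (*gcs_top != orig)
			return (uint64_t)-1;	/* mismatch: abort the retprobe */
		*gcs_top = trampoline;		/* keep GCS and LR in sync */
	}
	*lr = trampoline;
	return orig;
}

int main(void)
{
	uint64_t lr = 0x401234, gcs = 0x401234;
	uint64_t orig = hijack(&lr, &gcs, 0x7f0000, 1);

	printf("orig %#llx, lr now %#llx, gcs now %#llx\n",
	       (unsigned long long)orig, (unsigned long long)lr,
	       (unsigned long long)gcs);
	return 0;
}
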
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c3..fba7ca102a8c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,11 +43,13 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -226,7 +228,7 @@ void __show_regs(struct pt_regs *regs)
printk("sp : %016llx\n", sp);
if (system_uses_irq_prio_masking())
- printk("pmr_save: %08llx\n", regs->pmr_save);
+ printk("pmr: %08x\n", regs->pmr);
i = top_reg;
@@ -271,12 +273,69 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+#ifdef CONFIG_ARM64_GCS
+
+static void flush_gcs(void)
+{
+ if (!system_supports_gcs())
+ return;
+
+ current->thread.gcspr_el0 = 0;
+ current->thread.gcs_base = 0;
+ current->thread.gcs_size = 0;
+ current->thread.gcs_el0_mode = 0;
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+ write_sysreg_s(0, SYS_GCSPR_EL0);
+}
+
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ unsigned long gcs;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ p->thread.gcs_base = 0;
+ p->thread.gcs_size = 0;
+
+ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
+ gcs = gcs_alloc_thread_stack(p, args);
+ if (IS_ERR_VALUE(gcs))
+ return PTR_ERR((void *)gcs);
+
+ return 0;
+}
+
+#else
+
+static void flush_gcs(void) { }
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ return 0;
+}
+
+#endif
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
+ flush_gcs();
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -286,50 +345,34 @@ void arch_release_task_struct(struct task_struct *tsk)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- if (current->mm)
- fpsimd_preserve_current_state();
+ /*
+ * The current/src task's FPSIMD state may or may not be live, and may
+ * have been altered by ptrace after entry to the kernel. Save the
+ * effective FPSIMD state so that this will be copied into dst.
+ */
+ fpsimd_save_and_flush_current_state();
+ fpsimd_sync_from_effective_state(src);
+
*dst = *src;
/*
- * Detach src's sve_state (if any) from dst so that it does not
- * get erroneously used or freed prematurely. dst's copies
- * will be allocated on demand later on if dst uses SVE.
- * For consistency, also clear TIF_SVE here: this could be done
- * later in copy_process(), but to avoid tripping up future
- * maintainers it is best not to leave TIF flags and buffers in
- * an inconsistent state, even temporarily.
+ * Drop stale reference to src's sve_state and convert dst to
+ * non-streaming FPSIMD mode.
*/
+ dst->thread.fp_type = FP_STATE_FPSIMD;
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ task_smstop_sm(dst);
/*
- * In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA and ZT state so duplicate
- * it here. This may be shortly freed if we exec() or if
- * CLONE_SETTLS but it's simpler to do it here. To avoid
- * confusing the rest of the code ensure that we have a
- * sve_state allocated whenever sme_state is allocated.
+ * Drop stale reference to src's sme_state and ensure dst has ZA
+ * disabled.
+ *
+ * When necessary, ZA will be inherited later in copy_thread_za().
*/
- if (thread_za_enabled(&src->thread)) {
- dst->thread.sve_state = kzalloc(sve_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sve_state)
- return -ENOMEM;
-
- dst->thread.sme_state = kmemdup(src->thread.sme_state,
- sme_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sme_state) {
- kfree(dst->thread.sve_state);
- dst->thread.sve_state = NULL;
- return -ENOMEM;
- }
- } else {
- dst->thread.sme_state = NULL;
- clear_tsk_thread_flag(dst, TIF_SME);
- }
-
- dst->thread.fp_type = FP_STATE_FPSIMD;
+ dst->thread.sme_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr &= ~SVCR_ZA_MASK;
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -337,14 +380,40 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+static int copy_thread_za(struct task_struct *dst, struct task_struct *src)
+{
+ if (!thread_za_enabled(&src->thread))
+ return 0;
+
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+
+ set_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
+ u64 clone_flags = args->flags;
unsigned long stack_start = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ int ret;
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -368,8 +437,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
- if (system_supports_tpidr2())
- p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
@@ -379,13 +449,43 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
}
/*
+ * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and
+ * TPIDR2 need to be manipulated as a pair, and either both
+ * need to be inherited or both need to be reset.
+ *
+ * Within a process, child threads must not inherit their
+ * parent's TPIDR2 value or they may clobber their parent's
+ * stack at some later point.
+ *
+ * When a process is fork()'d, the child must inherit ZA and
+ * TPIDR2 from its parent in case there was dormant ZA state.
+ *
+ * Use CLONE_VM to determine when the child will share the
+ * address space with the parent, and cannot safely inherit the
+ * state.
+ */
+ if (system_supports_sme()) {
+ if (!(clone_flags & CLONE_VM)) {
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ ret = copy_thread_za(p, current);
+ if (ret)
+ return ret;
+ } else {
+ p->thread.tpidr2_el0 = 0;
+ WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK);
+ }
+ }
+
+ /*
* If a TLS pointer was passed to clone, use it for the new
- * thread. We also reset TPIDR2 if it's in use.
+ * thread.
*/
- if (clone_flags & CLONE_SETTLS) {
+ if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
- p->thread.tpidr2_el0 = 0;
- }
+
+ ret = copy_thread_gcs(p, args);
+ if (ret != 0)
+ return ret;
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
@@ -396,9 +496,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
*/
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
+ childregs->stackframe.type = FRAME_META_TYPE_FINAL;
p->thread.cpu_context.x19 = (unsigned long)args->fn;
p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
+
+ if (system_supports_poe())
+ p->thread.por_el0 = POR_EL0_INIT;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
@@ -406,7 +510,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* For the benefit of the unwinder, set up childregs->stackframe
* as the final frame for the new task.
*/
- p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
+ p->thread.cpu_context.fp = (unsigned long)&childregs->stackframe;
ptrace_hw_copy_thread(p);
@@ -426,7 +530,7 @@ static void tls_thread_switch(struct task_struct *next)
if (is_compat_thread(task_thread_info(next)))
write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
- else if (!arm64_kernel_unmapped_at_el0())
+ else
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
@@ -471,28 +575,109 @@ static void entry_task_switch(struct task_struct *next)
__this_cpu_write(__entry_task, next);
}
+#ifdef CONFIG_ARM64_GCS
+
+void gcs_preserve_current_state(void)
+{
+ current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+}
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+ if (!system_supports_gcs())
+ return;
+
+ /* GCSPR_EL0 is always readable */
+ gcs_preserve_current_state();
+ write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0);
+
+ if (current->thread.gcs_el0_mode != next->thread.gcs_el0_mode)
+ gcs_set_el0_mode(next);
+
+ /*
+ * Ensure that GCS memory effects of the 'prev' thread are
+ * ordered before other memory accesses with release semantics
+ * (or preceded by a DMB) on the current PE. In addition, any
+ * memory accesses with acquire semantics (or succeeded by a
+ * DMB) are ordered before GCS memory effects of the 'next'
+ * thread. This will ensure that the GCS memory effects are
+ * visible to other PEs in case of migration.
+ */
+ if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next))
+ gcsb_dsync();
+}
+
+#else
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+}
+
+#endif
+
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 (which affects the 32bit view
+ * of CNTVCT), for various other errata that require trapping all CNTVCT{,_EL0}
+ * accesses, and for prctl(PR_SET_TSC). Ensure access is disabled iff a
+ * workaround is required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
+{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
+ }
}
/*
@@ -528,14 +713,17 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
+ gcs_thread_switch(next);
/*
- * Complete any pending TLB or cache maintenance on this CPU in case
- * the thread migrates to a different CPU.
- * This full barrier is also required by the membarrier system
- * call.
+ * Complete any pending TLB or cache maintenance on this CPU in case the
+ * thread migrates to a different CPU. This full barrier is also
+ * required by the membarrier system call. Additionally, it makes any
+ * in-progress pgtable writes visible to the table walker; see
+ * emit_pte_barriers().
*/
dsb(ish);
@@ -645,7 +833,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -667,10 +855,14 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
if (is_compat_thread(ti))
return -EINVAL;
- if (system_supports_mte())
+ if (system_supports_mte()) {
valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
| PR_MTE_TAG_MASK;
+ if (cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ valid_mask |= PR_MTE_STORE_ONLY;
+ }
+
if (arg & ~valid_mask)
return -EINVAL;
@@ -711,7 +903,7 @@ long get_tagged_addr_ctrl(struct task_struct *task)
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static struct ctl_table tagged_addr_sysctl_table[] = {
+static const struct ctl_table tagged_addr_sysctl_table[] = {
{
.procname = "tagged_addr_disabled",
.mode = 0644,
@@ -754,3 +946,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
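
From userspace, the new code is reached through the generic PR_SET_TSC/PR_GET_TSC prctls, with PR_TSC_SIGSEGV making EL0 counter-timer reads trap. A small usage sketch (assuming a kernel with the change above; the prctl constants come from the shared uapi header, and error handling is kept minimal):

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_TSC_SIGSEGV
#define PR_GET_TSC	25
#define PR_SET_TSC	26
#define PR_TSC_ENABLE	1
#define PR_TSC_SIGSEGV	2
#endif

int main(void)
{
	int mode = 0;

	if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0))
		perror("PR_SET_TSC");

	if (prctl(PR_GET_TSC, &mode, 0, 0, 0) == 0)
		printf("tsc mode: %s\n",
		       mode == PR_TSC_SIGSEGV ? "SIGSEGV" : "ENABLE");

	/*
	 * A direct CNTVCT_EL0 read here (e.g. via inline asm) would now be
	 * expected to deliver SIGSEGV instead of returning the counter.
	 */
	return 0;
}
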
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index baca47bd443c..80a580e019c5 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param);
static bool spectre_v2_mitigations_off(void)
{
- bool ret = __nospectre_v2 || cpu_mitigations_off();
-
- if (ret)
- pr_info_once("spectre-v2 mitigation disabled by command line option\n");
-
- return ret;
+ return __nospectre_v2 || cpu_mitigations_off();
}
static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
@@ -172,7 +167,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
return SPECTRE_UNAFFECTED;
/* Alternatively, we have a list of unaffected CPUs */
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ if (is_midr_in_range_list(spectre_v2_safe_list))
return SPECTRE_UNAFFECTED;
return SPECTRE_VULNERABLE;
@@ -331,7 +326,7 @@ bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
};
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+ return is_midr_in_range_list(spectre_v3a_unsafe_list);
}
void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
@@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param);
*/
static bool spectre_v4_mitigations_off(void)
{
- bool ret = cpu_mitigations_off() ||
- __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
-
- if (ret)
- pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
-
- return ret;
+ return cpu_mitigations_off() ||
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
}
/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
@@ -475,7 +465,7 @@ static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
{ /* sentinel */ },
};
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+ if (is_midr_in_range_list(spectre_v4_safe_list))
return SPECTRE_UNAFFECTED;
/* CPU features are detected first */
@@ -567,7 +557,7 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
* Mitigate this with an unconditional speculation barrier, as CPUs
* could mis-speculate branches and bypass a conditional barrier.
*/
- if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_SPECULATIVE_SSBS))
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386))
spec_bar();
return SPECTRE_MITIGATED;
@@ -845,52 +835,91 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k132_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP09),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(spectre_bhb_k132_list))
+ k = 132;
+ else if (is_midr_in_range_list(spectre_bhb_k38_list))
+ k = 38;
+ else if (is_midr_in_range_list(spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -916,29 +945,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool has_spectre_bhb_fw_mitigation(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
- static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- {},
- };
- bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
- spectre_bhb_firmware_mitigated_list);
-
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return has_smccc && fw_state == SPECTRE_MITIGATED;
}
static bool supports_ecbhb(int scope)
@@ -954,6 +967,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -962,16 +977,23 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (spectre_bhb_loop_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k`, but
+ * only if we aren't mitigating with clearbhb.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ return true;
+}
- return false;
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -991,7 +1013,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -1002,7 +1024,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cpu_cb;
- enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ enum mitigation_state state = SPECTRE_VULNERABLE;
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
@@ -1011,9 +1033,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
/* No point mitigating Spectre-BHB alone. */
} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
- pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
- } else if (cpu_mitigations_off() || __nospectre_bhb) {
- pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ /* Do nothing */
} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
state = SPECTRE_MITIGATED;
set_bit(BHB_HW, &system_bhb_mitigations);
@@ -1028,7 +1048,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1041,37 +1061,39 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
- fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (fw_state == SPECTRE_MITIGATED) {
- /*
- * Ensure KVM uses one of the spectre bp_hardening
- * vectors. The indirect vector doesn't include the EL3
- * call, so needs upgrading to
- * HYP_VECTOR_SPECTRE_INDIRECT.
- */
- if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
- data->slot += 1;
-
- this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
-
- /*
- * The WA3 call in the vectors supersedes the WA1 call
- * made during context-switch. Uninstall any firmware
- * bp_hardening callback.
- */
- cpu_cb = spectre_v2_get_sw_mitigation_cb();
- if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
- __this_cpu_write(bp_hardening_data.fn, NULL);
-
- state = SPECTRE_MITIGATED;
- set_bit(BHB_FW, &system_bhb_mitigations);
- }
+ } else if (has_spectre_bhb_fw_mitigation()) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
}
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
@@ -1100,7 +1122,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1109,7 +1130,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
@@ -1166,3 +1187,18 @@ void unpriv_ebpf_notify(int new_state)
pr_err("WARNING: %s", EBPF_WARN);
}
#endif
+
+void spectre_print_disabled_mitigations(void)
+{
+ /* Keep a single copy of the common message suffix to avoid duplication. */
+ const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n";
+
+ if (spectre_v2_mitigations_off())
+ pr_info("spectre-v2 %s", spectre_disabled_suffix);
+
+ if (spectre_v4_mitigations_off())
+ pr_info("spectre-v4 %s", spectre_disabled_suffix);
+
+ if (__nospectre_bhb || cpu_mitigations_off())
+ pr_info("spectre-bhb %s", spectre_disabled_suffix);
+}
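For readers following the max_bhb_k change above: each CPU's loop requirement is folded into a single system-wide maximum, and that one value is what spectre_bhb_patch_loop_iter() later patches into the mitigation loop's MOV. A minimal standalone sketch of that accumulate-then-patch idea (illustrative names only, not kernel code):

#include <stdio.h>

/* Illustrative stand-in for max_bhb_k: highest iteration count seen so far. */
static unsigned char max_k_example;

/* Called once per CPU with the loop count that CPU needs (0 if none). */
static void account_cpu_k(unsigned char cpu_k)
{
	if (cpu_k > max_k_example)
		max_k_example = cpu_k;
}

int main(void)
{
	/* e.g. a big.LITTLE mix where cores need 8, 24 or 32 iterations */
	unsigned char k_per_cpu[] = { 8, 32, 24, 8 };

	for (unsigned int i = 0; i < sizeof(k_per_cpu); i++)
		account_cpu_k(k_per_cpu[i]);

	/* A single MOV immediate is patched with this value on all CPUs. */
	printf("loop iterations patched into the vectors: %u\n", max_k_example);
	return 0;
}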
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 29a8e444db83..fabd732d0a2d 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
{
phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry);
int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry);
- if (err)
+ if (err && err != -EPERM)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
return err;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 0d022599eb61..b9bdd83fbbca 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -34,6 +34,7 @@
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mte.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
@@ -140,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
@@ -593,7 +594,7 @@ static int __fpr_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
uregs = &target->thread.uw.fpsimd_state;
@@ -625,7 +626,7 @@ static int __fpr_set(struct task_struct *target,
* Ensure target->thread.uw.fpsimd_state is up to date, so that a
* short copyin can't resurrect stale data.
*/
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
newstate = target->thread.uw.fpsimd_state;
@@ -652,7 +653,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- sve_sync_from_fpsimd_zeropad(target);
+ fpsimd_sync_to_effective_state_zeropad(target);
fpsimd_flush_task_state(target);
return ret;
@@ -719,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset
if (!system_supports_fpmr())
return -EINVAL;
+ fpmr = target->thread.uw.fpmr;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
if (ret)
return ret;
@@ -772,6 +775,11 @@ static void sve_init_header_from_task(struct user_sve_header *header,
task_type = ARM64_VEC_SVE;
active = (task_type == type);
+ if (active && target->thread.fp_type == FP_STATE_SVE)
+ header->flags = SVE_PT_REGS_SVE;
+ else
+ header->flags = SVE_PT_REGS_FPSIMD;
+
switch (type) {
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
@@ -786,19 +794,14 @@ static void sve_init_header_from_task(struct user_sve_header *header,
return;
}
- if (active) {
- if (target->thread.fp_type == FP_STATE_FPSIMD) {
- header->flags |= SVE_PT_REGS_FPSIMD;
- } else {
- header->flags |= SVE_PT_REGS_SVE;
- }
- }
-
header->vl = task_get_vl(target, type);
vq = sve_vq_from_vl(header->vl);
header->max_vl = vec_max_vl(type);
- header->size = SVE_PT_SIZE(vq, header->flags);
+ if (active)
+ header->size = SVE_PT_SIZE(vq, header->flags);
+ else
+ header->size = sizeof(header);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
}
@@ -817,18 +820,25 @@ static int sve_get_common(struct task_struct *target,
unsigned int vq;
unsigned long start, end;
+ if (target == current)
+ fpsimd_preserve_current_state();
+
/* Header */
sve_init_header_from_task(&header, target, type);
vq = sve_vq_from_vl(header.vl);
membuf_write(&to, &header, sizeof(header));
- if (target == current)
- fpsimd_preserve_current_state();
-
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+ /*
+ * When the requested vector type is not active, do not present data
+ * from the other mode to userspace.
+ */
+ if (header.size == sizeof(header))
+ return 0;
+
switch ((header.flags & SVE_PT_REGS_MASK)) {
case SVE_PT_REGS_FPSIMD:
return __fpr_get(target, regset, to);
@@ -856,7 +866,7 @@ static int sve_get_common(struct task_struct *target,
return membuf_zero(&to, end - start);
default:
- return 0;
+ BUILD_BUG();
}
}
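A hedged userspace sketch of what the sve_get_common() change above means for debuggers: when the requested vector type is not the task's active one, the regset read now returns just the header, so a caller can detect this by comparing the reported size against the header size. NT_ARM_SVE, struct user_sve_header and the SVE_PT_* flags come from the kernel UAPI headers; the interpretation below is an illustration, not documented ABI text.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <linux/elf.h>
#include <asm/ptrace.h>		/* struct user_sve_header, SVE_PT_* */

/* Returns 0: SVE inactive, 1: FPSIMD payload, 2: SVE payload, -1: error. */
static int query_sve(pid_t pid)
{
	struct user_sve_header hdr;
	struct iovec iov = { .iov_base = &hdr, .iov_len = sizeof(hdr) };

	if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_SVE, &iov) != 0)
		return -1;

	/* Header-only reply: SVE is not the task's active vector type. */
	if (hdr.size == sizeof(hdr))
		return 0;

	return (hdr.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? 2 : 1;
}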
@@ -880,6 +890,9 @@ static int sve_set_common(struct task_struct *target,
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
+ bool fpsimd;
+
+ fpsimd_flush_task_state(target);
/* Header */
if (count < sizeof(header))
@@ -887,97 +900,116 @@ static int sve_set_common(struct task_struct *target,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
0, sizeof(header));
if (ret)
- goto out;
+ return ret;
/*
- * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
- * vec_set_vector_length(), which will also validate them for us:
+ * Streaming SVE data is always stored and presented in SVE format.
+ * Require the user to provide SVE formatted data for consistency, and
+ * to avoid the risk that we configure the task into an invalid state.
*/
- ret = vec_set_vector_length(target, type, header.vl,
- ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
- if (ret)
- goto out;
+ fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD;
+ if (fpsimd && type == ARM64_VEC_SME)
+ return -EINVAL;
+
+ /*
+ * On systems without SVE we accept FPSIMD format writes with
+ * a VL of 0 to allow exiting streaming mode, otherwise a VL
+ * is required.
+ */
+ if (header.vl) {
+ /*
+ * If the system does not support SVE we can't
+ * configure a SVE VL.
+ */
+ if (!system_supports_sve() && type == ARM64_VEC_SVE)
+ return -EINVAL;
+
+ /*
+ * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are
+ * consumed by vec_set_vector_length(), which will
+ * also validate them for us:
+ */
+ ret = vec_set_vector_length(target, type, header.vl,
+ ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+ if (ret)
+ return ret;
+ } else {
+ /* If the system supports SVE we require a VL. */
+ if (system_supports_sve())
+ return -EINVAL;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Only FPSIMD formatted data with no flags set is
+ * supported.
+ */
+ if (header.flags != SVE_PT_REGS_FPSIMD)
+ return -EINVAL;
+ }
+
+ /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */
+ if (type == ARM64_VEC_SME) {
+ sme_alloc(target, false);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ /* Allocate SVE storage if necessary, zeroing any existing SVE state */
+ if (!fpsimd) {
+ sve_alloc(target, true);
+ if (!target->thread.sve_state)
+ return -ENOMEM;
+ }
+
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC VL not the
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_vl(target, type));
/* Enter/exit streaming mode */
if (system_supports_sme()) {
- u64 old_svcr = target->thread.svcr;
-
switch (type) {
case ARM64_VEC_SVE:
target->thread.svcr &= ~SVCR_SM_MASK;
+ set_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
target->thread.svcr |= SVCR_SM_MASK;
-
- /*
- * Disable traps and ensure there is SME storage but
- * preserve any currently set values in ZA/ZT.
- */
- sme_alloc(target, false);
set_tsk_thread_flag(target, TIF_SME);
break;
default:
WARN_ON_ONCE(1);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
-
- /*
- * If we switched then invalidate any existing SVE
- * state and ensure there's storage.
- */
- if (target->thread.svcr != old_svcr)
- sve_alloc(target, true);
}
+ /* Always zero V regs, FPSR, and FPCR */
+ memset(&current->thread.uw.fpsimd_state, 0,
+ sizeof(current->thread.uw.fpsimd_state));
+
/* Registers: FPSIMD-only case */
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
- if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
- ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
- SVE_PT_FPSIMD_OFFSET);
+ if (fpsimd) {
clear_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+ return ret;
}
- /*
- * Otherwise: no registers or full SVE case. For backwards
- * compatibility reasons we treat empty flags as SVE registers.
- */
+ /* Otherwise: no registers or full SVE case. */
+
+ target->thread.fp_type = FP_STATE_SVE;
/*
* If setting a different VL from the requested VL and there is
* register data, the data layout will be wrong: don't even
* try to set the registers in this case.
*/
- if (count && vq != sve_vq_from_vl(header.vl)) {
- ret = -EIO;
- goto out;
- }
-
- sve_alloc(target, true);
- if (!target->thread.sve_state) {
- ret = -ENOMEM;
- clear_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
- }
-
- /*
- * Ensure target->thread.sve_state is up to date with target's
- * FPSIMD regs, so that a short copyin leaves trailing
- * registers unmodified. Only enable SVE if we are
- * configuring normal SVE, a system with streaming SVE may not
- * have normal SVE.
- */
- fpsimd_sync_to_sve(target);
- if (type == ARM64_VEC_SVE)
- set_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_SVE;
+ if (count && vq != sve_vq_from_vl(header.vl))
+ return -EIO;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
@@ -986,7 +1018,7 @@ static int sve_set_common(struct task_struct *target,
target->thread.sve_state,
start, end);
if (ret)
- goto out;
+ return ret;
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
@@ -1002,8 +1034,6 @@ static int sve_set_common(struct task_struct *target,
&target->thread.uw.fpsimd_state.fpsr,
start, end);
-out:
- fpsimd_flush_task_state(target);
return ret;
}
@@ -1012,7 +1042,7 @@ static int sve_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
return sve_set_common(target, regset, pos, count, kbuf, ubuf,
@@ -1125,7 +1155,11 @@ static int za_set(struct task_struct *target,
if (ret)
goto out;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC rather than
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_sme_vl(target));
/* Ensure there is some SVE storage for streaming mode */
@@ -1418,7 +1452,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target,
{
long ctrl = get_tagged_addr_ctrl(target);
- if (IS_ERR_VALUE(ctrl))
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
return ctrl;
return membuf_write(&to, &ctrl, sizeof(ctrl));
@@ -1432,6 +1466,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
int ret;
long ctrl;
+ ctrl = get_tagged_addr_ctrl(target);
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
+ return ctrl;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
if (ret)
return ret;
@@ -1440,6 +1478,101 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ctrl = target->thread.por_el0;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void task_gcs_to_user(struct user_gcs *user_gcs,
+ const struct task_struct *target)
+{
+ user_gcs->features_enabled = target->thread.gcs_el0_mode;
+ user_gcs->features_locked = target->thread.gcs_el0_locked;
+ user_gcs->gcspr_el0 = target->thread.gcspr_el0;
+}
+
+static void task_gcs_from_user(struct task_struct *target,
+ const struct user_gcs *user_gcs)
+{
+ target->thread.gcs_el0_mode = user_gcs->features_enabled;
+ target->thread.gcs_el0_locked = user_gcs->features_locked;
+ target->thread.gcspr_el0 = user_gcs->gcspr_el0;
+}
+
+static int gcs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (target == current)
+ gcs_preserve_current_state();
+
+ task_gcs_to_user(&user_gcs, target);
+
+ return membuf_write(&to, &user_gcs, sizeof(user_gcs));
+}
+
+static int gcs_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ task_gcs_to_user(&user_gcs, target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1);
+ if (ret)
+ return ret;
+
+ if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ task_gcs_from_user(target, &user_gcs);
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1469,11 +1602,17 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE,
+#endif
+#ifdef CONFIG_ARM64_GCS
+ REGSET_GCS,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(struct user_pt_regs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1481,7 +1620,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(struct user_fpsimd_state) / sizeof(u32),
/*
* We pretend we have 32-bit registers because the fpsr and
@@ -1494,7 +1633,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = fpr_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 2,
.size = sizeof(void *),
.align = sizeof(void *),
@@ -1503,7 +1642,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1511,7 +1650,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1520,7 +1659,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
@@ -1528,7 +1667,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = system_call_set,
},
[REGSET_FPMR] = {
- .core_note_type = NT_ARM_FPMR,
+ USER_REGSET_NOTE_TYPE(ARM_FPMR),
.n = 1,
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1537,7 +1676,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
- .core_note_type = NT_ARM_SVE,
+ USER_REGSET_NOTE_TYPE(ARM_SVE),
.n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX,
SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
@@ -1549,7 +1688,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_SME
[REGSET_SSVE] = { /* Streaming mode SVE */
- .core_note_type = NT_ARM_SSVE,
+ USER_REGSET_NOTE_TYPE(ARM_SSVE),
.n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
@@ -1558,7 +1697,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = ssve_set,
},
[REGSET_ZA] = { /* SME ZA */
- .core_note_type = NT_ARM_ZA,
+ USER_REGSET_NOTE_TYPE(ARM_ZA),
/*
* ZA is a single register but it's variably sized and
* the ptrace core requires that the size of any data
@@ -1574,7 +1713,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = za_set,
},
[REGSET_ZT] = { /* SME ZT */
- .core_note_type = NT_ARM_ZT,
+ USER_REGSET_NOTE_TYPE(ARM_ZT),
.n = 1,
.size = ZT_SIG_REG_BYTES,
.align = sizeof(u64),
@@ -1584,7 +1723,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
- .core_note_type = NT_ARM_PAC_MASK,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_MASK),
.n = sizeof(struct user_pac_mask) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1592,7 +1731,7 @@ static const struct user_regset aarch64_regsets[] = {
/* this cannot be set dynamically */
},
[REGSET_PAC_ENABLED_KEYS] = {
- .core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_ENABLED_KEYS),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1601,7 +1740,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_CHECKPOINT_RESTORE
[REGSET_PACA_KEYS] = {
- .core_note_type = NT_ARM_PACA_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACA_KEYS),
.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1609,7 +1748,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = pac_address_keys_set,
},
[REGSET_PACG_KEYS] = {
- .core_note_type = NT_ARM_PACG_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACG_KEYS),
.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1620,7 +1759,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
[REGSET_TAGGED_ADDR_CTRL] = {
- .core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+ USER_REGSET_NOTE_TYPE(ARM_TAGGED_ADDR_CTRL),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1628,6 +1767,26 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ USER_REGSET_NOTE_TYPE(ARM_POE),
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ [REGSET_GCS] = {
+ USER_REGSET_NOTE_TYPE(ARM_GCS),
+ .n = sizeof(struct user_gcs) / sizeof(u64),
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = gcs_get,
+ .set = gcs_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
@@ -1810,7 +1969,7 @@ static int compat_tls_set(struct task_struct *target,
static const struct user_regset aarch32_regsets[] = {
[REGSET_COMPAT_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1818,7 +1977,7 @@ static const struct user_regset aarch32_regsets[] = {
.set = compat_gpr_set
},
[REGSET_COMPAT_VFP] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1835,7 +1994,7 @@ static const struct user_regset_view user_aarch32_view = {
static const struct user_regset aarch32_ptrace_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1843,7 +2002,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1851,7 +2010,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_vfp_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 1,
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1860,7 +2019,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1868,7 +2027,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1877,7 +2036,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
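To illustrate the new REGSET_GCS entry above, here is a hedged userspace sketch of reading it with PTRACE_GETREGSET. NT_ARM_GCS and struct user_gcs are expected to come from the UAPI headers added alongside this series; treat the snippet as a sketch rather than documented ABI.

#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <linux/elf.h>
#include <asm/ptrace.h>		/* struct user_gcs */

static int dump_gcs(pid_t pid)
{
	struct user_gcs gcs;
	struct iovec iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };

	/* Fails with EINVAL when the CPU or kernel lacks GCS support. */
	if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_GCS, &iov) != 0)
		return -1;

	printf("gcspr_el0=0x%llx enabled=0x%llx locked=0x%llx\n",
	       (unsigned long long)gcs.gcspr_el0,
	       (unsigned long long)gcs.features_enabled,
	       (unsigned long long)gcs.features_locked);
	return 0;
}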
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index 99f2ffe9fc05..5b0891146054 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void)
module_init(reloc_test_init);
module_exit(reloc_test_exit);
+MODULE_DESCRIPTION("Relocation testing module");
MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
new file mode 100644
index 000000000000..c64a06f58c0b
--- /dev/null
+++ b/arch/arm64/kernel/rsi.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/psci.h>
+#include <linux/swiotlb.h>
+#include <linux/cc_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/mem_encrypt.h>
+#include <asm/rsi.h>
+
+static struct realm_config config;
+
+unsigned long prot_ns_shared;
+EXPORT_SYMBOL(prot_ns_shared);
+
+DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
+EXPORT_SYMBOL(rsi_present);
+
+bool cc_platform_has(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_MEM_ENCRYPT:
+ return is_realm_world();
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
+
+static bool rsi_version_matches(void)
+{
+ unsigned long ver_lower, ver_higher;
+ unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
+ &ver_lower,
+ &ver_higher);
+
+ if (ret == SMCCC_RET_NOT_SUPPORTED)
+ return false;
+
+ if (ret != RSI_SUCCESS) {
+ pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
+ RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower),
+ RSI_ABI_VERSION_GET_MAJOR(ver_higher),
+ RSI_ABI_VERSION_GET_MINOR(ver_higher));
+ return false;
+ }
+
+ pr_info("RME: Using RSI version %lu.%lu\n",
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower));
+
+ return true;
+}
+
+static void __init arm64_rsi_setup_memory(void)
+{
+ u64 i;
+ phys_addr_t start, end;
+
+ /*
+ * Iterate over the available memory ranges and convert the state to
+ * protected memory. We should take extra care to ensure that we DO NOT
+ * permit any "DESTROYED" pages to be converted to "RAM".
+ *
+ * panic() is used because if the attempt to switch the memory to
+ * protected has failed here, then future accesses to the memory are
+ * simply going to be reflected as a SEA (Synchronous External Abort)
+	 * which we can't handle. Bailing out early prevents the guest from
+	 * limping on and dying later.
+ */
+ for_each_mem_range(i, &start, &end) {
+ if (rsi_set_memory_range_protected_safe(start, end)) {
+ panic("Failed to set memory range to protected: %pa-%pa",
+ &start, &end);
+ }
+ }
+}
+
+/*
+ * Check if a given PA range is Trusted (e.g., Protected memory, a Trusted Device
+ * mapping, or an MMIO emulated in the Realm world).
+ *
+ * We can rely on the RIPAS value of the region to detect if a given region is
+ * protected.
+ *
+ * RIPAS_DEV - Trusted device memory or a trusted emulated MMIO (in the Realm
+ *             world).
+ * RIPAS_RAM - Memory (RAM) protected by the RMM guarantees (e.g., firmware-
+ *             reserved regions for data sharing).
+ *
+ * RIPAS_DESTROYED is a special case of one of the above, where the host did
+ * something without our permission and as such we can't do anything about it.
+ *
+ * The only case where something is emulated by the untrusted hypervisor or is
+ * backed by shared memory is indicated by RSI_RIPAS_EMPTY.
+ */
+bool arm64_rsi_is_protected(phys_addr_t base, size_t size)
+{
+ enum ripas ripas;
+ phys_addr_t end, top;
+
+ /* Overflow ? */
+ if (WARN_ON(base + size <= base))
+ return false;
+
+ end = ALIGN(base + size, RSI_GRANULE_SIZE);
+ base = ALIGN_DOWN(base, RSI_GRANULE_SIZE);
+
+ while (base < end) {
+ if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top)))
+ break;
+ if (WARN_ON(top <= base))
+ break;
+ if (ripas == RSI_RIPAS_EMPTY)
+ break;
+ base = top;
+ }
+
+ return base >= end;
+}
+EXPORT_SYMBOL(arm64_rsi_is_protected);
+
+static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot)
+{
+ if (arm64_rsi_is_protected(phys, size))
+ *prot = pgprot_encrypted(*prot);
+ else
+ *prot = pgprot_decrypted(*prot);
+
+ return 0;
+}
+
+void __init arm64_rsi_init(void)
+{
+ if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC)
+ return;
+ if (!rsi_version_matches())
+ return;
+ if (WARN_ON(rsi_get_realm_config(&config)))
+ return;
+ prot_ns_shared = BIT(config.ipa_bits - 1);
+
+ if (arm64_ioremap_prot_hook_register(realm_ioremap_hook))
+ return;
+
+ if (realm_register_memory_enc_ops())
+ return;
+
+ arm64_rsi_setup_memory();
+
+ static_branch_enable(&rsi_present);
+}
+
+static struct platform_device rsi_dev = {
+ .name = RSI_PDEV_NAME,
+ .id = PLATFORM_DEVID_NONE
+};
+
+static int __init arm64_create_dummy_rsi_dev(void)
+{
+ if (is_realm_world() &&
+ platform_device_register(&rsi_dev))
+ pr_err("failed to register rsi platform device\n");
+ return 0;
+}
+
+arch_initcall(arm64_create_dummy_rsi_dev)
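As a rough illustration of how the hooks registered in arm64_rsi_init() are consumed elsewhere, the sketch below shows generic code keying off cc_platform_has(); the shared-buffer policy described in the comment is an assumption made for the example, not something taken from this file.

#include <linux/cc_platform.h>
#include <linux/types.h>

static bool example_needs_shared_dma(void)
{
	/*
	 * On a realm guest cc_platform_has(CC_ATTR_MEM_ENCRYPT) is true
	 * (see the implementation above), so DMA-visible buffers must use
	 * memory explicitly shared with the host (e.g. bounce buffers)
	 * rather than ordinary protected RAM.
	 */
	return cc_platform_has(CC_ATTR_MEM_ENCRYPT);
}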
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 255d12f881c2..778f2a1faac8 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -34,10 +34,8 @@ unsigned long sdei_exit_mode;
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#ifdef CONFIG_VMAP_STACK
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#endif
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
@@ -65,9 +63,6 @@ static void free_sdei_stacks(void)
{
int cpu;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return;
-
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -91,9 +86,6 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return 0;
-
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
if (err)
@@ -206,7 +198,7 @@ out_err:
/*
* do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
- * SDEI_EV_FAILED - failure, return this error code to firmare.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
* virtual-address - success, return to this address.
*/
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
@@ -247,7 +239,7 @@ unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
* If we interrupted the kernel with interrupts masked, we always go
* back to wherever we came from.
*/
- if (mode == kernel_mode && !interrupts_enabled(regs))
+ if (mode == kernel_mode && regs_irqs_disabled(regs))
return SDEI_EV_HANDLED;
/*
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a096e2451044..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/numa.h>
+#include <asm/rsi.h>
#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -168,19 +169,23 @@ static void __init smp_build_mpidr_hash(void)
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
- int size;
+ int size = 0;
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
const char *name;
if (dt_virt)
memblock_reserve(dt_phys, size);
- if (!dt_virt || !early_init_dt_scan(dt_virt)) {
+ /*
+ * dt_virt is a fixmap address, hence __pa(dt_virt) can't be used.
+ * Pass dt_phys directly.
+ */
+ if (!early_init_dt_scan(dt_virt, dt_phys)) {
pr_crit("\n"
- "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n"
- "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
- "\nPlease check your bootloader.",
- &dt_phys, dt_virt);
+ "Error: invalid device tree blob: PA=%pa, VA=%px, size=%d bytes\n"
+ "The dtb must be 8-byte aligned and must not exceed 2 MB in size.\n"
+ "\nPlease check your bootloader.\n",
+ &dt_phys, dt_virt, size);
/*
* Note that in this _really_ early stage we cannot even BUG()
@@ -209,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -218,9 +223,7 @@ static void __init request_standard_resources(void)
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
- standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES);
- if (!standard_resources)
- panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
+ standard_resources = memblock_alloc_or_panic(res_size, SMP_CACHE_BYTES);
for_each_mem_region(region) {
res = &standard_resources[i++];
@@ -277,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
@@ -351,13 +354,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
else
psci_acpi_init();
+ arm64_rsi_init();
+
init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
- /* Init percpu seeds for random tags after cpus are set up. */
- kasan_init_sw_tags();
-
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a77f4976e11..1110eeb21f57 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -9,6 +9,7 @@
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/irq-entry-common.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/freezer.h>
@@ -19,12 +20,14 @@
#include <linux/ratelimit.h>
#include <linux/rseq.h>
#include <linux/syscalls.h>
+#include <linux/pkeys.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/exception.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
@@ -34,6 +37,8 @@
#include <asm/traps.h>
#include <asm/vdso.h>
+#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)
+
/*
* Do a signal return; undo the signal stack. These are aligned to 128-bit.
*/
@@ -42,11 +47,6 @@ struct rt_sigframe {
struct ucontext uc;
};
-struct frame_record {
- u64 fp;
- u64 lr;
-};
-
struct rt_sigframe_user_layout {
struct rt_sigframe __user *sigframe;
struct frame_record __user *next_frame;
@@ -56,19 +56,76 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long gcs_offset;
unsigned long sve_offset;
unsigned long tpidr2_offset;
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
-#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
+/*
+ * Holds any EL0-controlled state that influences unprivileged memory accesses.
+ * This includes both accesses done in userspace and uaccess done in the kernel.
+ *
+ * This state needs to be carefully managed to ensure that it doesn't cause
+ * uaccess to fail when setting up the signal frame, and the signal handler
+ * itself also expects a well-defined state when entered.
+ */
+struct user_access_state {
+ u64 por_el0;
+};
+
#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+/*
+ * Save the user access state into ua_state and reset it to disable any
+ * restrictions.
+ */
+static void save_reset_user_access_state(struct user_access_state *ua_state)
+{
+ if (system_supports_poe()) {
+ u64 por_enable_all = 0;
+
+ for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
+ por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX);
+
+ ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ write_sysreg_s(por_enable_all, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
+ }
+}
+
+/*
+ * Set the user access state for invoking the signal handler.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void set_handler_user_access_state(void)
+{
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+/*
+ * Restore the user access state to the values saved in ua_state.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void restore_user_access_state(const struct user_access_state *ua_state)
+{
+ if (system_supports_poe())
+ write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
+}
+
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
const size_t reserved_size =
@@ -185,6 +242,10 @@ struct user_ctxs {
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
+ struct gcs_context __user *gcs;
+ u32 gcs_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -193,6 +254,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
&current->thread.uw.fpsimd_state;
int err;
+ fpsimd_sync_from_effective_state(current);
+
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
@@ -205,37 +268,46 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
-static int restore_fpsimd_context(struct user_ctxs *user)
+static int read_fpsimd_context(struct user_fpsimd_state *fpsimd,
+ struct user_ctxs *user)
{
- struct user_fpsimd_state fpsimd;
- int err = 0;
+ int err;
/* check the size information */
if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
- __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
+ err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs),
+ sizeof(fpsimd->vregs));
+ __get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err);
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_fpsimd_context(struct user_ctxs *user)
+{
+ struct user_fpsimd_state fpsimd;
+ int err;
+
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
clear_thread_flag(TIF_SVE);
+ current->thread.svcr &= ~SVCR_SM_MASK;
current->thread.fp_type = FP_STATE_FPSIMD;
/* load the hardware registers from the fpsimd_state structure */
- if (!err)
- fpsimd_update_current_state(&fpsimd);
-
- return err ? -EFAULT : 0;
+ fpsimd_update_current_state(&fpsimd);
+ return 0;
}
static int preserve_fpmr_context(struct fpmr_context __user *ctx)
{
int err = 0;
- current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
-
__put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
__put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
@@ -253,7 +325,35 @@ static int restore_fpmr_context(struct user_ctxs *user)
__get_user_error(fpmr, &user->fpmr->fpmr, err);
if (!err)
- write_sysreg_s(fpmr, SYS_FPMR);
+ current->thread.uw.fpmr = fpmr;
+
+ return err;
+}
+
+static int preserve_poe_context(struct poe_context __user *ctx,
+ const struct user_access_state *ua_state)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(ua_state->por_el0, &ctx->por_el0, err);
+
+ return err;
+}
+
+static int restore_poe_context(struct user_ctxs *user,
+ struct user_access_state *ua_state)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ ua_state->por_el0 = por_el0;
return err;
}
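For context on the poe_context record written and consumed above, here is a hedged userspace sketch of a signal handler locating the saved POR_EL0 in the ucontext. It walks only the inline __reserved records and ignores EXTRA_MAGIC continuation; POE_MAGIC and struct poe_context are assumed to come from the UAPI asm/sigcontext.h that accompanies POE support.

#include <ucontext.h>
#include <asm/sigcontext.h>	/* struct _aarch64_ctx, poe_context, POE_MAGIC */

static unsigned long long saved_por_el0(ucontext_t *uc)
{
	struct _aarch64_ctx *head =
		(struct _aarch64_ctx *)&uc->uc_mcontext.__reserved;

	/* The record list is terminated by a header with magic == 0. */
	while (head->magic != 0) {
		if (head->magic == POE_MAGIC)
			return ((struct poe_context *)head)->por_el0;
		head = (struct _aarch64_ctx *)((char *)head + head->size);
	}

	return 0;	/* no POE record: POE not supported or not enabled */
}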
@@ -287,11 +387,6 @@ static int preserve_sve_context(struct sve_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the SVE state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
current->thread.sve_state,
SVE_SIG_REGS_SIZE(vq));
@@ -306,6 +401,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
u16 user_vl, flags;
+ bool sm;
if (user->sve_size < sizeof(*user->sve))
return -EINVAL;
@@ -315,7 +411,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return err;
- if (flags & SVE_SIG_FLAG_SM) {
+ sm = flags & SVE_SIG_FLAG_SM;
+ if (sm) {
if (!system_supports_sme())
return -EINVAL;
@@ -335,28 +432,23 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (user_vl != vl)
return -EINVAL;
- if (user->sve_size == sizeof(*user->sve)) {
- clear_thread_flag(TIF_SVE);
- current->thread.svcr &= ~SVCR_SM_MASK;
- current->thread.fp_type = FP_STATE_FPSIMD;
- goto fpsimd_only;
- }
+ /*
+ * Non-streaming SVE state may be preserved without an SVE payload, in
+ * which case the SVE context only has a header with VL==0, and all
+ * state can be restored from the FPSIMD context.
+ *
+ * Streaming SVE state is always preserved with an SVE payload. For
+ * consistency and robustness, reject restoring streaming SVE state
+ * without an SVE payload.
+ */
+ if (!sm && user->sve_size == sizeof(*user->sve))
+ return restore_fpsimd_context(user);
vq = sve_vq_from_vl(vl);
if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sve_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
-
sve_alloc(current, true);
if (!current->thread.sve_state) {
clear_thread_flag(TIF_SVE);
@@ -376,19 +468,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
set_thread_flag(TIF_SVE);
current->thread.fp_type = FP_STATE_SVE;
-fpsimd_only:
- /* copy the FP and status/control registers */
- /* restore_sigframe() already checked that user->fpsimd != NULL. */
- err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
- __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
- /* load the hardware registers from the fpsimd_state structure */
- if (!err)
- fpsimd_update_current_state(&fpsimd);
+ /* Merge the FPSIMD registers into the SVE state */
+ fpsimd_update_current_state(&fpsimd);
- return err ? -EFAULT : 0;
+ return 0;
}
#else /* ! CONFIG_ARM64_SVE */
@@ -408,13 +495,12 @@ extern int preserve_sve_context(void __user *ctx);
static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
{
+ u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
int err = 0;
- current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
-
__put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
- __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+ __put_user_error(tpidr2_el0, &ctx->tpidr2, err);
return err;
}
@@ -456,11 +542,6 @@ static int preserve_za_context(struct za_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the ZA state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
@@ -495,16 +576,6 @@ static int restore_za_context(struct user_ctxs *user)
if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sme_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
-
sme_alloc(current, true);
if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
@@ -542,11 +613,6 @@ static int preserve_zt_context(struct zt_context __user *ctx)
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
- /*
- * This assumes that the ZT state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
thread_zt_state(&current->thread),
ZT_SIG_REGS_SIZE(1));
@@ -572,16 +638,6 @@ static int restore_zt_context(struct user_ctxs *user)
if (nregs != 1)
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.zt_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
-
err = __copy_from_user(thread_zt_state(&current->thread),
(char __user const *)user->zt +
ZT_SIG_REGS_OFFSET,
@@ -604,6 +660,82 @@ extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
+#ifdef CONFIG_ARM64_GCS
+
+static int preserve_gcs_context(struct gcs_context __user *ctx)
+{
+ int err = 0;
+ u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+	 * If GCS is enabled we will add a cap token to the frame, so
+	 * account for it in the GCSPR_EL0 we report in order to support
+	 * stack switching via sigreturn. We do not allow enabling GCS via
+	 * sigreturn, so the token is only relevant for threads that
+	 * already have GCS enabled.
+ */
+ if (task_gcs_el0_enabled(current))
+ gcspr -= 8;
+
+ __put_user_error(GCS_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(gcspr, &ctx->gcspr, err);
+ __put_user_error(0, &ctx->reserved, err);
+ __put_user_error(current->thread.gcs_el0_mode,
+ &ctx->features_enabled, err);
+
+ return err;
+}
+
+static int restore_gcs_context(struct user_ctxs *user)
+{
+ u64 gcspr, enabled;
+ int err = 0;
+
+ if (user->gcs_size != sizeof(*user->gcs))
+ return -EINVAL;
+
+ __get_user_error(gcspr, &user->gcs->gcspr, err);
+ __get_user_error(enabled, &user->gcs->features_enabled, err);
+ if (err)
+ return err;
+
+ /* Don't allow unknown modes */
+ if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ err = gcs_check_locked(current, enabled);
+ if (err != 0)
+ return err;
+
+ /* Don't allow enabling */
+ if (!task_gcs_el0_enabled(current) &&
+ (enabled & PR_SHADOW_STACK_ENABLE))
+ return -EINVAL;
+
+	/* If we are disabling, disable everything */
+ if (!(enabled & PR_SHADOW_STACK_ENABLE))
+ enabled = 0;
+
+ current->thread.gcs_el0_mode = enabled;
+
+ /*
+	 * We let userspace set GCSPR_EL0 to anything here; we will
+	 * validate it later in gcs_restore_signal().
+ */
+ write_sysreg_s(gcspr, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else /* ! CONFIG_ARM64_GCS */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_gcs_context(void __user *ctx);
+extern int restore_gcs_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_GCS */
+
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
@@ -621,6 +753,8 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
+ user->poe = NULL;
+ user->gcs = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -671,6 +805,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -726,6 +871,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->fpmr_size = size;
break;
+ case GCS_MAGIC:
+ if (!system_supports_gcs())
+ goto invalid;
+
+ if (user->gcs)
+ goto invalid;
+
+ user->gcs = (struct gcs_context __user *)head;
+ user->gcs_size = size;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -809,7 +965,8 @@ invalid:
}
static int restore_sigframe(struct pt_regs *regs,
- struct rt_sigframe __user *sf)
+ struct rt_sigframe __user *sf,
+ struct user_access_state *ua_state)
{
sigset_t set;
int i, err;
@@ -831,6 +988,8 @@ static int restore_sigframe(struct pt_regs *regs,
*/
forget_syscall(regs);
+ fpsimd_save_and_flush_current_state();
+
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
err = parse_user_sigframe(&user, sf);
@@ -845,6 +1004,9 @@ static int restore_sigframe(struct pt_regs *regs,
err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_gcs() && user.gcs)
+ err = restore_gcs_context(&user);
+
if (err == 0 && system_supports_tpidr2() && user.tpidr2)
err = restore_tpidr2_context(&user);
@@ -857,13 +1019,69 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user, ua_state);
+
return err;
}
+#ifdef CONFIG_ARM64_GCS
+static int gcs_restore_signal(void)
+{
+ u64 gcspr_el0, cap;
+ int ret;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
+ return 0;
+
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Ensure that any changes to the GCS done via GCS operations
+ * are visible to the normal reads we do to validate the
+ * token.
+ */
+ gcsb_dsync();
+
+ /*
+	 * GCSPR_EL0 should be pointing at a capped GCS; read the cap.
+	 * We don't enforce that this is in a GCS page: if it is not,
+	 * faults will be generated on GCS operations - the main
+	 * concern is to protect GCS pages.
+ */
+ ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0,
+ sizeof(cap));
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * Check that the cap is the actual GCS before replacing it.
+ */
+ if (cap != GCS_SIGNAL_CAP(gcspr_el0))
+ return -EINVAL;
+
+ /* Invalidate the token to prevent reuse */
+ put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret);
+ if (ret != 0)
+ return -EFAULT;
+
+ write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else
+static int gcs_restore_signal(void) { return 0; }
+#endif
+
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -880,12 +1098,17 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
- if (restore_sigframe(regs, frame))
+ if (restore_sigframe(regs, frame, &ua_state))
+ goto badframe;
+
+ if (gcs_restore_signal())
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ restore_user_access_state(&ua_state);
+
return regs->regs[0];
badframe:
@@ -920,6 +1143,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+#ifdef CONFIG_ARM64_GCS
+ if (system_supports_gcs() && (add_all || current->thread.gcspr_el0)) {
+ err = sigframe_alloc(user, &user->gcs_offset,
+ sizeof(struct gcs_context));
+ if (err)
+ return err;
+ }
+#endif
+
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
@@ -980,11 +1212,19 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
static int setup_sigframe(struct rt_sigframe_user_layout *user,
- struct pt_regs *regs, sigset_t *set)
+ struct pt_regs *regs, sigset_t *set,
+ const struct user_access_state *ua_state)
{
int i, err = 0;
struct rt_sigframe __user *sf = user->sigframe;
@@ -1020,6 +1260,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ if (system_supports_gcs() && err == 0 && user->gcs_offset) {
+ struct gcs_context __user *gcs_ctx =
+ apply_user_offset(user, user->gcs_offset);
+ err |= preserve_gcs_context(gcs_ctx);
+ }
+
/* Scalable Vector Extension state (including streaming), if present */
if ((system_supports_sve() || system_supports_sme()) &&
err == 0 && user->sve_offset) {
@@ -1042,6 +1288,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_fpmr_context(fpmr_ctx);
}
+ if (system_supports_poe() && err == 0) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx, ua_state);
+ }
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1130,15 +1383,81 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
return 0;
}
-static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+#ifdef CONFIG_ARM64_GCS
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ u64 gcspr_el0;
+ int ret = 0;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!task_gcs_el0_enabled(current))
+ return 0;
+
+ /*
+	 * We are entering a signal handler; the current register state
+	 * is active.
+ */
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Push a cap and the GCS entry for the trampoline onto the GCS.
+ */
+ put_user_gcs((unsigned long)sigtramp,
+ (unsigned long __user *)(gcspr_el0 - 16), &ret);
+ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8),
+ (unsigned long __user *)(gcspr_el0 - 8), &ret);
+ if (ret != 0)
+ return ret;
+
+ gcspr_el0 -= 16;
+ write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0);
+
+ return 0;
+}
+#else
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ return 0;
+}
+
+#endif
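Taken together, gcs_signal_entry() above and gcs_restore_signal() earlier implement a small token protocol on the shadow stack. A sketch of the layout, with the mask detail stated as an assumption rather than quoted from the headers:

/*
 * With GCSPR_EL0 == G when the signal is delivered, gcs_signal_entry()
 * leaves the shadow stack as:
 *
 *	G - 16 : address of the sigreturn trampoline
 *	G - 8  : cap token, GCS_SIGNAL_CAP(G - 8)
 *
 * and sets GCSPR_EL0 = G - 16. Returning through the trampoline pops the
 * G - 16 entry, so sigreturn finds GCSPR_EL0 == G - 8, checks that the
 * word there matches GCS_SIGNAL_CAP(G - 8), zeroes it, and moves
 * GCSPR_EL0 back up to G. The mask below assumes the cap token keeps the
 * address bits and clears the low 12 bits, mirroring GCS_CAP_ADDR_MASK.
 */
#include <stdint.h>

#define EXAMPLE_CAP_ADDR_MASK	(~(uint64_t)0xfffULL)

static uint64_t example_signal_cap(uint64_t addr)
{
	return addr & EXAMPLE_CAP_ADDR_MASK;
}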
+
+static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
+ int err;
+
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ sigtramp = ksig->ka.sa.sa_restorer;
+ else
+ sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+ err = gcs_signal_entry(sigtramp, ksig);
+ if (err)
+ return err;
+
+ /*
+ * We must not fail from this point onwards. We are going to update
+ * registers, including SP, in order to invoke the signal handler. If
+ * we failed and attempted to deliver a nested SIGSEGV to a handler
+ * after that point, the subsequent sigreturn would end up restoring
+ * the (partial) state for the original signal handler.
+ */
regs->regs[0] = usig;
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ regs->regs[1] = (unsigned long)&user->sigframe->info;
+ regs->regs[2] = (unsigned long)&user->sigframe->uc;
+ }
regs->sp = (unsigned long)user->sigframe;
regs->regs[29] = (unsigned long)&user->next_frame->fp;
- regs->pc = (unsigned long)ka->sa.sa_handler;
+ regs->regs[30] = (unsigned long)sigtramp;
+ regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
/*
* Signal delivery is a (wacky) indirect function call in
@@ -1161,29 +1480,12 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
/* Signal handlers are invoked with ZA and streaming mode disabled */
if (system_supports_sme()) {
- /*
- * If we were in streaming mode the saved register
- * state was SVE but we will exit SM and use the
- * FPSIMD register state - flush the saved FPSIMD
- * register state in case it gets loaded.
- */
- if (current->thread.svcr & SVCR_SM_MASK) {
- memset(&current->thread.uw.fpsimd_state, 0,
- sizeof(current->thread.uw.fpsimd_state));
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- current->thread.svcr &= ~(SVCR_ZA_MASK |
- SVCR_SM_MASK);
- sme_smstop();
+ task_smstop_sm(current);
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ write_sysreg_s(0, SYS_TPIDR2_EL0);
}
- if (ka->sa.sa_flags & SA_RESTORER)
- sigtramp = ka->sa.sa_restorer;
- else
- sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
-
- regs->regs[30] = (unsigned long)sigtramp;
+ return 0;
}
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
@@ -1191,28 +1493,37 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
{
struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
int err = 0;
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
if (get_sigframe(&user, ksig, regs))
return 1;
+ save_reset_user_access_state(&ua_state);
frame = user.sigframe;
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(&user, regs, set);
- if (err == 0) {
- setup_return(regs, &ksig->ka, &user, usig);
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- regs->regs[1] = (unsigned long)&frame->info;
- regs->regs[2] = (unsigned long)&frame->uc;
- }
- }
+ err |= setup_sigframe(&user, regs, set, &ua_state);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+ if (err == 0)
+ err = setup_return(regs, ksig, &user, usig);
+
+ /*
+ * We must not fail if setup_return() succeeded - see comment at the
+ * beginning of setup_return().
+ */
+
+ if (err == 0)
+ set_handler_user_access_state();
+ else
+ restore_user_access_state(&ua_state);
return err;
}
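
The restructured setup_rt_frame() above accumulates every fallible user-space write into err and only commits the register state via setup_return() once the whole frame has been written, which is what the "must not fail from this point onwards" comment is protecting. A minimal standalone sketch of that accumulate-then-commit shape; put_field() is a stand-in for __put_user_error()/copy_siginfo_to_user(), not a kernel interface:

#include <stdio.h>

struct frame { long a, b, c; };

/* Stand-in for __put_user_error(): reports failure instead of faulting. */
static int put_field(long *dst, long val)
{
	if (!dst)
		return -14;			/* -EFAULT */
	*dst = val;
	return 0;
}

static int setup_frame_sketch(struct frame *frame)
{
	int err = 0;

	/* Every user-space write only ORs into err... */
	err |= put_field(frame ? &frame->a : NULL, 1);
	err |= put_field(frame ? &frame->b : NULL, 2);
	err |= put_field(frame ? &frame->c : NULL, 3);

	/* ...and registers are committed only if the whole frame was written. */
	if (err == 0)
		printf("frame complete: commit registers (setup_return())\n");
	else
		printf("delivery failed: caller forces SIGSEGV instead\n");

	return err;
}

int main(void)
{
	struct frame f;

	setup_frame_sketch(&f);		/* succeeds */
	setup_frame_sketch(NULL);	/* every write fails, nothing committed */
	return 0;
}
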
@@ -1266,7 +1577,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
-void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
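
gcs_signal_entry() above prepares the user's Guarded Control Stack before the handler runs: the sigreturn trampoline address and a cap token are written into the two 8-byte slots below GCSPR_EL0 and the pointer is lowered by 16, so the handler's final return to the trampoline passes the GCS check and the cap can be validated when the signal returns. A standalone sketch of that layout over an ordinary array; SIGNAL_CAP() is an assumed stand-in for GCS_SIGNAL_CAP() and no real system registers are involved:

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-in for GCS_SIGNAL_CAP(): only the "address plus marker"
 * shape matters here, the real token format lives in asm/gcs.h. */
#define SIGNAL_CAP(addr)	((((uint64_t)(addr)) & ~0xfffULL) | 0x1ULL)

static uint64_t gcs[64];	/* fake shadow stack, grows downwards */

static uint64_t gcs_signal_entry_sketch(uint64_t gcspr, uint64_t sigtramp)
{
	/* gcspr - 16: trampoline entry, so the handler's RET passes the check */
	*(uint64_t *)(uintptr_t)(gcspr - 16) = sigtramp;
	/* gcspr -  8: cap token, consumed later by sigreturn */
	*(uint64_t *)(uintptr_t)(gcspr - 8) = SIGNAL_CAP(gcspr - 8);
	return gcspr - 16;	/* the value written back to GCSPR_EL0 */
}

int main(void)
{
	uint64_t gcspr = (uint64_t)(uintptr_t)&gcs[64];

	gcspr = gcs_signal_entry_sketch(gcspr, 0x400800); /* pretend sigtramp */
	printf("GCSPR %#llx: [0]=%#llx (tramp) [1]=%#llx (cap)\n",
	       (unsigned long long)gcspr,
	       (unsigned long long)((uint64_t *)(uintptr_t)gcspr)[0],
	       (unsigned long long)((uint64_t *)(uintptr_t)gcspr)[1]);
	return 0;
}
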
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index bbd542704730..bb3b526ff43f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -17,7 +17,7 @@
#include <asm/signal32.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
#include <asm/vdso.h>
struct compat_vfp_sigframe {
@@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ if (err)
+ return -EFAULT;
+
/*
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err)
- fpsimd_update_current_state(&fpsimd);
+ fpsimd_save_and_flush_current_state();
+ current->thread.uw.fpsimd_state = fpsimd;
- return err ? -EFAULT : 0;
+ return 0;
}
static int compat_restore_sigframe(struct pt_regs *regs,
@@ -451,7 +454,7 @@ int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
void compat_setup_restart_syscall(struct pt_regs *regs)
{
- regs->regs[7] = __NR_compat_restart_syscall;
+ regs->regs[7] = __NR_compat32_restart_syscall;
}
/*
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
index ccbd4aab4ba4..6f486b95b413 100644
--- a/arch/arm64/kernel/sigreturn32.S
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -13,7 +13,7 @@
* need two 16-bit instructions.
*/
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
.section .rodata
.globl __aarch32_sigret_code_start
@@ -22,26 +22,26 @@ __aarch32_sigret_code_start:
/*
* ARM Code
*/
- .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x27 // svc #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0xdf // mov r7, #__NR_compat32_sigreturn
/*
* ARM code
*/
- .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_rt_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x27 // svc #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0xdf // mov r7, #__NR_compat32_rt_sigreturn
.globl __aarch32_sigret_code_end
__aarch32_sigret_code_end:
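
The .byte sequences above rely on AArch32 storing instructions little-endian, so the syscall number lands in the low byte of the encoded word: "NR, 0x70, 0xa0, 0xe3" is the word 0xe3a070NR (mov r7, #NR) and "NR, 0x00, 0x00, 0xef" is 0xef0000NR (svc). A small sketch that rebuilds the words from the bytes; the number used is arbitrary rather than a real __NR_compat32_* value:

#include <stdint.h>
#include <stdio.h>

static uint32_t a32_word(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3)
{
	/* bytes in memory order -> little-endian 32-bit instruction word */
	return (uint32_t)b0 | (uint32_t)b1 << 8 | (uint32_t)b2 << 16 | (uint32_t)b3 << 24;
}

int main(void)
{
	uint8_t nr = 119;	/* arbitrary example syscall number */

	printf("mov r7, #%u -> %08x\n", nr, (unsigned)a32_word(nr, 0x70, 0xa0, 0xe3));
	printf("svc #%u     -> %08x\n", nr, (unsigned)a32_word(nr, 0x00, 0x00, 0xef));
	/* The Thumb pairs encode 16-bit halfwords the same way:
	 * 0x2700 | nr is "movs r7, #nr" and 0xdf00 | nr is "svc #nr". */
	printf("T16: %04x %04x\n", (unsigned)(0x2700 | nr), (unsigned)(0xdf00 | nr));
	return 0;
}
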
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 487381164ff6..2def9d0dd3dd 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,48 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
-#include <asm/thread_info.h>
-
-/*
- * If we have SMCCC v1.3 and (as is likely) no SVE state in
- * the registers then set the SMCCC hint bit to say there's no
- * need to preserve it. Do this by directly adjusting the SMCCC
- * function value which is already stored in x0 ready to be called.
- */
-SYM_FUNC_START(__arm_smccc_sve_check)
-
- ldr_l x16, smccc_has_sve_hint
- cbz x16, 2f
-
- get_current_task x16
- ldr x16, [x16, #TSK_TI_FLAGS]
- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
-
-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
-
-2: ret
-SYM_FUNC_END(__arm_smccc_sve_check)
-EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-alternative_if ARM64_SVE
- bl __arm_smccc_sve_check
-alternative_else_nop_endif
\instr #0
- ldr x4, [sp, #16]
+ ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #24]
+ ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ldp x29, x30, [sp], #16
- ret
+1: ret
.endm
/*
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 31c8b3094dd7..1aa324104afb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -55,9 +55,6 @@
#include <trace/events/ipi.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -67,26 +64,20 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
- IPI_TIMER,
- IPI_IRQ_WORK,
- NR_IPI,
- /*
- * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
- * with trace_ipi_*
- */
- IPI_CPU_BACKTRACE = NR_IPI,
- IPI_KGDB_ROUNDUP,
- MAX_IPI
-};
-
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
-static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
+
+static bool crash_stop;
static void ipi_setup(int cpu);
@@ -132,7 +123,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
if (ret) {
- pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+ if (ret != -EPERM)
+ pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
@@ -358,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
- * in-kernel synchronisation, try to get the firwmare to help us to
+ * in-kernel synchronisation, try to get the firmware to help us to
* verify that it has really left the kernel before we consider
* clobbering anything it might still be using.
*/
@@ -469,6 +461,8 @@ void __init smp_prepare_boot_cpu(void)
init_gic_priority_masking();
kasan_init_hw_tags();
+ /* Init percpu seeds for random tags after cpus are set up. */
+ kasan_init_sw_tags();
}
/*
@@ -510,6 +504,59 @@ static int __init smp_cpu_setup(int cpu)
static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;
+int arch_register_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+ if (!acpi_disabled && !acpi_handle &&
+ IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
+ return -EPROBE_DEFER;
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+ /* For now block anything that looks like physical CPU Hotplug */
+ if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return -ENODEV;
+ }
+#endif
+
+ /*
+ * Availability of the acpi handle is sufficient to establish
+ * that _STA has already been checked. No need to recheck here.
+ */
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
+
+ return register_cpu(c, cpu);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+void arch_unregister_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ acpi_status status;
+ unsigned long long sta;
+
+ if (!acpi_handle) {
+ pr_err_once("Removing a CPU without associated ACPI handle\n");
+ return;
+ }
+
+ status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
+ if (ACPI_FAILURE(status))
+ return;
+
+ /* For now do not allow anything that looks like physical CPU HP */
+ if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return;
+ }
+
+ unregister_cpu(c);
+}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -530,7 +577,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!acpi_gicc_is_usable(processor)) {
+ if (!(processor->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -749,8 +797,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
- per_cpu(cpu_number, cpu) = cpu;
-
if (cpu == smp_processor_id())
continue;
@@ -767,13 +813,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
+ [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -784,11 +832,11 @@ int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
- for (i = 0; i < NR_IPI; i++) {
+ for (i = 0; i < MAX_IPI; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -813,9 +861,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -829,21 +877,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -856,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
static void arm64_backtrace_ipi(cpumask_t *mask)
{
- __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
@@ -883,7 +947,7 @@ void kgdb_roundup_cpus(void)
if (cpu == this_cpu)
continue;
- __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
}
}
#endif
@@ -908,14 +972,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -954,14 +1016,16 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
@@ -970,8 +1034,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
return false;
switch (ipi) {
- case IPI_CPU_STOP:
- case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_STOP_NMI:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
@@ -988,11 +1051,15 @@ static void ipi_setup(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- prepare_percpu_nmi(ipi_irq_base + i);
- enable_percpu_nmi(ipi_irq_base + i, 0);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
} else {
- enable_percpu_irq(ipi_irq_base + i, 0);
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
@@ -1006,44 +1073,77 @@ static void ipi_teardown(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- disable_percpu_nmi(ipi_irq_base + i);
- teardown_percpu_nmi(ipi_irq_base + i);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
} else {
- disable_percpu_irq(ipi_irq_base + i);
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < MAX_IPI);
- nr_ipi = min(n, MAX_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
- if (ipi_should_be_nmi(i)) {
- err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
- WARN(err, "Could not request IPI %d as NMI, err=%d\n",
- i, err);
- } else {
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
- WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
- i, err);
- }
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
+
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
+
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
+
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
@@ -1084,79 +1184,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ static cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
- /* Wait up to one second for other CPUs to stop */
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
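
The rewritten smp_send_stop() lets only the first caller send stop requests (the test_and_set_bit() on stop_in_progress) and escalates from an ordinary IPI to an NMI once the one-second budget runs out. A runnable control-flow sketch using C11 atomics; the CPU-counting and IPI helpers are stubs rather than the kernel interfaces:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag stop_in_progress = ATOMIC_FLAG_INIT;
static int stubborn_cpus = 3;		/* pretend "other online CPUs" */

static int num_other_online_cpus(void) { return stubborn_cpus; }
static void delay_us(unsigned int us) { (void)us; }
static void send_stop_ipi(bool nmi)
{
	/* Pretend one CPU ignores the plain IPI but honours the NMI. */
	stubborn_cpus = nmi ? 0 : 1;
}

static void smp_send_stop_sketch(void)
{
	unsigned long timeout;

	if (num_other_online_cpus() == 0)
		return;

	/* First caller wins; a concurrent panic path backs off here. */
	if (atomic_flag_test_and_set(&stop_in_progress))
		return;

	send_stop_ipi(false);		/* IPI_CPU_STOP */
	timeout = 1000000;		/* up to ~1 second, as in the diff */
	while (num_other_online_cpus() && timeout--)
		delay_us(1);

	if (num_other_online_cpus()) {
		send_stop_ipi(true);	/* IPI_CPU_STOP_NMI */
		timeout = 10000;	/* extra 10 ms */
		while (num_other_online_cpus() && timeout--)
			delay_us(1);
	}

	if (num_other_online_cpus())
		fprintf(stderr, "failed to stop some CPUs\n");
}

int main(void)
{
	smp_send_stop_sketch();		/* sends the IPIs */
	smp_send_stop_sketch();		/* no-op: a stop is already in progress */
	printf("other CPUs still running: %d\n", num_other_online_cpus());
	return 0;
}
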
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
+ smp_send_stop();
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
-
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
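
With percpu_ipi_descs set, ipi_setup_lpi() above gives every (cpu, ipi) pair its own Linux IRQ at ipi_irq_base + cpu * nr_ipi + ipi, and ipi_handler() recovers the IPI index with a modulo while the CPU is implied by the per-CPU descriptor. A small sketch that checks the mapping round-trips; the base and counts are arbitrary values:

#include <assert.h>
#include <stdio.h>

#define NR_IPI	8			/* stands in for nr_ipi */

static int ipi_irq_base = 96;		/* arbitrary */

static int ipi_to_irq(int cpu, int ipi)
{
	return ipi_irq_base + cpu * NR_IPI + ipi;	/* as in ipi_setup_lpi() */
}

static int irq_to_ipi(int irq)
{
	return (irq - ipi_irq_base) % NR_IPI;		/* as in ipi_handler() */
}

static int irq_to_cpu(int irq)
{
	return (irq - ipi_irq_base) / NR_IPI;		/* implicit via get_ipi_desc() */
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)
		for (int ipi = 0; ipi < NR_IPI; ipi++) {
			int irq = ipi_to_irq(cpu, ipi);

			assert(irq_to_ipi(irq) == ipi);
			assert(irq_to_cpu(irq) == cpu);
		}
	printf("IRQ <-> (cpu, ipi) mapping round-trips for 4 CPUs x %d IPIs\n", NR_IPI);
	return 0;
}
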
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 6b3258860377..3ebcf8c53fb0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -20,11 +20,28 @@
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
+enum kunwind_source {
+ KUNWIND_SOURCE_UNKNOWN,
+ KUNWIND_SOURCE_FRAME,
+ KUNWIND_SOURCE_CALLER,
+ KUNWIND_SOURCE_TASK,
+ KUNWIND_SOURCE_REGS_PC,
+};
+
+union unwind_flags {
+ unsigned long all;
+ struct {
+ unsigned long fgraph : 1,
+ kretprobe : 1;
+ };
+};
+
/*
* Kernel unwind state
*
* @common: Common unwind state.
* @task: The task being unwound.
+ * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding.
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
@@ -32,9 +49,13 @@
struct kunwind_state {
struct unwind_state common;
struct task_struct *task;
+ int graph_idx;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
+ enum kunwind_source source;
+ union unwind_flags flags;
+ struct pt_regs *regs;
};
static __always_inline void
@@ -43,6 +64,9 @@ kunwind_init(struct kunwind_state *state,
{
unwind_init_common(&state->common);
state->task = task;
+ state->source = KUNWIND_SOURCE_UNKNOWN;
+ state->flags.all = 0;
+ state->regs = NULL;
}
/*
@@ -58,8 +82,10 @@ kunwind_init_from_regs(struct kunwind_state *state,
{
kunwind_init(state, current);
+ state->regs = regs;
state->common.fp = regs->regs[29];
state->common.pc = regs->pc;
+ state->source = KUNWIND_SOURCE_REGS_PC;
}
/*
@@ -77,6 +103,7 @@ kunwind_init_from_caller(struct kunwind_state *state)
state->common.fp = (unsigned long)__builtin_frame_address(1);
state->common.pc = (unsigned long)__builtin_return_address(0);
+ state->source = KUNWIND_SOURCE_CALLER;
}
/*
@@ -97,6 +124,7 @@ kunwind_init_from_task(struct kunwind_state *state,
state->common.fp = thread_saved_fp(task);
state->common.pc = thread_saved_pc(task);
+ state->source = KUNWIND_SOURCE_TASK;
}
static __always_inline int
@@ -106,12 +134,15 @@ kunwind_recover_return_address(struct kunwind_state *state)
if (state->task->ret_stack &&
(state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->common.pc,
(void *)state->common.fp);
- if (WARN_ON_ONCE(state->common.pc == orig_pc))
+ if (state->common.pc == orig_pc) {
+ WARN_ON_ONCE(state->task == current);
return -EINVAL;
+ }
state->common.pc = orig_pc;
+ state->flags.fgraph = 1;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -121,13 +152,98 @@ kunwind_recover_return_address(struct kunwind_state *state)
orig_pc = kretprobe_find_ret_addr(state->task,
(void *)state->common.fp,
&state->kr_cur);
+ if (!orig_pc)
+ return -EINVAL;
state->common.pc = orig_pc;
+ state->flags.kretprobe = 1;
}
#endif /* CONFIG_KRETPROBES */
return 0;
}
+static __always_inline
+int kunwind_next_regs_pc(struct kunwind_state *state)
+{
+ struct stack_info *info;
+ unsigned long fp = state->common.fp;
+ struct pt_regs *regs;
+
+ regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp);
+
+ info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs));
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(&state->common, info, (unsigned long)regs,
+ sizeof(*regs));
+
+ state->regs = regs;
+ state->common.pc = regs->pc;
+ state->common.fp = regs->regs[29];
+ state->regs = NULL;
+ state->source = KUNWIND_SOURCE_REGS_PC;
+ return 0;
+}
+
+static __always_inline int
+kunwind_next_frame_record_meta(struct kunwind_state *state)
+{
+ struct task_struct *tsk = state->task;
+ unsigned long fp = state->common.fp;
+ struct frame_record_meta *meta;
+ struct stack_info *info;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*meta));
+ if (!info)
+ return -EINVAL;
+
+ meta = (struct frame_record_meta *)fp;
+ switch (READ_ONCE(meta->type)) {
+ case FRAME_META_TYPE_FINAL:
+ if (meta == &task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ case FRAME_META_TYPE_PT_REGS:
+ return kunwind_next_regs_pc(state);
+ default:
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ }
+}
+
+static __always_inline int
+kunwind_next_frame_record(struct kunwind_state *state)
+{
+ unsigned long fp = state->common.fp;
+ struct frame_record *record;
+ struct stack_info *info;
+ unsigned long new_fp, new_pc;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*record));
+ if (!info)
+ return -EINVAL;
+
+ record = (struct frame_record *)fp;
+ new_fp = READ_ONCE(record->fp);
+ new_pc = READ_ONCE(record->lr);
+
+ if (!new_fp && !new_pc)
+ return kunwind_next_frame_record_meta(state);
+
+ unwind_consume_stack(&state->common, info, fp, sizeof(*record));
+
+ state->common.fp = new_fp;
+ state->common.pc = new_pc;
+ state->source = KUNWIND_SOURCE_FRAME;
+
+ return 0;
+}
+
/*
* Unwind from one frame record (A) to the next frame record (B).
*
@@ -138,15 +254,21 @@ kunwind_recover_return_address(struct kunwind_state *state)
static __always_inline int
kunwind_next(struct kunwind_state *state)
{
- struct task_struct *tsk = state->task;
- unsigned long fp = state->common.fp;
int err;
- /* Final frame; nothing to unwind */
- if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
- return -ENOENT;
+ state->flags.all = 0;
+
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME:
+ case KUNWIND_SOURCE_CALLER:
+ case KUNWIND_SOURCE_TASK:
+ case KUNWIND_SOURCE_REGS_PC:
+ err = kunwind_next_frame_record(state);
+ break;
+ default:
+ err = -EINVAL;
+ }
- err = unwind_next_frame_record(&state->common);
if (err)
return err;
@@ -157,21 +279,24 @@ kunwind_next(struct kunwind_state *state)
typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
-static __always_inline void
+static __always_inline int
do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
void *cookie)
{
- if (kunwind_recover_return_address(state))
- return;
+ int ret;
- while (1) {
- int ret;
+ ret = kunwind_recover_return_address(state);
+ if (ret)
+ return ret;
+ while (1) {
if (!consume_state(state, cookie))
- break;
+ return -EINVAL;
ret = kunwind_next(state);
+ if (ret == -ENOENT)
+ return 0;
if (ret < 0)
- break;
+ return ret;
}
}
@@ -204,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
: stackinfo_get_unknown(); \
})
-static __always_inline void
+static __always_inline int
kunwind_stack_walk(kunwind_consume_fn consume_state,
void *cookie, struct task_struct *task,
struct pt_regs *regs)
@@ -212,10 +337,8 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
struct stack_info stacks[] = {
stackinfo_get_task(task),
STACKINFO_CPU(irq),
-#if defined(CONFIG_VMAP_STACK)
STACKINFO_CPU(overflow),
-#endif
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+#if defined(CONFIG_ARM_SDE_INTERFACE)
STACKINFO_SDEI(normal),
STACKINFO_SDEI(critical),
#endif
@@ -232,7 +355,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
if (regs) {
if (task != current)
- return;
+ return -EINVAL;
kunwind_init_from_regs(&state, regs);
} else if (task == current) {
kunwind_init_from_caller(&state);
@@ -240,7 +363,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
kunwind_init_from_task(&state, task);
}
- do_kunwind(&state, consume_state, cookie);
+ return do_kunwind(&state, consume_state, cookie);
}
struct kunwind_consume_entry_data {
@@ -267,6 +390,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+static __always_inline bool
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ /*
+ * At an exception boundary we can reliably consume the saved PC. We do
+ * not know whether the LR was live when the exception was taken, and
+ * so we cannot perform the next unwind step reliably.
+ *
+ * All that matters is whether the *entire* unwind is reliable, so give
+ * up as soon as we hit an exception boundary.
+ */
+ if (state->source == KUNWIND_SOURCE_REGS_PC)
+ return false;
+
+ return arch_kunwind_consume_entry(state, cookie);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie,
+ struct task_struct *task)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
+ task, NULL);
+}
+
struct bpf_unwind_consume_entry_data {
bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
void *cookie;
@@ -292,10 +445,32 @@ noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u6
kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
}
-static bool dump_backtrace_entry(void *arg, unsigned long where)
+static const char *state_source_string(const struct kunwind_state *state)
+{
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME: return NULL;
+ case KUNWIND_SOURCE_CALLER: return "C";
+ case KUNWIND_SOURCE_TASK: return "T";
+ case KUNWIND_SOURCE_REGS_PC: return "P";
+ default: return "U";
+ }
+}
+
+static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg)
{
+ const char *source = state_source_string(state);
+ union unwind_flags flags = state->flags;
+ bool has_info = source || flags.all;
char *loglvl = arg;
- printk("%s %pSb\n", loglvl, (void *)where);
+
+ printk("%s %pSb%s%s%s%s%s\n", loglvl,
+ (void *)state->common.pc,
+ has_info ? " (" : "",
+ source ? source : "",
+ flags.fgraph ? "F" : "",
+ flags.kretprobe ? "K" : "",
+ has_info ? ")" : "");
+
return true;
}
@@ -314,7 +489,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
return;
printk("%sCall trace:\n", loglvl);
- arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
+ kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
put_task_stack(tsk);
}
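
kunwind_next_frame_record() above follows {fp, lr} records and treats an all-zero record as a frame_record_meta marker whose type tells the final frame apart from an in-kernel pt_regs boundary. A standalone sketch of that walk over a fabricated chain; the structure layout and type values are modelled on the diff, not taken from the arm64 headers:

#include <stdint.h>
#include <stdio.h>

struct frame_record { uint64_t fp, lr; };

enum { META_TYPE_FINAL = 1, META_TYPE_PT_REGS = 2 };	/* modelled values */

struct frame_record_meta {
	struct frame_record record;	/* fp == lr == 0 marks a metadata frame */
	uint64_t type;
};

static int unwind(const struct frame_record *fr)
{
	while (1) {
		if ((uintptr_t)fr & 0x7)
			return -1;	/* misaligned fp: give up */

		if (!fr->fp && !fr->lr) {
			const struct frame_record_meta *meta =
				(const struct frame_record_meta *)fr;

			if (meta->type == META_TYPE_FINAL)
				return 0;	/* clean end of the stack */
			return -1;		/* pt_regs boundary, handled separately */
		}

		printf("  pc %#llx\n", (unsigned long long)fr->lr);
		fr = (const struct frame_record *)(uintptr_t)fr->fp;
	}
}

int main(void)
{
	/* Fabricated stack: two ordinary records chained onto a FINAL marker. */
	struct frame_record_meta final_frame = { { 0, 0 }, META_TYPE_FINAL };
	struct frame_record f1 = { (uint64_t)(uintptr_t)&final_frame, 0x1200 };
	struct frame_record f0 = { (uint64_t)(uintptr_t)&f1, 0x1100 };

	return unwind(&f0) ? 1 : 0;
}
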
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index d5ffaaab31a7..f08408b6e826 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -48,14 +48,16 @@ asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused)
*/
#define __arm64_sys_personality __arm64_sys_arm64_personality
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
const syscall_fn_t sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
};
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index fc40386afb1b..96bcfb907443 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -5,17 +5,12 @@
* Copyright (C) 2015 ARM Ltd.
*/
-/*
- * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and
- * asm/unistd32.h.
- */
-#define __COMPAT_SYSCALL_NR
-
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/syscalls.h>
#include <asm/syscall.h>
+#include <asm/unistd_compat_32.h>
asmlinkage long compat_sys_sigreturn(void);
asmlinkage long compat_sys_rt_sigreturn(void);
@@ -122,14 +117,16 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode,
return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
}
+#define __SYSCALL_WITH_COMPAT(nr, sym, compat) __SYSCALL(nr, compat)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd32.h>
+#include <asm/syscall_table_32.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
-const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = {
- [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd32.h>
+const syscall_fn_t compat_sys_call_table[__NR_compat32_syscalls] = {
+ [0 ... __NR_compat32_syscalls - 1] = __arm64_sys_ni_syscall,
+#include <asm/syscall_table_32.h>
};
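
Both tables above are produced by the same double-include trick: the generated syscall table header is expanded once with __SYSCALL emitting prototypes and once emitting designated initializers, while __SYSCALL_WITH_COMPAT picks the native entry point in sys.c and the compat one in sys32.c. A self-contained miniature of the pattern; SYSCALL_LIST stands in for the generated asm/syscall_table_{64,32}.h headers:

#include <stdio.h>

typedef long (*syscall_fn_t)(void);

#define SYSCALL_LIST(X)		\
	X(0, sys_read)		\
	X(1, sys_write)		\
	X(2, sys_openat)

#define NR_SYSCALLS 3

static long sys_ni_syscall(void) { return -38; }	/* -ENOSYS */

/* First expansion: one definition per entry (prototypes in the kernel). */
#define __SYSCALL(nr, sym) static long sym(void) { return nr; }
SYSCALL_LIST(__SYSCALL)
#undef __SYSCALL

/* Second expansion: designated initializers; holes default to ni_syscall
 * via the GNU range-initializer extension, exactly as in the kernel. */
#define __SYSCALL(nr, sym) [nr] = sym,
static const syscall_fn_t sys_call_table[NR_SYSCALLS] = {
	[0 ... NR_SYSCALLS - 1] = sys_ni_syscall,
	SYSCALL_LIST(__SYSCALL)
};
#undef __SYSCALL

int main(void)
{
	for (int i = 0; i < NR_SYSCALLS; i++)
		printf("syscall %d -> %ld\n", i, sys_call_table[i]());
	return 0;
}
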
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 7230f6e20ab8..c062badd1a56 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -14,6 +14,7 @@
#include <asm/syscall.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
long compat_arm_syscall(struct pt_regs *regs, int scno);
long sys_ni_syscall(void);
@@ -42,7 +43,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
add_random_kstack_offset();
- if (scno < sc_nr) {
+ if (likely(scno < sc_nr)) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
ret = __invoke_syscall(regs, syscall_fn);
@@ -95,7 +96,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- if (flags & _TIF_MTE_ASYNC_FAULT) {
+ if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) {
/*
* Process the asynchronous tag check fault before the actual
* syscall. do_notify_resume() will send a signal to userspace
@@ -153,7 +154,7 @@ void do_el0_svc(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
void do_el0_svc_compat(struct pt_regs *regs)
{
- el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
+ el0_svc_common(regs, regs->regs[7], __NR_compat32_syscalls,
compat_sys_call_table);
}
#endif
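
invoke_syscall() above keeps the table lookup behind the (now likely()) bound check and additionally passes the index through array_index_nospec(), so a mispredicted branch cannot feed an out-of-range value into the table load. A sketch of that dispatch shape; the branchless mask is modelled on the generic form of the clamp and relies on arithmetic right shift, not the arm64 assembly version:

#include <stdio.h>

typedef long (*syscall_fn_t)(long);

static long sys_ni_syscall(long arg) { (void)arg; return -38; /* -ENOSYS */ }
static long sys_demo(long arg)       { return arg + 1; }

static const syscall_fn_t table[4] = { sys_demo, sys_ni_syscall, sys_ni_syscall, sys_ni_syscall };

/* All-ones when idx < size, zero otherwise (generic array_index_nospec() style). */
static unsigned long index_mask(unsigned long idx, unsigned long size)
{
	return ~(long)(idx | (size - 1 - idx)) >> (8 * sizeof(long) - 1);
}

static long invoke(unsigned long scno, long arg)
{
	if (scno < 4) {				/* the likely() path */
		scno &= index_mask(scno, 4);	/* clamp even under misprediction */
		return table[scno](arg);
	}
	return sys_ni_syscall(arg);
}

int main(void)
{
	printf("%ld %ld\n", invoke(0, 41), invoke(99, 0));
	return 0;
}
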
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 1a2c72f3e7f8..5d24dc53799b 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -15,62 +15,16 @@
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
+#include <linux/xarray.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
-#ifdef CONFIG_ACPI
-static bool __init acpi_cpu_is_threaded(int cpu)
-{
- int is_threaded = acpi_pptt_cpu_is_thread(cpu);
-
- /*
- * if the PPTT doesn't have thread information, assume a homogeneous
- * machine and return the current CPU's thread state.
- */
- if (is_threaded < 0)
- is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
-
- return !!is_threaded;
-}
-
-/*
- * Propagate the topology information of the processor_topology_node tree to the
- * cpu_topology array.
- */
-int __init parse_acpi_topology(void)
-{
- int cpu, topology_id;
-
- if (acpi_disabled)
- return 0;
-
- for_each_possible_cpu(cpu) {
- topology_id = find_acpi_cpu_topology(cpu, 0);
- if (topology_id < 0)
- return topology_id;
-
- if (acpi_cpu_is_threaded(cpu)) {
- cpu_topology[cpu].thread_id = topology_id;
- topology_id = find_acpi_cpu_topology(cpu, 1);
- cpu_topology[cpu].core_id = topology_id;
- } else {
- cpu_topology[cpu].thread_id = -1;
- cpu_topology[cpu].core_id = topology_id;
- }
- topology_id = find_acpi_cpu_topology_cluster(cpu);
- cpu_topology[cpu].cluster_id = topology_id;
- topology_id = find_acpi_cpu_topology_package(cpu);
- cpu_topology[cpu].package_id = topology_id;
- }
-
- return 0;
-}
-#endif
-
#ifdef CONFIG_ARM64_AMU_EXTN
#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
@@ -88,18 +42,28 @@ int __init parse_acpi_topology(void)
* initialized.
*/
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
static cpumask_var_t amu_fie_cpus;
+struct amu_cntr_sample {
+ u64 arch_const_cycles_prev;
+ u64 arch_core_cycles_prev;
+ unsigned long last_scale_update;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples);
+
void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev, read_corecnt());
- this_cpu_write(arch_const_cycles_prev, read_constcnt());
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
+
+ amu_sample->arch_core_cycles_prev = read_corecnt();
+ amu_sample->arch_const_cycles_prev = read_constcnt();
}
static inline bool freq_counters_valid(int cpu)
{
+ struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
return false;
@@ -108,8 +72,8 @@ static inline bool freq_counters_valid(int cpu)
return false;
}
- if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
- !per_cpu(arch_core_cycles_prev, cpu))) {
+ if (unlikely(!amu_sample->arch_const_cycles_prev ||
+ !amu_sample->arch_core_cycles_prev)) {
pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
return false;
}
@@ -152,17 +116,22 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate)
static void amu_scale_freq_tick(void)
{
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
u64 prev_core_cnt, prev_const_cnt;
u64 core_cnt, const_cnt, scale;
- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+ prev_const_cnt = amu_sample->arch_const_cycles_prev;
+ prev_core_cnt = amu_sample->arch_core_cycles_prev;
update_freq_counters_refs();
- const_cnt = this_cpu_read(arch_const_cycles_prev);
- core_cnt = this_cpu_read(arch_core_cycles_prev);
+ const_cnt = amu_sample->arch_const_cycles_prev;
+ core_cnt = amu_sample->arch_core_cycles_prev;
+ /*
+ * This should not happen unless the AMUs have been reset and the
+ * counter values have not been restored - unlikely
+ */
if (unlikely(core_cnt <= prev_core_cnt ||
const_cnt <= prev_const_cnt))
return;
@@ -182,6 +151,8 @@ static void amu_scale_freq_tick(void)
scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
this_cpu_write(arch_freq_scale, (unsigned long)scale);
+
+ amu_sample->last_scale_update = jiffies;
}
static struct scale_freq_data amu_sfd = {
@@ -189,17 +160,114 @@ static struct scale_freq_data amu_sfd = {
.set_freq_scale = amu_scale_freq_tick,
};
+static __always_inline bool amu_fie_cpu_supported(unsigned int cpu)
+{
+ return cpumask_available(amu_fie_cpus) &&
+ cpumask_test_cpu(cpu, amu_fie_cpus);
+}
+
+void arch_cpu_idle_enter(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ if (!amu_fie_cpu_supported(cpu))
+ return;
+
+ /* Kick in AMU update but only if one has not happened already */
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK) &&
+ time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu)))
+ amu_scale_freq_tick();
+}
+
+#define AMU_SAMPLE_EXP_MS 20
+
+int arch_freq_get_on_cpu(int cpu)
+{
+ struct amu_cntr_sample *amu_sample;
+ unsigned int start_cpu = cpu;
+ unsigned long last_update;
+ unsigned int freq = 0;
+ u64 scale;
+
+ if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
+ return -EOPNOTSUPP;
+
+ while (1) {
+
+ amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
+ last_update = amu_sample->last_scale_update;
+
+ /*
+ * For those CPUs that are in full dynticks mode, or those that have
+ * not seen tick for a while, try an alternative source for the counters
+ * (and thus freq scale), if available, for given policy: this boils
+ * down to identifying an active cpu within the same freq domain, if any.
+ */
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
+ time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ int ref_cpu;
+
+ if (!policy)
+ return -EINVAL;
+
+ if (!cpumask_intersects(policy->related_cpus,
+ housekeeping_cpumask(HK_TYPE_TICK))) {
+ cpufreq_cpu_put(policy);
+ return -EOPNOTSUPP;
+ }
+
+ for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
+ if (ref_cpu == start_cpu) {
+ /* Prevent verifying same CPU twice */
+ ref_cpu = nr_cpu_ids;
+ break;
+ }
+ if (!idle_cpu(ref_cpu))
+ break;
+ }
+
+ cpufreq_cpu_put(policy);
+
+ if (ref_cpu >= nr_cpu_ids)
+ /* No alternative to pull info from */
+ return -EAGAIN;
+
+ cpu = ref_cpu;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reversed computation to the one used to determine
+ * the arch_freq_scale value
+ * (see amu_scale_freq_tick for details)
+ */
+ scale = arch_scale_freq_capacity(cpu);
+ freq = scale * arch_scale_freq_ref(cpu);
+ freq >>= SCHED_CAPACITY_SHIFT;
+ return freq;
+}
+
static void amu_fie_setup(const struct cpumask *cpus)
{
int cpu;
/* We are already set since the last insmod of cpufreq driver */
- if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ if (cpumask_available(amu_fie_cpus) &&
+ unlikely(cpumask_subset(cpus, amu_fie_cpus)))
return;
- for_each_cpu(cpu, cpus) {
+ for_each_cpu(cpu, cpus)
if (!freq_counters_valid(cpu))
return;
+
+ if (!cpumask_available(amu_fie_cpus) &&
+ !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+ WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n",
+ cpumask_pr_args(cpus));
+ return;
}
cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
@@ -237,17 +305,8 @@ static struct notifier_block init_amu_fie_notifier = {
static int __init init_amu_fie(void)
{
- int ret;
-
- if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
- return -ENOMEM;
-
- ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ return cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
- if (ret)
- free_cpumask_var(amu_fie_cpus);
-
- return ret;
}
core_initcall(init_amu_fie);
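
arch_freq_get_on_cpu() above recovers a frequency by reversing the tick-time computation: the per-CPU scale factor (at most SCHED_CAPACITY_SCALE) is multiplied by the reference frequency and shifted back down by SCHED_CAPACITY_SHIFT. A numeric sketch of that final step; the 3 GHz reference value is made up:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	unsigned long ref_khz = 3000000;	/* arch_scale_freq_ref(): 3.0 GHz */
	unsigned long scales[] = { SCHED_CAPACITY_SCALE, 512, 256 };

	for (unsigned int i = 0; i < sizeof(scales) / sizeof(scales[0]); i++) {
		unsigned long long freq = (unsigned long long)scales[i] * ref_khz;

		freq >>= SCHED_CAPACITY_SHIFT;	/* reverse of the tick-time shift */
		printf("scale %4lu -> %llu kHz\n", scales[i], freq);
	}
	return 0;
}
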
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 215e6d7f2df8..914282016069 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -41,7 +41,7 @@
#include <asm/extable.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
int show_unhandled_signals = 0;
-static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
+void dump_kernel_instr(unsigned long kaddr)
{
- unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
- if (user_mode(regs))
+ if (!is_ttbr1_addr(kaddr))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+ bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -169,25 +168,18 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
- printk("%sCode: %s\n", lvl, str);
+ printk(KERN_EMERG "Code: %s\n", str);
}
-#ifdef CONFIG_PREEMPT
-#define S_PREEMPT " PREEMPT"
-#elif defined(CONFIG_PREEMPT_RT)
-#define S_PREEMPT " PREEMPT_RT"
-#else
-#define S_PREEMPT ""
-#endif
-
#define S_SMP " SMP"
static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
+ unsigned long addr = instruction_pointer(regs);
- pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n",
+ pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
str, err, ++die_counter);
/* trap and error numbers are mostly meaningless on ARM */
@@ -198,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs)
print_modules();
show_regs(regs);
- dump_kernel_instr(KERN_EMERG, regs);
+ if (user_mode(regs))
+ return ret;
+
+ dump_kernel_instr(addr);
return ret;
}
@@ -273,6 +268,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -456,7 +457,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr)
u32 insn;
/* check for AArch32 breakpoint instructions */
- if (!aarch32_break_handler(regs))
+ if (try_handle_aarch32_break(regs))
return;
if (user_insn_read(regs, &insn))
@@ -500,6 +501,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr)
die("Oops - BTI", regs, esr);
}
+void do_el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0);
+}
+
+void do_el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ die("Oops - GCS", regs, esr);
+}
+
void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
{
force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
@@ -525,6 +536,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
user_fastforward_single_step(current);
}
+void do_el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_mops_reset_regs(&regs->user_regs, esr);
+
+ kernel_fastforward_single_step(regs);
+}
+
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
@@ -601,18 +619,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)
@@ -838,6 +864,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_MOPS] = "MOPS",
[ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
[ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
+ [ESR_ELx_EC_GCS] = "Guarded Control Stack",
[ESR_ELx_EC_SERROR] = "SError",
[ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)",
[ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)",
@@ -870,8 +897,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
"Bad EL0 synchronous exception");
}
-#ifdef CONFIG_VMAP_STACK
-
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
@@ -897,16 +922,16 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne
__show_regs(regs);
/*
- * We use nmi_panic to limit the potential for recusive overflows, and
+ * We use nmi_panic to limit the potential for recursive overflows, and
* to get a better stack trace.
*/
nmi_panic(NULL, "kernel stack overflow");
cpu_park_loop();
}
-#endif
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
@@ -963,7 +988,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
int is_valid_bugaddr(unsigned long addr)
{
/*
- * bug_handler() only called for BRK #BUG_BRK_IMM.
+ * bug_brk_handler() only called for BRK #BUG_BRK_IMM.
* So the answer is trivial -- any spurious instances with no
* bug table entry will be rejected by report_bug() and passed
* back to the debug-monitors code and handled as a fatal
@@ -973,7 +998,7 @@ int is_valid_bugaddr(unsigned long addr)
}
#endif
-static int bug_handler(struct pt_regs *regs, unsigned long esr)
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
@@ -993,13 +1018,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook bug_break_hook = {
- .fn = bug_handler,
- .imm = BUG_BRK_IMM,
-};
-
-#ifdef CONFIG_CFI_CLANG
-static int cfi_handler(struct pt_regs *regs, unsigned long esr)
+#ifdef CONFIG_CFI
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr)
{
unsigned long target;
u32 type;
@@ -1022,15 +1042,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
+#endif /* CONFIG_CFI */
-static struct break_hook cfi_break_hook = {
- .fn = cfi_handler,
- .imm = CFI_BRK_IMM_BASE,
- .mask = CFI_BRK_IMM_MASK,
-};
-#endif /* CONFIG_CFI_CLANG */
-
-static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr)
{
pr_err("%s generated an invalid instruction at %pS!\n",
"Kernel text patching",
@@ -1040,11 +1054,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook fault_break_hook = {
- .fn = reserved_fault_handler,
- .imm = FAULT_BRK_IMM,
-};
-
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -1052,7 +1061,7 @@ static struct break_hook fault_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned long esr)
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
@@ -1083,64 +1092,12 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook kasan_break_hook = {
- .fn = kasan_handler,
- .imm = KASAN_BRK_IMM,
- .mask = KASAN_BRK_MASK,
-};
#endif
#ifdef CONFIG_UBSAN_TRAP
-static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
- die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
+ die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook ubsan_break_hook = {
- .fn = ubsan_handler,
- .imm = UBSAN_BRK_IMM,
- .mask = UBSAN_BRK_MASK,
-};
-#endif
-
-#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
-
-/*
- * Initial handler for AArch64 BRK exceptions
- * This handler only used until debug_traps_init().
- */
-int __init early_brk64(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
-{
-#ifdef CONFIG_CFI_CLANG
- if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE)
- return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
- return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
- return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
- return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
-}
-
-void __init trap_init(void)
-{
- register_kernel_break_hook(&bug_break_hook);
-#ifdef CONFIG_CFI_CLANG
- register_kernel_break_hook(&cfi_break_hook);
-#endif
- register_kernel_break_hook(&fault_break_hook);
-#ifdef CONFIG_KASAN_SW_TAGS
- register_kernel_break_hook(&kasan_break_hook);
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- register_kernel_break_hook(&ubsan_break_hook);
#endif
- debug_traps_init();
-}
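
dump_kernel_instr() above prints the four words preceding the faulting address plus the faulting word itself, which is wrapped in parentheses. A standalone sketch of that formatting; a local array stands in for kernel text and the aarch64_insn_read() accessor, and the encodings are just plausible example instructions:

#include <stdint.h>
#include <stdio.h>

static void dump_instr_sketch(const uint32_t *kaddr)
{
	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;

	for (int i = -4; i < 1; i++)
		p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", (unsigned)kaddr[i]);

	printf("Code: %s\n", str);
}

int main(void)
{
	static const uint32_t text[] = {
		0xa9bf7bfd,	/* stp x29, x30, [sp, #-16]! */
		0x910003fd,	/* mov x29, sp */
		0xd503201f,	/* nop */
		0xd503201f,	/* nop */
		0xd4210000,	/* brk #0x800 - pretend this is the faulting insn */
	};

	dump_instr_sketch(&text[4]);
	return 0;
}
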
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 89b6e7840002..78ddf6bdecad 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,8 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
-#include <linux/time_namespace.h>
-#include <linux/timekeeper_internal.h>
+#include <linux/vdso_datastore.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -34,19 +33,11 @@ enum vdso_abi {
VDSO_ABI_AA32,
};
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
unsigned long vdso_pages;
- /* Data Mapping */
- struct vm_special_mapping *dm;
/* Code Mapping */
struct vm_special_mapping *cm;
};
@@ -66,12 +57,6 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = {
#endif /* CONFIG_COMPAT_VDSO */
};
-/*
- * The vDSO data page.
- */
-static union vdso_data_store vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = vdso_data_store.data;
-
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
@@ -113,75 +98,6 @@ static int __init __vdso_init(enum vdso_abi abi)
return 0;
}
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-/*
- * The vvar mapping contains data for a specific time namespace, so when a task
- * changes namespace we must unmap its vvar data for the old namespace.
- * Subsequent faults will map in data for the new namespace.
- *
- * For more details see timens_setup_vdso_data().
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
-
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
- zap_vma_pages(vma);
-#ifdef CONFIG_COMPAT_VDSO
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
- zap_vma_pages(vma);
-#endif
- }
-
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long pfn;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- if (timens_page)
- pfn = page_to_pfn(timens_page);
- else
- pfn = sym_to_pfn(vdso_data);
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = sym_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
-
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
@@ -191,11 +107,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
unsigned long gp_flags = 0;
void *ret;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VDSO_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -203,20 +119,19 @@ static int __setup_additional_pages(enum vdso_abi abi,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_PFNMAP,
- vdso_info[abi].dm);
+ ret = vdso_install_vvar_mapping(mm, vdso_base);
if (IS_ERR(ret))
goto up_fail;
if (system_supports_bti_kernel())
gp_flags = VM_ARM64_BTI;
- vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_base += VDSO_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|gp_flags|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_SEALED_SYSMAP,
vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -235,7 +150,6 @@ up_fail:
enum aarch32_map {
AA32_MAP_VECTORS, /* kuser helpers */
AA32_MAP_SIGPAGE,
- AA32_MAP_VVAR,
AA32_MAP_VDSO,
};
@@ -260,10 +174,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
.pages = &aarch32_sig_page,
.mremap = aarch32_sigpage_mremap,
},
- [AA32_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
[AA32_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
@@ -313,7 +223,6 @@ static int __init __aarch32_alloc_vdso_pages(void)
if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
return 0;
- vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
return __vdso_init(VDSO_ABI_AA32);
@@ -348,7 +257,8 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
- VM_MAYREAD | VM_MAYEXEC,
+ VM_MAYREAD | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
@@ -371,7 +281,8 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
- VM_MAYWRITE | VM_MAYEXEC,
+ VM_MAYWRITE | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
@@ -408,26 +319,14 @@ out:
}
#endif /* CONFIG_COMPAT */
-enum aarch64_map {
- AA64_MAP_VVAR,
- AA64_MAP_VDSO,
-};
-
-static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
- [AA64_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
- [AA64_MAP_VDSO] = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
- },
+static struct vm_special_mapping aarch64_vdso_map __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
};
static int __init vdso_init(void)
{
- vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
- vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_map;
return __vdso_init(VDSO_ABI_AA64);
}
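
With the [vvar] special mapping removed, the vDSO data pages are now installed by the generic vdso_install_vvar_mapping() and the [vdso] code mapping sits immediately above them. Below is a minimal standalone sketch of that address arithmetic; the page size, page counts and base address are illustrative assumptions, not values taken from the patch.

#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed 4K pages */
#define VDSO_NR_PAGES	4UL	/* stand-in for __VDSO_PAGES */

int main(void)
{
	unsigned long vdso_pages = 2;		/* vdso_info[abi].vdso_pages, assumed */
	unsigned long vdso_text_len = vdso_pages * PAGE_SIZE;
	unsigned long mapping_len = vdso_text_len + VDSO_NR_PAGES * PAGE_SIZE;
	unsigned long base = 0x7f0000000000UL;	/* stand-in for get_unmapped_area() */

	/* Data pages go at the base; the code mapping starts VDSO_NR_PAGES above it. */
	unsigned long code_base = base + VDSO_NR_PAGES * PAGE_SIZE;

	printf("mapping %lu bytes: data at %#lx, [vdso] text at %#lx\n",
	       mapping_len, base, code_base);
	return 0;
}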
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d63930c82839..7dec05dd33b7 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -7,9 +7,9 @@
#
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
-obj-vdso := vgettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
# potential future proofing if we end up with internal calls to the exported
# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
# preparation in build-time C")).
-ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ldflags-y := -shared -soname=linux-vdso.so.1 \
-Bsymbolic --build-id=sha1 -n $(btildflags-y)
ifdef CONFIG_LD_ORPHAN_WARN
@@ -34,19 +34,29 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
# -Wmissing-prototypes and -Wmissing-declarations are removed from
-# the CFLAGS of vgettimeofday.c to make possible to build the
-# kernel with CONFIG_WERROR enabled.
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
- $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
- -Wmissing-prototypes -Wmissing-declarations
+# the CFLAGS to make it possible to build the kernel with CONFIG_WERROR enabled.
+CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
+ $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ -Wmissing-prototypes -Wmissing-declarations
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
+CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
+
+CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO)
+CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO)
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 45354f2ddf70..52314be29191 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -11,18 +11,18 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm/vdso/vsyscall.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -38,6 +38,7 @@ SECTIONS
*/
/DISCARD/ : {
*(.note.GNU-stack .note.gnu.property)
+ *(.ARM.attributes)
}
.note : { *(.note.*) } :text :note
@@ -102,6 +103,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..67890b445309
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/cache.h>
+#include <asm/assembler.h>
+
+ .text
+
+#define state0 v0
+#define state1 v1
+#define state2 v2
+#define state3 v3
+#define copy0 v4
+#define copy0_q q4
+#define copy1 v5
+#define copy2 v6
+#define copy3 v7
+#define copy3_d d7
+#define one_d d16
+#define one_q q16
+#define one_v v16
+#define tmp v17
+#define rot8 v18
+
+/*
+ * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and an
+ * 8-byte counter. Importantly, it does not spill to the stack.
+ *
+ * This implementation avoids d8-d15 because they are callee-save in user
+ * space.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ *
+ * x0: output bytes
+ * x1: 32-byte key input
+ * x2: 8-byte counter input/output
+ * x3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+ /* copy0 = "expand 32-byte k" */
+ mov_q x8, 0x3320646e61707865
+ mov_q x9, 0x6b20657479622d32
+ mov copy0.d[0], x8
+ mov copy0.d[1], x9
+
+ /* copy1,copy2 = key */
+ ld1 { copy1.4s, copy2.4s }, [x1]
+ /* copy3 = counter || zero nonce */
+ ld1 { copy3.2s }, [x2]
+
+ movi one_v.2s, #1
+ uzp1 one_v.4s, one_v.4s, one_v.4s
+
+.Lblock:
+ /* copy state to auxiliary vectors for the final add after the permute. */
+ mov state0.16b, copy0.16b
+ mov state1.16b, copy1.16b
+ mov state2.16b, copy2.16b
+ mov state3.16b, copy3.16b
+
+ mov w4, 20
+.Lpermute:
+ /*
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers state0-state3. It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ */
+
+.Ldoubleround:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ ext state1.16b, state1.16b, state1.16b, #4
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #12
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ ext state1.16b, state1.16b, state1.16b, #12
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #4
+
+ subs w4, w4, #2
+ b.ne .Ldoubleround
+
+ /* output0 = state0 + copy0 */
+ add state0.4s, state0.4s, copy0.4s
+ /* output1 = state1 + copy1 */
+ add state1.4s, state1.4s, copy1.4s
+ /* output2 = state2 + copy2 */
+ add state2.4s, state2.4s, copy2.4s
+ /* output3 = state3 + copy3 */
+ add state3.4s, state3.4s, copy3.4s
+ st1 { state0.16b - state3.16b }, [x0]
+
+ /*
+ * ++copy3.counter: the scalar 'add' clears the upper half of the SIMD
+ * register, which is the expected behaviour here (the nonce stays zero).
+ */
+ add copy3_d, copy3_d, one_d
+
+ /* output += 64, --nblocks */
+ add x0, x0, 64
+ subs x3, x3, #1
+ b.ne .Lblock
+
+ /* counter = copy3.counter */
+ st1 { copy3.2s }, [x2]
+
+ /* Zero out the potentially sensitive regs, in case nothing overwrites them later. */
+ movi state0.16b, #0
+ movi state1.16b, #0
+ movi state2.16b, #0
+ movi state3.16b, #0
+ movi copy1.16b, #0
+ movi copy2.16b, #0
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+
+emit_aarch64_feature_1_and
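
For readers following the NEON routine above, here is a hedged plain-C model of the transform it computes: the four constant words, eight key words, a 64-bit counter in words 12-13 and a zero nonce, twenty rounds, output = permuted state + input state, with the counter written back at the end. It only illustrates the math, assumes a little-endian host, and is not the kernel's implementation.

#include <stdint.h>
#include <string.h>
#include <stddef.h>

#define QR(a, b, c, d) do {					\
	a += b; d ^= a; d = (d << 16) | (d >> 16);		\
	c += d; b ^= c; b = (b << 12) | (b >> 20);		\
	a += b; d ^= a; d = (d <<  8) | (d >> 24);		\
	c += d; b ^= c; b = (b <<  7) | (b >> 25);		\
} while (0)

void chacha20_blocks_ref(uint8_t *dst, const uint8_t *key,
			 uint32_t *counter, size_t nblocks)
{
	uint32_t init[16], x[16];
	int i;

	/* "expand 32-byte k" */
	init[0] = 0x61707865; init[1] = 0x3320646e;
	init[2] = 0x79622d32; init[3] = 0x6b206574;
	memcpy(&init[4], key, 32);	/* eight key words */
	memcpy(&init[12], counter, 8);	/* 64-bit counter */
	init[14] = init[15] = 0;	/* zero nonce */

	while (nblocks--) {
		memcpy(x, init, sizeof(x));
		for (i = 0; i < 20; i += 2) {
			QR(x[0], x[4], x[8],  x[12]);	/* column rounds */
			QR(x[1], x[5], x[9],  x[13]);
			QR(x[2], x[6], x[10], x[14]);
			QR(x[3], x[7], x[11], x[15]);
			QR(x[0], x[5], x[10], x[15]);	/* diagonal rounds */
			QR(x[1], x[6], x[11], x[12]);
			QR(x[2], x[7], x[8],  x[13]);
			QR(x[3], x[4], x[9],  x[14]);
		}
		for (i = 0; i < 16; i++)
			x[i] += init[i];
		memcpy(dst, x, 64);	/* little-endian host assumed */
		dst += 64;
		/* ++counter, carrying across the two 32-bit halves */
		if (++init[12] == 0)
			++init[13];
	}
	memcpy(counter, &init[12], 8);	/* write the counter back */
}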
diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..832fe195292b
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <uapi/asm-generic/errno.h>
+
+typeof(__cvdso_getrandom) __kernel_getrandom;
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (alternative_has_cap_likely(ARM64_HAS_FPSIMD))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
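
The opaque_len == ~0UL call with a NULL buffer, zero length and zero flags is the caller's probe: when FPSIMD cannot be used the wrapper answers -ENOSYS so the caller falls back to the plain syscall, otherwise the generic __cvdso_getrandom() path is expected to report the per-thread opaque state size. A hedged userspace-side sketch of that probe follows; resolving the __kernel_getrandom pointer itself (normally via getauxval(AT_SYSINFO_EHDR) and the vDSO ELF image) is assumed to happen elsewhere.

#include <stddef.h>
#include <sys/types.h>

/* Signature of the vDSO entry point, as declared in the patch. */
typedef ssize_t (*vgetrandom_t)(void *buf, size_t len, unsigned int flags,
				void *opaque_state, size_t opaque_len);

/*
 * Probe a resolved __kernel_getrandom pointer. Returns the number of bytes
 * of per-thread opaque state to allocate before real calls, or -1 when the
 * vDSO path is unusable (e.g. the arm64 fallback returned -ENOSYS because
 * FPSIMD is unavailable) and plain getrandom(2) should be used instead.
 */
ssize_t vdso_getrandom_probe(vgetrandom_t fn)
{
	ssize_t ret = fn(NULL, 0, 0, NULL, ~0UL);	/* state-size query */

	if (ret < 0)
		return -1;
	return ret;
}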
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index cc4508c604b2..9d0efed91414 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -3,7 +3,7 @@
# Makefile for vdso32
#
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
@@ -21,8 +21,6 @@ endif
cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
-cc32-disable-warning = $(call try-run,\
- $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -59,13 +57,13 @@ VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
-VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
# KBUILD_CFLAGS from top-level Makefile
VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
+ $(filter -Werror,$(KBUILD_CPPFLAGS)) \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -std=gnu11
+ -std=gnu11 -fms-extensions
VDSO_CFLAGS += -O2
# Some useful compiler-dependent flags from top-level Makefile
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
@@ -73,16 +71,7 @@ VDSO_CFLAGS += -fno-strict-overflow
VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
VDSO_CFLAGS += -Werror=date-time
VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
-
-# The 32-bit compiler does not provide 128-bit integers, which are used in
-# some headers that are indirectly included from the vDSO code.
-# This hack makes the compiler happy and should trigger a warning/error if
-# variables of such type are referenced.
-VDSO_CFLAGS += -D__uint128_t='void*'
-# Silence some warnings coming from headers that operate on long's
-# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
-VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
-VDSO_CFLAGS += -Wno-int-to-pointer-cast
+VDSO_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag)
# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
# unreliable.
@@ -98,7 +87,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
# From arm vDSO Makefile
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
-VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += -shared --build-id=sha1
VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 8d95d7d35057..e02b27487ce8 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -12,17 +12,16 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
OUTPUT_ARCH(arm)
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
diff --git a/arch/arm64/kernel/vmcore_info.c b/arch/arm64/kernel/vmcore_info.c
index b19d5d6cb8b3..9619ece66b79 100644
--- a/arch/arm64/kernel/vmcore_info.c
+++ b/arch/arm64/kernel/vmcore_info.c
@@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void);
static inline u64 get_tcr_el1_t1sz(void)
{
- return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+ return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT;
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 755a22d4f840..ad6133b89e7a 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -13,7 +13,7 @@
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
-#define HYPERVISOR_DATA_SECTIONS \
+#define HYPERVISOR_RODATA_SECTIONS \
HYP_SECTION_NAME(.rodata) : { \
. = ALIGN(PAGE_SIZE); \
__hyp_rodata_start = .; \
@@ -23,6 +23,15 @@
__hyp_rodata_end = .; \
}
+#define HYPERVISOR_DATA_SECTION \
+ HYP_SECTION_NAME(.data) : { \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_start = .; \
+ *(HYP_SECTION_NAME(.data)) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_end = .; \
+ }
+
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@@ -51,7 +60,8 @@
#define SBSS_ALIGN PAGE_SIZE
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
-#define HYPERVISOR_DATA_SECTIONS
+#define HYPERVISOR_RODATA_SECTIONS
+#define HYPERVISOR_DATA_SECTION
#define HYPERVISOR_PERCPU_SECTION
#define HYPERVISOR_RELOC_SECTION
#define SBSS_ALIGN 0
@@ -162,6 +172,7 @@ SECTIONS
/DISCARD/ : {
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
+ *(.ARM.attributes)
}
. = KIMAGE_VADDR;
@@ -189,7 +200,7 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
- HYPERVISOR_DATA_SECTIONS
+ HYPERVISOR_RODATA_SECTIONS
.got : { *(.got) }
/*
@@ -248,9 +259,9 @@ SECTIONS
__inittext_end = .;
__initdata_begin = .;
- init_idmap_pg_dir = .;
+ __pi_init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
- init_idmap_pg_end = .;
+ __pi_init_idmap_pg_end = .;
.init.data : {
INIT_DATA
@@ -264,6 +275,8 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
@@ -285,10 +298,15 @@ SECTIONS
__initdata_end = .;
__init_end = .;
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
+
_data = .;
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ HYPERVISOR_DATA_SECTION
+
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback
@@ -313,11 +331,12 @@ SECTIONS
/* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
+ __pi___bss_start = __bss_start;
. = ALIGN(PAGE_SIZE);
- init_pg_dir = .;
+ __pi_init_pg_dir = .;
. += INIT_DIR_SIZE;
- init_pg_end = .;
+ __pi_init_pg_end = .;
/* end of zero-init region */
. += SZ_4K; /* stack for the early C runtime */
@@ -326,6 +345,7 @@ SECTIONS
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
+ __pi__end = .;
STABS_DEBUG
DWARF_DEBUG
@@ -341,9 +361,6 @@ SECTIONS
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
}
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-
- .data.rel.ro : { *(.data.rel.ro) }
- ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"
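
The new HYP_SECTION_NAME(.data) output section gives the hypervisor a page-aligned read-write data region bounded by __hyp_data_start/__hyp_data_end, mirroring the existing rodata and percpu boundary symbols. As a hedged illustration only (a hypothetical consumer, not code from this patch), such linker-provided bounds are typically consumed like this:

/* Hypothetical consumer of the new boundary symbols; illustration only. */
extern char __hyp_data_start[], __hyp_data_end[];

static inline unsigned long hyp_data_bytes(void)
{
	/* Both bounds are PAGE_SIZE-aligned by the linker script above. */
	return (unsigned long)(__hyp_data_end - __hyp_data_start);
}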
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
index dcd25322127c..3093037dcb7b 100644
--- a/arch/arm64/kernel/watchdog_hld.c
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -34,3 +34,61 @@ bool __init arch_perf_nmi_is_available(void)
*/
return arm_pmu_irq_is_nmi();
}
+
+static int watchdog_perf_update_period(void *data)
+{
+ int cpu = smp_processor_id();
+ u64 max_cpu_freq, new_period;
+
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ return 0;
+
+ new_period = watchdog_thresh * max_cpu_freq;
+ hardlockup_detector_perf_adjust_period(new_period);
+
+ return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (val != CPUFREQ_CREATE_POLICY)
+ return NOTIFY_DONE;
+
+ /*
+ * Let each online CPU covered by the policy update the period on its
+ * own. This serializes with the framework's start/stop of the lockup
+ * detector (softlockup_{start,stop}_all) and avoids a potential race
+ * condition. Otherwise the following theoretical race is possible
+ * (cores 0 and 1 share the same policy):
+ * [core 0] [core 1]
+ * hardlockup_detector_event_create()
+ * hw_nmi_get_sample_period()
+ * (cpufreq registered, notifier callback invoked)
+ * watchdog_freq_notifier_callback()
+ * watchdog_perf_update_period()
+ * (since core 1's event is not yet created,
+ * the period is not set)
+ * perf_event_create_kernel_counter()
+ * (event's period is SAFE_MAX_CPU_FREQ)
+ */
+ for_each_cpu(cpu, policy->cpus)
+ smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+ .notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+ return cpufreq_register_notifier(&watchdog_freq_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);
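
The adjusted period is simply watchdog_thresh seconds multiplied by the policy's maximum frequency, converted from the kHz that cpufreq reports to Hz (hence the * 1000UL), which yields a CPU-cycle count for the hard-lockup perf event. A small standalone sketch of that arithmetic with illustrative numbers (the usual 10 s threshold and a 2.4 GHz part; both are assumptions here):

#include <stdio.h>

int main(void)
{
	unsigned long watchdog_thresh = 10;	/* seconds, illustrative */
	unsigned long max_freq_khz = 2400000;	/* as reported by cpufreq, illustrative */
	unsigned long long max_cpu_freq = max_freq_khz * 1000ULL;	/* Hz */
	unsigned long long new_period = watchdog_thresh * max_cpu_freq;

	printf("perf period: %llu CPU cycles\n", new_period);	/* 24000000000 */
	return 0;
}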