summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile17
-rw-r--r--arch/arm64/kernel/Makefile.syscalls6
-rw-r--r--arch/arm64/kernel/acpi.c139
-rw-r--r--arch/arm64/kernel/acpi_numa.c13
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c5
-rw-r--r--arch/arm64/kernel/asm-offsets.c65
-rw-r--r--arch/arm64/kernel/cacheinfo.c12
-rw-r--r--arch/arm64/kernel/cpu_errata.c52
-rw-r--r--arch/arm64/kernel/cpufeature.c575
-rw-r--r--arch/arm64/kernel/cpuidle.c74
-rw-r--r--arch/arm64/kernel/cpuinfo.c51
-rw-r--r--arch/arm64/kernel/debug-monitors.c14
-rw-r--r--arch/arm64/kernel/efi.c19
-rw-r--r--arch/arm64/kernel/entry-common.c73
-rw-r--r--arch/arm64/kernel/entry-ftrace.S32
-rw-r--r--arch/arm64/kernel/entry.S16
-rw-r--r--arch/arm64/kernel/fpsimd.c96
-rw-r--r--arch/arm64/kernel/ftrace.c88
-rw-r--r--arch/arm64/kernel/head.S620
-rw-r--r--arch/arm64/kernel/hibernate.c8
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c7
-rw-r--r--arch/arm64/kernel/hyp-stub.S4
-rw-r--r--arch/arm64/kernel/image-vars.h41
-rw-r--r--arch/arm64/kernel/io.c113
-rw-r--r--arch/arm64/kernel/jump_label.c13
-rw-r--r--arch/arm64/kernel/kaslr.c4
-rw-r--r--arch/arm64/kernel/kgdb.c2
-rw-r--r--arch/arm64/kernel/machine_kexec.c33
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c10
-rw-r--r--arch/arm64/kernel/module.c136
-rw-r--r--arch/arm64/kernel/mte.c41
-rw-r--r--arch/arm64/kernel/patching.c98
-rw-r--r--arch/arm64/kernel/pci.c191
-rw-r--r--arch/arm64/kernel/perf_callchain.c146
-rw-r--r--arch/arm64/kernel/pi/.gitignore3
-rw-r--r--arch/arm64/kernel/pi/Makefile31
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c (renamed from arch/arm64/kernel/idreg-override.c)105
-rw-r--r--arch/arm64/kernel/pi/kaslr_early.c78
-rw-r--r--arch/arm64/kernel/pi/map_kernel.c259
-rw-r--r--arch/arm64/kernel/pi/map_range.c105
-rw-r--r--arch/arm64/kernel/pi/patch-scs.c (renamed from arch/arm64/kernel/patch-scs.c)125
-rw-r--r--arch/arm64/kernel/pi/pi.h36
-rw-r--r--arch/arm64/kernel/pi/relacheck.c130
-rw-r--r--arch/arm64/kernel/pi/relocate.c64
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c38
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h2
-rw-r--r--arch/arm64/kernel/probes/kprobes.c69
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S78
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c24
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.h1
-rw-r--r--arch/arm64/kernel/probes/uprobes.c16
-rw-r--r--arch/arm64/kernel/process.c206
-rw-r--r--arch/arm64/kernel/proton-pack.c12
-rw-r--r--arch/arm64/kernel/psci.c2
-rw-r--r--arch/arm64/kernel/ptrace.c197
-rw-r--r--arch/arm64/kernel/reloc_test_core.c1
-rw-r--r--arch/arm64/kernel/rsi.c157
-rw-r--r--arch/arm64/kernel/setup.c50
-rw-r--r--arch/arm64/kernel/signal.c500
-rw-r--r--arch/arm64/kernel/signal32.c4
-rw-r--r--arch/arm64/kernel/sigreturn32.S18
-rw-r--r--arch/arm64/kernel/sleep.S3
-rw-r--r--arch/arm64/kernel/smccc-call.S35
-rw-r--r--arch/arm64/kernel/smp.c243
-rw-r--r--arch/arm64/kernel/stacktrace.c310
-rw-r--r--arch/arm64/kernel/sys.c6
-rw-r--r--arch/arm64/kernel/sys32.c17
-rw-r--r--arch/arm64/kernel/syscall.c24
-rw-r--r--arch/arm64/kernel/topology.c22
-rw-r--r--arch/arm64/kernel/trace-events-emulation.h2
-rw-r--r--arch/arm64/kernel/traps.c54
-rw-r--r--arch/arm64/kernel/vdso.c55
-rw-r--r--arch/arm64/kernel/vdso/Makefile37
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S7
-rw-r--r--arch/arm64/kernel/vdso/vgetrandom-chacha.S172
-rw-r--r--arch/arm64/kernel/vdso/vgetrandom.c15
-rw-r--r--arch/arm64/kernel/vdso32/Makefile4
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S2
-rw-r--r--arch/arm64/kernel/vmcore_info.c (renamed from arch/arm64/kernel/crash_core.c)3
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S26
80 files changed, 3868 insertions, 2294 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 467cb7117273..71c29a2a2f19 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,8 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ syscall.o proton-pack.o idle.o patching.o pi/ \
+ rsi.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
@@ -57,7 +56,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
@@ -66,20 +65,12 @@ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_CRASH_CORE) += crash_core.o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
-obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
-
-# We need to prevent the SCS patching code from patching itself. Using
-# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
-# generated triggers an issue with full LTO on Clang, which stops emitting PAC
-# instructions altogether. So disable LTO as well for the compilation unit.
-CFLAGS_patch-scs.o += -mbranch-protection=none
-CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO)
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls
new file mode 100644
index 000000000000..0542a718871a
--- /dev/null
+++ b/arch/arm64/kernel/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 +=
+syscall_abis_64 += renameat rlimit memfd_secret
+
+syscalltbl = arch/arm64/tools/syscall_%.tbl
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index dba8fcec7f33..e6f66491fbe9 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -26,9 +26,11 @@
#include <linux/libfdt.h>
#include <linux/smp.h>
#include <linux/serial_core.h>
+#include <linux/suspend.h>
#include <linux/pgtable.h>
#include <acpi/ghes.h>
+#include <acpi/processor.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
@@ -44,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
+static bool param_acpi_nospcr __initdata;
static int __init parse_acpi(char *arg)
{
@@ -57,6 +60,8 @@ static int __init parse_acpi(char *arg)
param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
+ else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */
+ param_acpi_nospcr = true;
else
return -EINVAL; /* Core will print when we return error */
@@ -227,7 +232,29 @@ done:
if (earlycon_acpi_spcr_enable)
early_init_dt_scan_chosen_stdout();
} else {
- acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+#ifdef CONFIG_HIBERNATION
+ struct acpi_table_header *facs = NULL;
+ acpi_get_table(ACPI_SIG_FACS, 1, &facs);
+ if (facs) {
+ swsusp_hardware_signature =
+ ((struct acpi_table_facs *)facs)->hardware_signature;
+ acpi_put_table(facs);
+ }
+#endif
+
+ /*
+ * For varying privacy and security reasons, sometimes need
+ * to completely silence the serial console output, and only
+ * enable it when needed.
+ * But there are many existing systems that depend on this
+ * behaviour, use acpi=nospcr to disable console in ACPI SPCR
+ * table as default serial console.
+ */
+ acpi_parse_spcr(earlycon_acpi_spcr_enable,
+ !param_acpi_nospcr);
+ pr_info("Use ACPI SPCR as default console: %s\n",
+ param_acpi_nospcr ? "No" : "Yes");
+
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
}
@@ -413,107 +440,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
memblock_mark_nomap(addr, size);
}
-#ifdef CONFIG_ACPI_FFH
-/*
- * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
- * specified in https://developer.arm.com/docs/den0048/latest
- */
-struct acpi_ffh_data {
- struct acpi_ffh_info info;
- void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- unsigned long a4, unsigned long a5,
- unsigned long a6, unsigned long a7,
- struct arm_smccc_res *args,
- struct arm_smccc_quirk *res);
- void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
- struct arm_smccc_1_2_regs *res);
-};
-
-int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id,
+ int *pcpu)
{
- enum arm_smccc_conduit conduit;
- struct acpi_ffh_data *ffh_ctxt;
-
- if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
- return -EOPNOTSUPP;
-
- conduit = arm_smccc_1_1_get_conduit();
- if (conduit == SMCCC_CONDUIT_NONE) {
- pr_err("%s: invalid SMCCC conduit\n", __func__);
- return -EOPNOTSUPP;
+ /* If an error code is passed in this stub can't fix it */
+ if (*pcpu < 0) {
+ pr_warn_once("Unable to map CPU to valid ID\n");
+ return *pcpu;
}
- ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
- if (!ffh_ctxt)
- return -ENOMEM;
-
- if (conduit == SMCCC_CONDUIT_SMC) {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
- } else {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
- }
-
- memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
-
- *region_ctxt = ffh_ctxt;
- return AE_OK;
-}
-
-static bool acpi_ffh_smccc_owner_allowed(u32 fid)
-{
- int owner = ARM_SMCCC_OWNER_NUM(fid);
-
- if (owner == ARM_SMCCC_OWNER_STANDARD ||
- owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
- return true;
-
- return false;
+ return 0;
}
+EXPORT_SYMBOL(acpi_map_cpu);
-int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+int acpi_unmap_cpu(int cpu)
{
- int ret = 0;
- struct acpi_ffh_data *ffh_ctxt = region_context;
-
- if (ffh_ctxt->info.offset == 0) {
- /* SMC/HVC 32bit call */
- struct arm_smccc_res res;
- u32 a[8] = { 0 }, *ptr = (u32 *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
- !acpi_ffh_smccc_owner_allowed(*ptr) ||
- ffh_ctxt->info.length > 32) {
- ret = AE_ERROR;
- } else {
- int idx, len = ffh_ctxt->info.length >> 2;
-
- for (idx = 0; idx < len; idx++)
- a[idx] = *(ptr + idx);
-
- ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
- a[5], a[6], a[7], &res, NULL);
- memcpy(value, &res, sizeof(res));
- }
-
- } else if (ffh_ctxt->info.offset == 1) {
- /* SMC/HVC 64bit call */
- struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
- !acpi_ffh_smccc_owner_allowed(r->a0) ||
- ffh_ctxt->info.length > sizeof(*r)) {
- ret = AE_ERROR;
- } else {
- ffh_ctxt->invoke_ffh64_fn(r, r);
- memcpy(value, r, ffh_ctxt->info.length);
- }
- } else {
- ret = AE_ERROR;
- }
-
- return ret;
+ return 0;
}
-#endif /* CONFIG_ACPI_FFH */
+EXPORT_SYMBOL(acpi_unmap_cpu);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index e51535a5f939..2465f291c7e1 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -27,24 +27,13 @@
#include <asm/numa.h>
-static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
+static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
int __init acpi_numa_get_nid(unsigned int cpu)
{
return acpi_early_node_map[cpu];
}
-static inline int get_cpu_for_acpi_id(u32 uid)
-{
- int cpu;
-
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- if (uid == get_acpi_id_for_cpu(cpu))
- return cpu;
-
- return -EINVAL;
-}
-
static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index dd6ce86d4332..e737c6295ec7 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -462,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu)
for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
struct insn_emulation *insn = insn_emulations[i];
bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
+ if (insn->status == INSN_UNAVAILABLE)
+ continue;
+
if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
pr_warn("CPU[%u] cannot support the emulation of %s",
cpu, insn->name);
@@ -504,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
return ret;
}
-static int emulation_proc_handler(struct ctl_table *table, int write,
+static int emulation_proc_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5a7dbbe0ce63..eb1a840e4110 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -12,15 +12,12 @@
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/mm.h>
-#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
-#include <linux/preempt.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
-#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <linux/kbuild.h>
@@ -28,8 +25,6 @@
int main(void)
{
- DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
- BLANK();
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
@@ -75,49 +70,31 @@ int main(void)
DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
- DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
+ DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
- DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
+ DEFINE(S_PMR, offsetof(struct pt_regs, pmr));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
+ DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
- DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0]));
- DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2]));
- DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4]));
- DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6]));
- DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8]));
- DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp));
- DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr));
- DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp));
- DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc));
+ DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0]));
+ DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2]));
+ DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4]));
+ DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6]));
+ DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8]));
+ DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp));
+ DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr));
+ DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
+ DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc));
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
- DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp));
+ DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp));
#endif
- DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
+ DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs));
BLANK();
#endif
-#ifdef CONFIG_COMPAT
- DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
- DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
- BLANK();
-#endif
- DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
- BLANK();
- DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
- DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
- BLANK();
- DEFINE(VM_EXEC, VM_EXEC);
- BLANK();
- DEFINE(PAGE_SZ, PAGE_SIZE);
- BLANK();
- DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
- DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
- BLANK();
- DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
- BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
@@ -128,6 +105,7 @@ int main(void)
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_ELR_EL2, offsetof(struct kvm_cpu_context, sys_regs[ELR_EL2]));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
@@ -145,6 +123,7 @@ int main(void)
DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
+ DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
@@ -200,18 +179,6 @@ int main(void)
DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
#endif
BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0]));
- DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1]));
- DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2]));
- DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3]));
- DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4]));
- DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5]));
- DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6]));
- DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7]));
- DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp));
- DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
#endif
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index d9c9218fa1fd..309942b06c5b 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu)
unsigned int level, idx;
enum cache_type type;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ struct cacheinfo *infos = this_cpu_ci->info_list;
for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
- idx < this_cpu_ci->num_leaves; idx++, level++) {
+ idx < this_cpu_ci->num_leaves; level++) {
type = get_cache_type(level);
if (type == CACHE_TYPE_SEPARATE) {
- ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
- ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ if (idx + 1 >= this_cpu_ci->num_leaves)
+ break;
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level);
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level);
} else {
- ci_leaf_init(this_leaf++, type, level);
+ ci_leaf_init(&infos[idx++], type, level);
}
}
return 0;
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 76b8dd37092a..7ce555862895 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -432,6 +432,41 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = {
};
#endif
+#ifdef CONFIG_ARM64_ERRATUM_3194386
+static const struct midr_range erratum_spec_ssbs_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
+ {}
+};
+#endif
+
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -729,6 +764,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_3194386
+ {
+ .desc = "SSBS not fully self-synchronizing",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS,
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
+ },
+#endif
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
.desc = "ARM errata 2966298, 3117295",
@@ -741,9 +783,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
{
+ .desc = "Broken CNTVOFF_EL2",
+ .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+ ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+ MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+ {}
+ })),
+ },
+ {
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 8d1a634a403e..d561cf3b8ac7 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@
#include <linux/cpu.h>
#include <linux/kasan.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
@@ -103,6 +104,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
+unsigned int compat_elf_hwcap3 __read_mostly;
#endif
DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
@@ -140,12 +142,42 @@ void dump_cpu_features(void)
pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
}
+#define __ARM64_MAX_POSITIVE(reg, field) \
+ ((reg##_##field##_SIGNED ? \
+ BIT(reg##_##field##_WIDTH - 1) : \
+ BIT(reg##_##field##_WIDTH)) - 1)
+
+#define __ARM64_MIN_NEGATIVE(reg, field) BIT(reg##_##field##_WIDTH - 1)
+
+#define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value) \
+ .sys_reg = SYS_##reg, \
+ .field_pos = reg##_##field##_SHIFT, \
+ .field_width = reg##_##field##_WIDTH, \
+ .sign = reg##_##field##_SIGNED, \
+ .min_field_value = min_value, \
+ .max_field_value = max_value,
+
+/*
+ * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to
+ * an implicit maximum that depends on the sign-ess of the field.
+ *
+ * An unsigned field will be capped at all ones, while a signed field
+ * will be limited to the positive half only.
+ */
#define ARM64_CPUID_FIELDS(reg, field, min_value) \
- .sys_reg = SYS_##reg, \
- .field_pos = reg##_##field##_SHIFT, \
- .field_width = reg##_##field##_WIDTH, \
- .sign = reg##_##field##_SIGNED, \
- .min_field_value = reg##_##field##_##min_value,
+ __ARM64_CPUID_FIELDS(reg, field, \
+ SYS_FIELD_VALUE(reg, field, min_value), \
+ __ARM64_MAX_POSITIVE(reg, field))
+
+/*
+ * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an
+ * implicit minimal value to max_value. This should be used when
+ * matching a non-implemented property.
+ */
+#define ARM64_CPUID_FIELDS_NEG(reg, field, max_value) \
+ __ARM64_CPUID_FIELDS(reg, field, \
+ __ARM64_MIN_NEGATIVE(reg, field), \
+ SYS_FIELD_VALUE(reg, field, max_value))
#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
@@ -198,6 +230,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
@@ -220,6 +253,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0),
@@ -234,6 +268,12 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
@@ -249,12 +289,14 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
@@ -267,12 +309,19 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F16MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
@@ -285,6 +334,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_EltPerm_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
@@ -295,6 +346,8 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
@@ -307,6 +360,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
@@ -316,6 +373,34 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SBitPerm_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_AES_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SFEXPA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_STMOP_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMOP4_SHIFT, 1, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM8_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -366,6 +451,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
@@ -402,11 +488,18 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
@@ -613,6 +706,14 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_mpamidr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PMG_MAX_SHIFT, MPAMIDR_EL1_PMG_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_VPMR_MAX_SHIFT, MPAMIDR_EL1_VPMR_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_HAS_HCR_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PARTID_MAX_SHIFT, MPAMIDR_EL1_PARTID_MAX_WIDTH, 0),
+ ARM64_FTR_END,
+};
+
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
@@ -655,13 +756,15 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
-struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
+struct arm64_ftr_override id_aa64mmfr0_override;
+struct arm64_ftr_override id_aa64mmfr1_override;
+struct arm64_ftr_override id_aa64mmfr2_override;
+struct arm64_ftr_override id_aa64pfr0_override;
+struct arm64_ftr_override id_aa64pfr1_override;
+struct arm64_ftr_override id_aa64zfr0_override;
+struct arm64_ftr_override id_aa64smfr0_override;
+struct arm64_ftr_override id_aa64isar1_override;
+struct arm64_ftr_override id_aa64isar2_override;
struct arm64_ftr_override arm64_sw_feature_override;
@@ -702,10 +805,12 @@ static const struct __ftr_reg_entry {
&id_aa64pfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
+ ARM64_FTR_REG(SYS_ID_AA64PFR2_EL1, ftr_id_aa64pfr2),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
&id_aa64zfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
&id_aa64smfr0_override),
+ ARM64_FTR_REG(SYS_ID_AA64FPFR0_EL1, ftr_id_aa64fpfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -717,13 +822,20 @@ static const struct __ftr_reg_entry {
&id_aa64isar1_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
&id_aa64isar2_override),
+ ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),
/* Op1 = 0, CRn = 0, CRm = 7 */
- ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0,
+ &id_aa64mmfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
- ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
+ &id_aa64mmfr2_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
+
+ /* Op1 = 0, CRn = 10, CRm = 4 */
+ ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr),
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -910,16 +1022,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
/* Override was valid */
ftr_new = tmp;
str = "forced";
- } else if (ftr_ovr == tmp) {
+ } else {
/* Override was the safe value */
str = "already set";
}
- if (str)
- pr_warn("%s[%d:%d]: %s to %llx\n",
- reg->name,
- ftrp->shift + ftrp->width - 1,
- ftrp->shift, str, tmp);
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str,
+ tmp & (BIT(ftrp->width) - 1));
} else if ((ftr_mask & reg->override->val) == ftr_mask) {
reg->override->val &= ~ftr_mask;
pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1043,14 +1155,18 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR3_EL1, info->reg_id_aa64isar3);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64FPFR0_EL1, info->reg_id_aa64fpfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
init_32bit_cpu_features(&info->aarch32);
@@ -1068,17 +1184,14 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
unsigned long cpacr = cpacr_save_enable_kernel_sme();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
vec_init_vq_map(ARM64_VEC_SME);
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
+
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
}
@@ -1272,6 +1385,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR3_EL1, cpu,
+ info->reg_id_aa64isar3, boot->reg_id_aa64isar3);
/*
* Differing PARange support is fine as long as all peripherals and
@@ -1291,6 +1406,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64PFR2_EL1, cpu,
+ info->reg_id_aa64pfr2, boot->reg_id_aa64pfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
@@ -1298,6 +1415,9 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64FPFR0_EL1, cpu,
+ info->reg_id_aa64fpfr0, boot->reg_id_aa64fpfr0);
+
/* Probe vector lengths */
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
@@ -1314,13 +1434,6 @@ void update_cpu_features(int cpu,
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
unsigned long cpacr = cpacr_save_enable_kernel_sme();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
-
/* Probe vector lengths */
if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SME);
@@ -1328,6 +1441,11 @@ void update_cpu_features(int cpu,
cpacr_restore(cpacr);
}
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0)) {
+ taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
+ info->reg_mpamidr, boot->reg_mpamidr);
+ }
+
/*
* The kernel uses the LDGM/STGM instructions and the number of tags
* they read/write depends on the GMID_EL1.BS field. Check that the
@@ -1410,17 +1528,21 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1);
read_sysreg_case(SYS_ID_AA64PFR1_EL1);
+ read_sysreg_case(SYS_ID_AA64PFR2_EL1);
read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64FPFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR3_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0);
read_sysreg_case(SYS_CTR_EL0);
@@ -1451,11 +1573,28 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
- entry->field_width,
- entry->sign);
+ int val, min, max;
+ u64 tmp;
+
+ val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+ entry->field_width,
+ entry->sign);
+
+ tmp = entry->min_field_value;
+ tmp <<= entry->field_pos;
+
+ min = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+ entry->field_width,
+ entry->sign);
+
+ tmp = entry->max_field_value;
+ tmp <<= entry->field_pos;
+
+ max = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+ entry->field_width,
+ entry->sign);
- return val >= entry->min_field_value;
+ return val >= min && val <= max;
}
static u64
@@ -1506,6 +1645,11 @@ const struct cpumask *system_32bit_el0_cpumask(void)
return cpu_possible_mask;
}
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
+}
+
static int __init parse_32bit_el0_param(char *str)
{
allow_mismatched_32bit_el0 = true;
@@ -1620,46 +1764,6 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
return has_cpuid_feature(entry, scope);
}
-/*
- * This check is triggered during the early boot before the cpufeature
- * is initialised. Checking the status on the local CPU allows the boot
- * CPU to detect the need for non-global mappings and thus avoiding a
- * pagetable re-write after all the CPUs are booted. This check will be
- * anyway run on individual CPUs, allowing us to get the consistent
- * state once the SMP CPUs are up and thus make the switch to non-global
- * mappings if required.
- */
-bool kaslr_requires_kpti(void)
-{
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return false;
-
- /*
- * E0PD does a similar job to KPTI so can be used instead
- * where available.
- */
- if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
- u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
- if (cpuid_feature_extract_unsigned_field(mmfr2,
- ID_AA64MMFR2_EL1_E0PD_SHIFT))
- return false;
- }
-
- /*
- * Systems affected by Cavium erratum 24756 are incompatible
- * with KPTI.
- */
- if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
- extern const struct midr_range cavium_erratum_27456_cpus[];
-
- if (is_midr_in_range_list(read_cpuid_id(),
- cavium_erratum_27456_cpus))
- return false;
- }
-
- return kaslr_enabled();
-}
-
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -1712,7 +1816,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
}
/* Useful for KASLR robustness */
- if (kaslr_requires_kpti()) {
+ if (kaslr_enabled() && kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@@ -1739,6 +1843,28 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
+static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ /*
+ * Although the Apple M2 family appears to support NV1, the
+ * PTW barfs on the nVHE EL2 S1 page table format. Pretend
+ * that it doesn't support NV1 at all.
+ */
+ static const struct midr_range nv1_ni_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {}
+ };
+
+ return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
+ !(has_cpuid_feature(entry, scope) ||
+ is_midr_in_range_list(read_cpuid_id(), nv1_ni_list)));
+}
+
#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
static bool has_lpa2_at_stage1(u64 mmfr0)
{
@@ -1801,6 +1927,11 @@ static int __init __kpti_install_ng_mappings(void *__unused)
pgd_t *kpti_ng_temp_pgd;
u64 alloc = 0;
+ if (levels == 5 && !pgtable_l5_enabled())
+ levels = 4;
+ else if (levels == 4 && !pgtable_l4_enabled())
+ levels = 3;
+
remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
if (!cpu) {
@@ -1814,9 +1945,9 @@ static int __init __kpti_install_ng_mappings(void *__unused)
//
// The physical pages are laid out as follows:
//
- // +--------+-/-------+-/------ +-\\--------+
- // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
- // +--------+-\-------+-\------ +-//--------+
+ // +--------+-/-------+-/------ +-/------ +-\\\--------+
+ // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] :
+ // +--------+-\-------+-\------ +-\------ +-///--------+
// ^
// The first page is mapped into this hierarchy at a PMD_SHIFT
// aligned virtual address, so that we can manipulate the PTE
@@ -2042,14 +2173,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
int __unused)
{
- u64 val;
-
- val = read_sysreg(id_aa64mmfr1_el1);
- if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
- return false;
-
- val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
- return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
#ifdef CONFIG_ARM64_PAN
@@ -2218,6 +2342,14 @@ static void user_feature_fixup(void)
if (regp)
regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
}
+
+ if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_SSBS)) {
+ struct arm64_ftr_reg *regp;
+
+ regp = get_arm64_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ if (regp)
+ regp->user_mask &= ~ID_AA64PFR1_EL1_SSBS_MASK;
+ }
}
static void elf_hwcap_fixup(void)
@@ -2250,6 +2382,22 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE);
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused)
+{
+ /* GCSPR_EL0 is always readable */
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2269,6 +2417,36 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
+static bool
+test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!has_cpuid_feature(entry, scope))
+ return false;
+
+ /* Check firmware actually enabled MPAM on this cpu. */
+ return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM1_EL1_MPAMEN);
+}
+
+static void
+cpu_enable_mpam(const struct arm64_cpu_capabilities *entry)
+{
+ /*
+ * Access by the kernel (at EL1) should use the reserved PARTID
+ * which is configured unrestricted. This avoids priority-inversion
+ * where latency sensitive tasks have to wait for a task that has
+ * been throttled to release the lock.
+ */
+ write_sysreg_s(0, SYS_MPAM1_EL1);
+}
+
+static bool
+test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+
+ return idr & MPAMIDR_EL1_HAS_HCR;
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
@@ -2483,6 +2661,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
+#ifdef CONFIG_ARM64_HAFT
+ {
+ .desc = "Hardware managed Access Flag for Table Descriptors",
+ /*
+ * Contrary to the page/block access flag, the table access flag
+ * cannot be emulated in software (no access fault will occur).
+ * Therefore this should be used only if it's supported system
+ * wide.
+ */
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAFT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
+ },
+#endif
{
.desc = "CRC32 instructions",
.capability = ARM64_HAS_CRC32,
@@ -2739,6 +2932,73 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_lpa2,
},
+ {
+ .desc = "FPMR",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FPMR,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fpmr,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
+ },
+#ifdef CONFIG_ARM64_VA_BITS_52
+ {
+ .capability = ARM64_HAS_VA52,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+#ifdef CONFIG_ARM64_64K_PAGES
+ .desc = "52-bit Virtual Addressing (LVA)",
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52)
+#else
+ .desc = "52-bit Virtual Addressing (LPA2)",
+#ifdef CONFIG_ARM64_4K_PAGES
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)
+#else
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)
+#endif
+#endif
+ },
+#endif
+ {
+ .desc = "Memory Partitioning And Monitoring",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM,
+ .matches = test_has_mpam,
+ .cpu_enable = cpu_enable_mpam,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1)
+ },
+ {
+ .desc = "Memory Partitioning And Monitoring Virtualisation",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM_HCR,
+ .matches = test_has_mpam_hcr,
+ },
+ {
+ .desc = "NV1",
+ .capability = ARM64_HAS_HCR_NV1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_nv1,
+ ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
+ },
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ {
+ .desc = "Guarded Control Stack (GCS)",
+ .capability = ARM64_HAS_GCS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .cpu_enable = cpu_enable_gcs,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
+ },
+#endif
{},
};
@@ -2771,6 +3031,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = match, \
}
+#define HWCAP_CAP_MATCH_ID(match, reg, field, min_value, cap_type, cap) \
+ { \
+ __HWCAP_CAP(#cap, cap_type, cap) \
+ HWCAP_CPUID_MATCH(reg, field, min_value) \
+ .matches = match, \
+ }
+
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
@@ -2799,6 +3066,13 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
};
#endif
+#ifdef CONFIG_ARM64_SVE
+static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sve() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -2817,11 +3091,13 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
@@ -2835,22 +3111,32 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX),
HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p2, CAP_HWCAP, KERNEL_HWCAP_SVE2P2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, AES2, CAP_HWCAP, KERNEL_HWCAP_SVE_AES2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, BFSCALE, CAP_HWCAP, KERNEL_HWCAP_SVE_BFSCALE),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F16MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_F16MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, EltPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_ELTPERM),
+#endif
+#ifdef CONFIG_ARM64_GCS
+ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
#endif
HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
@@ -2867,6 +3153,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, CMPBR, CAP_HWCAP, KERNEL_HWCAP_CMPBR),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
@@ -2875,6 +3162,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
@@ -2882,12 +3171,33 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
@@ -3052,13 +3362,9 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
boot_scope = !!(scope_mask & SCOPE_BOOT_CPU);
for (i = 0; i < ARM64_NCAPS; i++) {
- unsigned int num;
-
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask))
- continue;
- num = caps->capability;
- if (!cpus_have_cap(num))
+ if (!caps || !(caps->type & scope_mask) ||
+ !cpus_have_cap(caps->capability))
continue;
if (boot_scope && caps->cpu_enable)
@@ -3210,7 +3516,7 @@ static void verify_hyp_capabilities(void)
return;
safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
/* Verify VMID bits */
@@ -3231,6 +3537,36 @@ static void verify_hyp_capabilities(void)
}
}
+static void verify_mpam_capabilities(void)
+{
+ u64 cpu_idr = read_cpuid(ID_AA64PFR0_EL1);
+ u64 sys_idr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u16 cpu_partid_max, cpu_pmg_max, sys_partid_max, sys_pmg_max;
+
+ if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, cpu_idr) !=
+ FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, sys_idr)) {
+ pr_crit("CPU%d: MPAM version mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_idr = read_cpuid(MPAMIDR_EL1);
+ sys_idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+ if (FIELD_GET(MPAMIDR_EL1_HAS_HCR, cpu_idr) !=
+ FIELD_GET(MPAMIDR_EL1_HAS_HCR, sys_idr)) {
+ pr_crit("CPU%d: Missing MPAM HCR\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, cpu_idr);
+ cpu_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, cpu_idr);
+ sys_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, sys_idr);
+ sys_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, sys_idr);
+ if (cpu_partid_max < sys_partid_max || cpu_pmg_max < sys_pmg_max) {
+ pr_crit("CPU%d: MPAM PARTID/PMG max values are mismatched\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@@ -3257,6 +3593,9 @@ static void verify_local_cpu_capabilities(void)
if (is_hyp_mode_available())
verify_hyp_capabilities();
+
+ if (system_supports_mpam())
+ verify_mpam_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -3334,6 +3673,11 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
+unsigned long cpu_get_elf_hwcap3(void)
+{
+ return elf_hwcap[2];
+}
+
static void __init setup_boot_cpu_capabilities(void)
{
/*
@@ -3436,7 +3780,14 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
static int lucky_winner = -1;
struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
- bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+ bool cpu_32bit = false;
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
+ pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
+ else
+ cpu_32bit = true;
+ }
if (cpu_32bit) {
cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
deleted file mode 100644
index f372295207fb..000000000000
--- a/arch/arm64/kernel/cpuidle.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ARM64 CPU idle arch support
- *
- * Copyright (C) 2014 ARM Ltd.
- * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/cpuidle.h>
-#include <linux/cpu_pm.h>
-#include <linux/psci.h>
-
-#ifdef CONFIG_ACPI_PROCESSOR_IDLE
-
-#include <acpi/processor.h>
-
-#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
-
-static int psci_acpi_cpu_init_idle(unsigned int cpu)
-{
- int i, count;
- struct acpi_lpi_state *lpi;
- struct acpi_processor *pr = per_cpu(processors, cpu);
-
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- count = pr->power.count - 1;
- if (count <= 0)
- return -ENODEV;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- lpi = &pr->power.lpi_states[i + 1];
- /*
- * Only bits[31:0] represent a PSCI power_state while
- * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
- */
- state = lpi->address;
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-int acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
- return psci_acpi_cpu_init_idle(cpu);
-}
-
-__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
-{
- u32 state = lpi->address;
-
- if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
- return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
- else
- return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
-}
-#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 47043c0d95ec..285d7d538342 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -80,6 +80,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SB] = "sb",
[KERNEL_HWCAP_PACA] = "paca",
[KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_GCS] = "gcs",
[KERNEL_HWCAP_DCPODP] = "dcpodp",
[KERNEL_HWCAP_SVE2] = "sve2",
[KERNEL_HWCAP_SVEAES] = "sveaes",
@@ -128,6 +129,37 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
[KERNEL_HWCAP_LRCPC3] = "lrcpc3",
[KERNEL_HWCAP_LSE128] = "lse128",
+ [KERNEL_HWCAP_FPMR] = "fpmr",
+ [KERNEL_HWCAP_LUT] = "lut",
+ [KERNEL_HWCAP_FAMINMAX] = "faminmax",
+ [KERNEL_HWCAP_F8CVT] = "f8cvt",
+ [KERNEL_HWCAP_F8FMA] = "f8fma",
+ [KERNEL_HWCAP_F8DP4] = "f8dp4",
+ [KERNEL_HWCAP_F8DP2] = "f8dp2",
+ [KERNEL_HWCAP_F8E4M3] = "f8e4m3",
+ [KERNEL_HWCAP_F8E5M2] = "f8e5m2",
+ [KERNEL_HWCAP_SME_LUTV2] = "smelutv2",
+ [KERNEL_HWCAP_SME_F8F16] = "smef8f16",
+ [KERNEL_HWCAP_SME_F8F32] = "smef8f32",
+ [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
+ [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
+ [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
+ [KERNEL_HWCAP_CMPBR] = "cmpbr",
+ [KERNEL_HWCAP_FPRCVT] = "fprcvt",
+ [KERNEL_HWCAP_F8MM8] = "f8mm8",
+ [KERNEL_HWCAP_F8MM4] = "f8mm4",
+ [KERNEL_HWCAP_SVE_F16MM] = "svef16mm",
+ [KERNEL_HWCAP_SVE_ELTPERM] = "sveeltperm",
+ [KERNEL_HWCAP_SVE_AES2] = "sveaes2",
+ [KERNEL_HWCAP_SVE_BFSCALE] = "svebfscale",
+ [KERNEL_HWCAP_SVE2P2] = "sve2p2",
+ [KERNEL_HWCAP_SME2P2] = "sme2p2",
+ [KERNEL_HWCAP_SME_SBITPERM] = "smesbitperm",
+ [KERNEL_HWCAP_SME_AES] = "smeaes",
+ [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa",
+ [KERNEL_HWCAP_SME_STMOP] = "smestmop",
+ [KERNEL_HWCAP_SME_SMOP4] = "smesmop4",
};
#ifdef CONFIG_COMPAT
@@ -265,7 +297,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
@@ -443,14 +475,18 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+ info->reg_id_aa64isar3 = read_cpuid(ID_AA64ISAR3_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
+ info->reg_id_aa64mmfr4 = read_cpuid(ID_AA64MMFR4_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+ info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
+ info->reg_id_aa64fpfr0 = read_cpuid(ID_AA64FPFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
info->reg_gmid = read_cpuid(GMID_EL1);
@@ -458,6 +494,19 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
+ if (id_aa64pfr0_mpam(info->reg_id_aa64pfr0))
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ /*
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+ }
+
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 64f2ecbdfe5c..58f047de3e1c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -303,7 +303,6 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr)
{
struct break_hook *hook;
struct list_head *list;
- int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
@@ -312,13 +311,11 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr)
* entirely not preemptible, and we can use rcu list safely here.
*/
list_for_each_entry_rcu(hook, list, node) {
- unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
-
- if ((comment & ~hook->mask) == hook->imm)
- fn = hook->fn;
+ if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm)
+ return hook->fn(regs, esr);
}
- return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
+ return DBG_HOOK_ERROR;
}
NOKPROBE_SYMBOL(call_break_hook);
@@ -443,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
set_regs_spsr_ss(regs);
}
+void kernel_fastforward_single_step(struct pt_regs *regs)
+{
+ clear_regs_spsr_ss(regs);
+}
+
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 0228001347be..1d25d8899dbf 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,7 +9,9 @@
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/kmemleak.h>
#include <linux/screen_info.h>
+#include <linux/vmalloc.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
@@ -32,8 +34,16 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
u64 attr = md->attribute;
u32 type = md->type;
- if (type == EFI_MEMORY_MAPPED_IO)
- return PROT_DEVICE_nGnRE;
+ if (type == EFI_MEMORY_MAPPED_IO) {
+ pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE);
+
+ if (arm64_is_protected_mmio(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT))
+ prot = pgprot_encrypted(prot);
+ else
+ prot = pgprot_decrypted(prot);
+ return pgprot_val(prot);
+ }
if (region_is_misaligned(md)) {
static bool __initdata code_is_misaligned;
@@ -103,7 +113,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
struct set_perm_data *spd = data;
const efi_memory_desc_t *md = spd->md;
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = __ptep_get(ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -111,7 +121,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
else if (system_supports_bti_kernel() && spd->has_bti)
pte = set_pte_bit(pte, __pgprot(PTE_GP));
- set_pte(ptep, pte);
+ __set_pte(ptep, pte);
return 0;
}
@@ -212,6 +222,7 @@ l: if (!p) {
return -ENOMEM;
}
+ kmemleak_not_leak(p);
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 0fc94207e69a..b260ddc4d3e9 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -10,6 +10,7 @@
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
+#include <linux/resume_user_mode.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/thread_info.h>
@@ -102,7 +103,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
static __always_inline void __enter_from_user_mode(void)
{
lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
+ CT_WARN_ON(ct_state() != CT_STATE_USER);
user_exit_irqoff();
trace_hardirqs_off_finish();
mte_disable_tco_entry(current);
@@ -126,16 +127,49 @@ static __always_inline void __exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
+static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
+{
+ do {
+ local_irq_enable();
+
+ if (thread_flags & _TIF_NEED_RESCHED)
+ schedule();
+
+ if (thread_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
+ if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ send_sig_fault(SIGSEGV, SEGV_MTEAERR,
+ (void __user *)NULL, current);
+ }
+
+ if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ do_signal(regs);
+
+ if (thread_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
+
+ if (thread_flags & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+
+ local_irq_disable();
+ thread_flags = read_thread_flags();
+ } while (thread_flags & _TIF_WORK_MASK);
+}
+
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long flags;
- local_daif_mask();
+ local_irq_disable();
flags = read_thread_flags();
if (unlikely(flags & _TIF_WORK_MASK))
do_notify_resume(regs, flags);
+ local_daif_mask();
+
lockdep_sys_exit();
}
@@ -429,6 +463,24 @@ static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
exit_to_kernel_mode(regs);
}
+static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_gcs(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_mops(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs);
+}
+
static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -471,6 +523,12 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BTI:
el1_bti(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el1_gcs(regs, esr);
+ break;
+ case ESR_ELx_EC_MOPS:
+ el1_mops(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_CUR:
case ESR_ELx_EC_SOFTSTP_CUR:
case ESR_ELx_EC_WATCHPT_CUR:
@@ -650,6 +708,14 @@ static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
exit_to_user_mode(regs);
}
+static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_gcs(regs, esr);
+ exit_to_user_mode(regs);
+}
+
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
enter_from_user_mode(regs);
@@ -732,6 +798,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_MOPS:
el0_mops(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el0_gcs(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_SOFTSTP_LOW:
case ESR_ELx_EC_WATCHPT_LOW:
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index f0c16640ef21..169ccf600066 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -329,24 +329,28 @@ SYM_FUNC_END(ftrace_stub_graph)
* @fp is checked against the value passed by ftrace_graph_caller().
*/
SYM_CODE_START(return_to_handler)
- /* save return value regs */
- sub sp, sp, #FGRET_REGS_SIZE
- stp x0, x1, [sp, #FGRET_REGS_X0]
- stp x2, x3, [sp, #FGRET_REGS_X2]
- stp x4, x5, [sp, #FGRET_REGS_X4]
- stp x6, x7, [sp, #FGRET_REGS_X6]
- str x29, [sp, #FGRET_REGS_FP] // parent's fp
+ /* Make room for ftrace_regs */
+ sub sp, sp, #FREGS_SIZE
+
+ /* Save return value regs */
+ stp x0, x1, [sp, #FREGS_X0]
+ stp x2, x3, [sp, #FREGS_X2]
+ stp x4, x5, [sp, #FREGS_X4]
+ stp x6, x7, [sp, #FREGS_X6]
+
+ /* Save the callsite's FP */
+ str x29, [sp, #FREGS_FP]
mov x0, sp
- bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs);
+ bl ftrace_return_to_handler // addr = ftrace_return_to_hander(fregs);
mov x30, x0 // restore the original return address
- /* restore return value regs */
- ldp x0, x1, [sp, #FGRET_REGS_X0]
- ldp x2, x3, [sp, #FGRET_REGS_X2]
- ldp x4, x5, [sp, #FGRET_REGS_X4]
- ldp x6, x7, [sp, #FGRET_REGS_X6]
- add sp, sp, #FGRET_REGS_SIZE
+ /* Restore return value regs */
+ ldp x0, x1, [sp, #FREGS_X0]
+ ldp x2, x3, [sp, #FREGS_X2]
+ ldp x4, x5, [sp, #FREGS_X4]
+ ldp x6, x7, [sp, #FREGS_X6]
+ add sp, sp, #FREGS_SIZE
ret
SYM_CODE_END(return_to_handler)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7ef0e127b149..5ae2a34b50bd 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,6 +25,7 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -284,15 +285,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, create a final frame record.
- * For exceptions from EL1, create a synthetic frame record so the
- * interrupted code shows up in the backtrace.
+ * Create a metadata frame record. The unwinder will use this to
+ * identify and unwind exception boundaries.
*/
- .if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
+ .if \el == 0
+ mov x0, #FRAME_META_TYPE_FINAL
.else
- stp x29, x22, [sp, #S_STACKFRAME]
+ mov x0, #FRAME_META_TYPE_PT_REGS
.endif
+ str x0, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
@@ -315,7 +317,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
alternative_else_nop_endif
mrs_s x20, SYS_ICC_PMR_EL1
- str x20, [sp, #S_PMR_SAVE]
+ str w20, [sp, #S_PMR]
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
@@ -342,7 +344,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
b .Lskip_pmr_restore\@
alternative_else_nop_endif
- ldr x20, [sp, #S_PMR_SAVE]
+ ldr w20, [sp, #S_PMR]
msr_s SYS_ICC_PMR_EL1, x20
/* Ensure priority change is seen by redistributor */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f27acca550d5..8370d55f0353 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -359,6 +359,9 @@ static void task_fpsimd_load(void)
WARN_ON(preemptible());
WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
+ if (system_supports_fpmr())
+ write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
@@ -383,7 +386,7 @@ static void task_fpsimd_load(void)
* fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
- * memory allocated for FPSMID registers so
+ * memory allocated for FPSIMD registers so
* try that and hope for the best.
*/
WARN_ON_ONCE(1);
@@ -446,6 +449,9 @@ static void fpsimd_save_user_state(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
+ if (system_supports_fpmr())
+ *(last->fpmr) = read_sysreg_s(SYS_FPMR);
+
/*
* If a task is in a syscall the ABI allows us to only
* preserve the state shared with FPSIMD so don't bother
@@ -529,7 +535,7 @@ static unsigned int find_supported_vector_length(enum vec_type type,
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
-static int vec_proc_do_default_vl(struct ctl_table *table, int write,
+static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = table->extra1;
@@ -556,7 +562,7 @@ static int vec_proc_do_default_vl(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table sve_default_vl_table[] = {
+static const struct ctl_table sve_default_vl_table[] = {
{
.procname = "sve_default_vector_length",
.mode = 0644,
@@ -579,7 +585,7 @@ static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
-static struct ctl_table sme_default_vl_table[] = {
+static const struct ctl_table sme_default_vl_table[] = {
{
.procname = "sme_default_vector_length",
.mode = 0644,
@@ -688,6 +694,12 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+ SYS_SCTLR_EL1);
+}
+
#ifdef CONFIG_ARM64_SVE
/*
* Call __sve_free() directly only if you know task can't be scheduled
@@ -1134,6 +1146,8 @@ void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
+
+ write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
@@ -1245,6 +1259,9 @@ void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
+ /* Ensure all bits in SMCR are set to known values */
+ write_sysreg_s(0, SYS_SMCR_EL1);
+
/* Allow EL0 to access TPIDR2 */
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
@@ -1350,6 +1367,7 @@ static void sve_init_regs(void)
} else {
fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
+ fpsimd_flush_task_state(current);
}
}
@@ -1518,6 +1536,27 @@ static void fpsimd_save_kernel_state(struct task_struct *task)
task->thread.kernel_fpsimd_cpu = smp_processor_id();
}
+/*
+ * Invalidate any task's FPSIMD state that is present on this cpu.
+ * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
+ * before calling this function.
+ */
+static void fpsimd_flush_cpu_state(void)
+{
+ WARN_ON(!system_supports_fpsimd());
+ __this_cpu_write(fpsimd_last_state.st, NULL);
+
+ /*
+ * Leaving streaming mode enabled will cause issues for any kernel
+ * NEON and leaving streaming mode or ZA enabled may increase power
+ * consumption.
+ */
+ if (system_supports_sme())
+ sme_smstop();
+
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1535,7 +1574,7 @@ void fpsimd_thread_switch(struct task_struct *next)
if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
fpsimd_load_kernel_state(next);
- set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ fpsimd_flush_cpu_state();
} else {
/*
* Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
@@ -1656,31 +1695,6 @@ void fpsimd_signal_preserve_current_state(void)
}
/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
- if (!system_supports_sve())
- return;
-
- /*
- * KVM does not save host SVE state since we can only enter
- * the guest from a syscall so the ABI means that only the
- * non-saved SVE state needs to be saved. If we have left
- * SVE enabled for performance reasons then update the task
- * state to be FPSIMD only.
- */
- get_cpu_fpsimd_context();
-
- if (test_and_clear_thread_flag(TIF_SVE)) {
- sve_to_fpsimd(current);
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- put_cpu_fpsimd_context();
-}
-
-/*
* Associate current's FPSIMD context with this cpu
* The caller must have ownership of the cpu FPSIMD context before calling
* this function.
@@ -1696,6 +1710,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
+ last->fpmr = &current->thread.uw.fpmr;
last->fp_type = &current->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
@@ -1825,27 +1840,6 @@ void fpsimd_flush_task_state(struct task_struct *t)
}
/*
- * Invalidate any task's FPSIMD state that is present on this cpu.
- * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
- * before calling this function.
- */
-static void fpsimd_flush_cpu_state(void)
-{
- WARN_ON(!system_supports_fpsimd());
- __this_cpu_write(fpsimd_last_state.st, NULL);
-
- /*
- * Leaving streaming mode enabled will cause issues for any kernel
- * NEON and leaving streaming mode or ZA enabled may increase power
- * consumption.
- */
- if (system_supports_sme())
- sme_smstop();
-
- set_thread_flag(TIF_FOREIGN_FPSTATE);
-}
-
-/*
* Save the FPSIMD state to memory and invalidate cpu view.
* This function must be called with preemption disabled.
*/
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..d7c0d023dfe5 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,7 +15,7 @@
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
struct fregs_offset {
@@ -23,10 +23,10 @@ struct fregs_offset {
int offset;
};
-#define FREGS_OFFSET(n, field) \
-{ \
- .name = n, \
- .offset = offsetof(struct ftrace_regs, field), \
+#define FREGS_OFFSET(n, field) \
+{ \
+ .name = n, \
+ .offset = offsetof(struct __arch_ftrace_regs, field), \
}
static const struct fregs_offset fregs_offsets[] = {
@@ -143,6 +143,69 @@ unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/* Convert fentry_ip to the symbol address without kallsyms */
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ u32 insn;
+
+ /*
+ * When using patchable-function-entry without pre-function NOPS, ftrace
+ * entry is the address of the first NOP after the function entry point.
+ *
+ * The compiler has either generated:
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func-04: BTI C
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * The fentry_ip is the address of `BL <caller>` which is at `func + 4`
+ * bytes in either case.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /*
+ * When using patchable-function-entry with pre-function NOPs, BTI is
+ * a bit different.
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func+00: func: BTI C
+ * func+04: NOP // To be patched to MOV X9, LR
+ * func+08: NOP // To be patched to BL <caller>
+ *
+ * The fentry_ip is the address of `BL <caller>` which is at either
+ * `func + 4` or `func + 8` depends on whether there is a BTI.
+ */
+
+ /* If there is no BTI, the func address should be one instruction before. */
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /* We want to be extra safe in case entry ip is on the page edge,
+ * but otherwise we need to avoid get_kernel_nofault()'s overhead.
+ */
+ if ((fentry_ip & ~PAGE_MASK) < AARCH64_INSN_SIZE * 2) {
+ if (get_kernel_nofault(insn, (u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2)))
+ return 0;
+ } else {
+ insn = *(u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2);
+ }
+
+ if (aarch64_insn_is_bti(le32_to_cpu((__le32)insn)))
+ return fentry_ip - AARCH64_INSN_SIZE * 2;
+
+ return fentry_ip - AARCH64_INSN_SIZE;
+}
+
/*
* Replace a single instruction, which may be a branch or NOP.
* If @validate == true, a replaced instruction is checked against 'old'.
@@ -481,7 +544,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long frame_pointer = arch_ftrace_regs(fregs)->fp;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->lr;
+ unsigned long old;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+
+ if (!function_graph_enter_regs(old, ip, frame_pointer,
+ (void *)frame_pointer, fregs)) {
+ *parent = return_hooker;
+ }
}
#else
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 99f8a8c082d3..2ce73525de2c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -32,6 +32,7 @@
#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -41,26 +42,6 @@
#error PAGE_OFFSET must be at least 2MB aligned
#endif
- .macro putc_base_pa, base:req
- mov \base, #0xf0000000
- add \base, \base, #0x00512000
- .endm
- .macro putc_base_va, base:req
- mov \base, #0xffffff8000000000
- orr \base, \base, #0x30000000
- add \base, \base, #0x00512000
- .endm
- .macro wait, base:req, val:req
-1: ldrb \val, [\base, #5 * 4]
- tbz \val, #6, 1b
- .endm
- .macro putc, base:req, val:req, ch
- .ifnb \ch
- mov \val, \ch
- .endif
- strb \val, [\base, #0]
- .endm
-
/*
* Kernel startup entry point.
* ---------------------------
@@ -100,127 +81,42 @@
* x19 primary_entry() .. start_kernel() whether we entered with the MMU on
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
- * x22 create_idmap() .. start_kernel() ID map VA of the DT blob
- * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x24 __primary_switch() linear map KASLR seed
- * x25 primary_entry() .. start_kernel() supported VA size
- * x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
-#if 0
- putc_base_pa x21
- putc x21, w23, #'x'
- wait x21, w23
- putc x21, w23, #'0'
- wait x21, w23
- putc x21, w23, #':'
- mov x19, x0
- mov x20, #16
-1: wait x21, w23
- lsr x23, x19, #60
- and w23, w23, #15
- cmp w23, #10
- bcc 2f
- add w23, w23, #7
-2: add w23, w23, #'0'
- putc x21, w23
- lsl x19, x19, #4
- subs x20, x20, #1
- bcs 1b
- wait x21, w23
- putc x21, w23, #'\r'
- wait x21, w23
- putc x21, w23, #'\n'
- wait x21, w23
-
- putc x21, w23, #'x'
- wait x21, w23
- putc x21, w23, #'1'
- wait x21, w23
- putc x21, w23, #':'
- mov x19, x1
- mov x20, #16
-1: wait x21, w23
- lsr x23, x19, #60
- and w23, w23, #15
- cmp w23, #10
- bcc 2f
- add w23, w23, #7
-2: add w23, w23, #'0'
- putc x21, w23
- lsl x19, x19, #4
- subs x20, x20, #1
- bcs 1b
- wait x21, w23
- putc x21, w23, #'\r'
- wait x21, w23
- putc x21, w23, #'\n'
- wait x21, w23
-
- putc x21, w23, #'x'
- wait x21, w23
- putc x21, w23, #'2'
- wait x21, w23
- putc x21, w23, #':'
- mov x19, x2
- mov x20, #16
-1: wait x21, w23
- lsr x23, x19, #60
- and w23, w23, #15
- cmp w23, #10
- bcc 2f
- add w23, w23, #7
-2: add w23, w23, #'0'
- putc x21, w23
- lsl x19, x19, #4
- subs x20, x20, #1
- bcs 1b
- wait x21, w23
- putc x21, w23, #'\r'
- wait x21, w23
- putc x21, w23, #'\n'
- wait x21, w23
-
- putc x21, w23, #'x'
- wait x21, w23
- putc x21, w23, #'3'
- wait x21, w23
- putc x21, w23, #':'
- mov x19, x3
- mov x20, #16
-1: wait x21, w23
- lsr x23, x19, #60
- and w23, w23, #15
- cmp w23, #10
- bcc 2f
- add w23, w23, #7
-2: add w23, w23, #'0'
- putc x21, w23
- lsl x19, x19, #4
- subs x20, x20, #1
- bcs 1b
- wait x21, w23
- putc x21, w23, #'\r'
- wait x21, w23
- putc x21, w23, #'\n'
- wait x21, w23
-#endif
bl record_mmu_state
-
bl preserve_boot_args
- bl create_idmap
+
+ adrp x1, early_init_stack
+ mov sp, x1
+ mov x29, xzr
+ adrp x0, init_idmap_pg_dir
+ mov x1, xzr
+ bl __pi_create_init_idmap
+
+ /*
+ * If the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate those tables again to
+ * remove any speculatively loaded cache lines.
+ */
+ cbnz x19, 0f
+ dmb sy
+ mov x1, x0 // end of used region
+ adrp x0, init_idmap_pg_dir
+ adr_l x2, dcache_inval_poc
+ blr x2
+ b 1f
/*
* If we entered with the MMU and caches on, clean the ID mapped part
* of the primary boot code to the PoC so we can safely execute it with
* the MMU off.
*/
- cbz x19, 0f
- adrp x0, __idmap_text_start
+0: adrp x0, __idmap_text_start
adr_l x1, __idmap_text_end
adr_l x2, dcache_clean_poc
blr x2
-0: mov x0, x19
+
+1: mov x0, x19
bl init_kernel_el // w0=cpu_boot_mode
mov x20, x0
@@ -230,14 +126,6 @@ SYM_CODE_START(primary_entry)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
-#if VA_BITS > 48
- mrs_s x0, SYS_ID_AA64MMFR2_EL1
- tst x0, ID_AA64MMFR2_EL1_VARange_MASK
- mov x0, #VA_BITS
- mov x25, #VA_BITS_MIN
- csel x25, x25, x0, eq
- mov x0, x25
-#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@@ -296,275 +184,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
ret
SYM_CODE_END(preserve_boot_args)
-SYM_FUNC_START_LOCAL(clear_page_tables)
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x2, x1, x0
- mov x1, xzr
- b __pi_memset // tail call
-SYM_FUNC_END(clear_page_tables)
-
-/*
- * Macro to populate page table entries, these entries can be pointers to the next level
- * or last level entries pointing to physical memory.
- *
- * tbl: page table address
- * rtbl: pointer to page table or physical memory
- * index: start index to write
- * eindex: end index to write - [index, eindex] written to
- * flags: flags for pagetable entry to or in
- * inc: increment to rtbl between each entry
- * tmp1: temporary variable
- *
- * Preserves: tbl, eindex, flags, inc
- * Corrupts: index, tmp1
- * Returns: rtbl
- */
- .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
-.Lpe\@: phys_to_pte \tmp1, \rtbl
- orr \tmp1, \tmp1, \flags // tmp1 = table entry
- str \tmp1, [\tbl, \index, lsl #3]
- add \rtbl, \rtbl, \inc // rtbl = pa next level
- add \index, \index, #1
- cmp \index, \eindex
- b.ls .Lpe\@
- .endm
-
-/*
- * Compute indices of table entries from virtual address range. If multiple entries
- * were needed in the previous page table level then the next page table level is assumed
- * to be composed of multiple pages. (This effectively scales the end index).
- *
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend]
- * shift: shift used to transform virtual address into index
- * order: #imm 2log(number of entries in page table)
- * istart: index in table corresponding to vstart
- * iend: index in table corresponding to vend
- * count: On entry: how many extra entries were required in previous level, scales
- * our end index.
- * On exit: returns how many extra entries required for next page table level
- *
- * Preserves: vstart, vend
- * Returns: istart, iend, count
- */
- .macro compute_indices, vstart, vend, shift, order, istart, iend, count
- ubfx \istart, \vstart, \shift, \order
- ubfx \iend, \vend, \shift, \order
- add \iend, \iend, \count, lsl \order
- sub \count, \iend, \istart
- .endm
-
-/*
- * Map memory for specified virtual address range. Each level of page table needed supports
- * multiple entries. If a level requires n entries the next page table level is assumed to be
- * formed from n pages.
- *
- * tbl: location of page table
- * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend - 1]
- * flags: flags to use to map last level entries
- * phys: physical address corresponding to vstart - physical memory is contiguous
- * order: #imm 2log(number of entries in PGD table)
- *
- * If extra_shift is set, an extra level will be populated if the end address does
- * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
- *
- * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
- * Preserves: vstart, flags
- * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
- */
- .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
- sub \vend, \vend, #1
- add \rtbl, \tbl, #PAGE_SIZE
- mov \count, #0
-
- .ifnb \extra_shift
- tst \vend, #~((1 << (\extra_shift)) - 1)
- b.eq .L_\@
- compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
- .endif
-.L_\@:
- compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-
-#if SWAPPER_PGTABLE_LEVELS > 3
- compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
-#if SWAPPER_PGTABLE_LEVELS > 2
- compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
- compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
- populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
- .endm
-
-/*
- * Remap a subregion created with the map_memory macro with modified attributes
- * or output address. The entire remapped region must have been covered in the
- * invocation of map_memory.
- *
- * x0: last level table address (returned in first argument to map_memory)
- * x1: start VA of the existing mapping
- * x2: start VA of the region to update
- * x3: end VA of the region to update (exclusive)
- * x4: start PA associated with the region to update
- * x5: attributes to set on the updated region
- * x6: order of the last level mappings
- */
-SYM_FUNC_START_LOCAL(remap_region)
- sub x3, x3, #1 // make end inclusive
-
- // Get the index offset for the start of the last level table
- lsr x1, x1, x6
- bfi x1, xzr, #0, #PAGE_SHIFT - 3
-
- // Derive the start and end indexes into the last level table
- // associated with the provided region
- lsr x2, x2, x6
- lsr x3, x3, x6
- sub x2, x2, x1
- sub x3, x3, x1
-
- mov x1, #1
- lsl x6, x1, x6 // block size at this level
-
- populate_entries x0, x4, x2, x3, x5, x6, x7
- ret
-SYM_FUNC_END(remap_region)
-
-SYM_FUNC_START_LOCAL(create_idmap)
- mov x28, lr
- /*
- * The ID map carries a 1:1 mapping of the physical address range
- * covered by the loaded image, which could be anywhere in DRAM. This
- * means that the required size of the VA (== PA) space is decided at
- * boot time, and could be more than the configured size of the VA
- * space for ordinary kernel and user space mappings.
- *
- * There are three cases to consider here:
- * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
- * the placement of the image. In this case, we configure one extra
- * level of translation on the fly for the ID map only. (This case
- * also covers 42-bit VA/52-bit PA on 64k pages).
- *
- * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
- * only happen when using 64k pages, in which case we need to extend
- * the root level table rather than add a level. Note that we can
- * treat this case as 'always extended' as long as we take care not
- * to program an unsupported T0SZ value into the TCR register.
- *
- * - Combinations that would require two additional levels of
- * translation are not supported, e.g., VA_BITS==36 on 16k pages, or
- * VA_BITS==39/4k pages with 5-level paging, where the input address
- * requires more than 47 or 48 bits, respectively.
- */
-#if (VA_BITS < 48)
-#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
-#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-
- /*
- * If VA_BITS < 48, we have to configure an additional table level.
- * First, we have to verify our assumption that the current value of
- * VA_BITS was chosen such that all translation levels are fully
- * utilised, and that lowering T0SZ will always result in an additional
- * translation level to be configured.
- */
-#if VA_BITS != EXTRA_SHIFT
-#error "Mismatch between VA_BITS and page size/number of translation levels"
-#endif
-#else
-#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
-#define EXTRA_SHIFT
- /*
- * If VA_BITS == 48, we don't have to configure an additional
- * translation level, but the top-level table has more entries.
- */
-#endif
- adrp x0, init_idmap_pg_dir
- adrp x3, _text
- adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- mov_q x7, SWAPPER_RX_MMUFLAGS
-
- map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
-
- /* Remap the kernel page tables r/w in the ID map */
- adrp x1, _text
- adrp x2, init_pg_dir
- adrp x3, init_pg_end
- bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
- /* Remap the FDT after the kernel image */
- adrp x1, _text
- adrp x22, _end + SWAPPER_BLOCK_SIZE
- bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
- bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
- add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
-#if 0
- mov x7, (PMD_ATTRINDX(MT_DEVICE_nGnRnE) | SWAPPER_PMD_FLAGS)
- mov x3, #0xf0000000
- mov x4, #0x30000000
- mov x6, #0x31000000
- create_block_map x0, x7, x3, x4, x6
-#endif
-
- /*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate those tables again to
- * remove any speculatively loaded cache lines.
- */
- cbnz x19, 0f // skip cache invalidation if MMU is on
- dmb sy
-
- adrp x0, init_idmap_pg_dir
- adrp x1, init_idmap_pg_end
- bl dcache_inval_poc
-0: ret x28
-SYM_FUNC_END(create_idmap)
-
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
-#ifdef CONFIG_RELOCATABLE
- add x5, x5, x23 // add KASLR displacement
-#endif
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
- mov_q x7, SWAPPER_RW_MMUFLAGS
-
- map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
- dsb ishst // sync with page table walker
- ret
-SYM_FUNC_END(create_kernel_mapping)
-
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@@ -581,6 +200,8 @@ SYM_FUNC_END(create_kernel_mapping)
sub sp, sp, #PT_REGS_SIZE
stp xzr, xzr, [sp, #S_STACKFRAME]
+ mov \tmp1, #FRAME_META_TYPE_FINAL
+ str \tmp1, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
scs_load_current
@@ -616,34 +237,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
- // Clear BSS
- adr_l x0, __bss_start
- mov x1, xzr
- adr_l x2, __bss_stop
- sub x2, x2, x0
- bl __pi_memset
- dsb ishst // Make zero page visible to PTW
-
-#if VA_BITS > 48
- adr_l x8, vabits_actual // Set this early so KASAN early init
- str x25, [x8] // ... observes the correct value
- dc civac, x8 // Make visible to booting secondaries
-#endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adrp x5, memstart_offset_seed // Save KASLR linear map seed
- strh w24, [x5, :lo12:memstart_offset_seed]
-#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
- mov x0, x21 // pass FDT address in x0
- bl early_fdt_map // Try mapping the FDT early
- mov x0, x20 // pass the full boot status
- bl init_feature_override // Parse cpu feature overrides
-#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
- bl scs_patch_vmlinux
-#endif
mov x0, x20
bl finalise_el2 // Prefer VHE if possible
ldp x29, x30, [sp], #16
@@ -696,11 +292,14 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
-0:
- mov_q x0, HCR_HOST_NVHE_FLAGS
- msr hcr_el2, x0
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
isb
+0:
+ init_el2_hcr HCR_HOST_NVHE_FLAGS
init_el2_state
/* Hypervisor stub */
@@ -710,27 +309,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
- * making it impossible to start in nVHE mode. Is that
- * compliant with the architecture? Absolutely not!
- */
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
- cbz x0, 1f
+ cbz x0, 2f
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
- b 2f
+ b 3f
-1:
- pre_disable_mmu_workaround
+2:
msr sctlr_el1, x1
mov x2, xzr
-2:
- __init_el2_nvhe_prepare_eret
+3:
+ mov x0, #INIT_PSTATE_EL1
+ msr spsr_el2, x0
mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
@@ -770,10 +363,13 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
+
+#ifdef CONFIG_ARM64_VA_BITS_52
+alternative_if ARM64_HAS_VA52
bl __cpu_secondary_check52bitva
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
+alternative_else_nop_endif
#endif
+
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
@@ -876,15 +472,18 @@ SYM_FUNC_START(__enable_mmu)
ret
SYM_FUNC_END(__enable_mmu)
+#ifdef CONFIG_ARM64_VA_BITS_52
SYM_FUNC_START(__cpu_secondary_check52bitva)
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
- cmp x0, #52
- b.ne 2f
-
+#ifndef CONFIG_ARM64_LPA2
mrs_s x0, SYS_ID_AA64MMFR2_EL1
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
+#else
+ mrs x0, id_aa64mmfr0_el1
+ sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
+ b.ge 2f
+#endif
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
@@ -892,9 +491,9 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
wfi
b 1b
-#endif
2: ret
SYM_FUNC_END(__cpu_secondary_check52bitva)
+#endif
SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
@@ -906,123 +505,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x9, __rela_start
- adr_l x10, __rela_end
- mov_q x11, KIMAGE_VADDR // default virtual offset
- add x11, x11, x23 // actual virtual offset
-
-0: cmp x9, x10
- b.hs 1f
- ldp x12, x13, [x9], #24
- ldr x14, [x9, #-8]
- cmp w13, #R_AARCH64_RELATIVE
- b.ne 0b
- add x14, x14, x23 // relocate
- str x14, [x12, x23]
- b 0b
-
-1:
-#ifdef CONFIG_RELR
- /*
- * Apply RELR relocations.
- *
- * RELR is a compressed format for storing relative relocations. The
- * encoded sequence of entries looks like:
- * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
- *
- * i.e. start with an address, followed by any number of bitmaps. The
- * address entry encodes 1 relocation. The subsequent bitmap entries
- * encode up to 63 relocations each, at subsequent offsets following
- * the last address entry.
- *
- * The bitmap entries must have 1 in the least significant bit. The
- * assumption here is that an address cannot have 1 in lsb. Odd
- * addresses are not supported. Any odd addresses are stored in the RELA
- * section, which is handled above.
- *
- * Excluding the least significant bit in the bitmap, each non-zero
- * bit in the bitmap represents a relocation to be applied to
- * a corresponding machine word that follows the base address
- * word. The second least significant bit represents the machine
- * word immediately following the initial address, and each bit
- * that follows represents the next word, in linear order. As such,
- * a single bitmap can encode up to 63 relocations in a 64-bit object.
- *
- * In this implementation we store the address of the next RELR table
- * entry in x9, the address being relocated by the current address or
- * bitmap entry in x13 and the address being relocated by the current
- * bit in x14.
- */
- adr_l x9, __relr_start
- adr_l x10, __relr_end
-
-2: cmp x9, x10
- b.hs 7f
- ldr x11, [x9], #8
- tbnz x11, #0, 3f // branch to handle bitmaps
- add x13, x11, x23
- ldr x12, [x13] // relocate address entry
- add x12, x12, x23
- str x12, [x13], #8 // adjust to start of bitmap
- b 2b
-
-3: mov x14, x13
-4: lsr x11, x11, #1
- cbz x11, 6f
- tbz x11, #0, 5f // skip bit if not set
- ldr x12, [x14] // relocate bit
- add x12, x12, x23
- str x12, [x14]
-
-5: add x14, x14, #8 // move to next bit's address
- b 4b
-
-6: /*
- * Move to the next bitmap's address. 8 is the word size, and 63 is the
- * number of significant bits in a bitmap entry.
- */
- add x13, x13, #(8 * 63)
- b 2b
-
-7:
-#endif
- ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
-#ifdef CONFIG_RELOCATABLE
- adrp x23, KERNEL_START
- and x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x0, x22
- adrp x1, init_pg_end
+
+ adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
- bl __pi_kaslr_early_init
- and x24, x0, #SZ_2M - 1 // capture memstart offset seed
- bic x0, x0, #SZ_2M - 1
- orr x23, x23, x0 // record kernel offset
-#endif
-#endif
- bl clear_page_tables
- bl create_kernel_mapping
+ mov x0, x20 // pass the full boot status
+ mov x1, x21 // pass the FDT
+ bl __pi_early_map_kernel // Map and relocate the kernel
- adrp x1, init_pg_dir
- load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
- bl __relocate_kernel
-#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..18749e9a6c2d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -266,9 +266,15 @@ static int swsusp_mte_save_tags(void)
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
struct page *page = pfn_to_online_page(pfn);
+ struct folio *folio;
if (!page)
continue;
+ folio = page_folio(page);
+
+ if (folio_test_hugetlb(folio) &&
+ !folio_test_hugetlb_mte_tagged(folio))
+ continue;
if (!page_mte_tagged(page))
continue;
@@ -407,7 +413,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 35225632d70a..722ac45f9f7b 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -21,6 +21,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
+#include <asm/esr.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@@ -654,7 +655,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
perf_bp_event(bp, regs);
/* Do we need to handle the stepping? */
- if (uses_default_overflow_handler(bp))
+ if (is_default_overflow_handler(bp))
step = 1;
unlock:
rcu_read_unlock();
@@ -733,7 +734,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
static int watchpoint_report(struct perf_event *wp, unsigned long addr,
struct pt_regs *regs)
{
- int step = uses_default_overflow_handler(wp);
+ int step = is_default_overflow_handler(wp);
struct arch_hw_breakpoint *info = counter_arch_bp(wp);
info->trigger = addr;
@@ -779,7 +780,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
* Check that the access type matches.
* 0 => load, otherwise => store
*/
- access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+ access = (esr & ESR_ELx_WNR) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
continue;
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 65f76064c86b..ae990da1eae5 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -114,8 +114,8 @@ SYM_CODE_START_LOCAL(__finalise_el2)
// Use EL2 translations for SPE & TRBE and disable access from EL1
mrs x0, mdcr_el2
- bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ bic x0, x0, #MDCR_EL2_E2PB_MASK
+ bic x0, x0, #MDCR_EL2_E2TB_MASK
msr mdcr_el2, x0
// Transfer the MM state from EL1 to EL2
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 5e4dc72ab1bd..ef3a69cc398e 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -36,6 +36,41 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
+PROVIDE(__pi_id_aa64isar1_override = id_aa64isar1_override);
+PROVIDE(__pi_id_aa64isar2_override = id_aa64isar2_override);
+PROVIDE(__pi_id_aa64mmfr0_override = id_aa64mmfr0_override);
+PROVIDE(__pi_id_aa64mmfr1_override = id_aa64mmfr1_override);
+PROVIDE(__pi_id_aa64mmfr2_override = id_aa64mmfr2_override);
+PROVIDE(__pi_id_aa64pfr0_override = id_aa64pfr0_override);
+PROVIDE(__pi_id_aa64pfr1_override = id_aa64pfr1_override);
+PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override);
+PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override);
+PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
+PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
+#endif
+PROVIDE(__pi__ctype = _ctype);
+PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
+
+PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
+PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
+PROVIDE(__pi_init_pg_dir = init_pg_dir);
+PROVIDE(__pi_init_pg_end = init_pg_end);
+PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
+
+PROVIDE(__pi__text = _text);
+PROVIDE(__pi__stext = _stext);
+PROVIDE(__pi__etext = _etext);
+PROVIDE(__pi___start_rodata = __start_rodata);
+PROVIDE(__pi___inittext_begin = __inittext_begin);
+PROVIDE(__pi___inittext_end = __inittext_end);
+PROVIDE(__pi___initdata_begin = __initdata_begin);
+PROVIDE(__pi___initdata_end = __initdata_end);
+PROVIDE(__pi__data = _data);
+PROVIDE(__pi___bss_start = __bss_start);
+PROVIDE(__pi__end = _end);
+
#ifdef CONFIG_KVM
/*
@@ -70,10 +105,8 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-/* Static key checked in GIC_PRIO_IRQOFF. */
-KVM_NVHE_ALIAS(gic_nonsecure_priorities);
-#endif
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index aa7a4ec6a3ae..fe86ada23c7d 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -10,88 +10,43 @@
#include <linux/io.h>
/*
- * Copy data from IO memory space to "real" memory space.
+ * This generates a memcpy that works on a from/to address which is aligned to
+ * bits. Count is in terms of the number of bits sized quantities to copy. It
+ * optimizes to use the STR groupings when possible so that it is WC friendly.
*/
-void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+#define memcpy_toio_aligned(to, from, count, bits) \
+ ({ \
+ volatile u##bits __iomem *_to = to; \
+ const u##bits *_from = from; \
+ size_t _count = count; \
+ const u##bits *_end_from = _from + ALIGN_DOWN(_count, 8); \
+ \
+ for (; _from < _end_from; _from += 8, _to += 8) \
+ __const_memcpy_toio_aligned##bits(_to, _from, 8); \
+ if ((_count % 8) >= 4) { \
+ __const_memcpy_toio_aligned##bits(_to, _from, 4); \
+ _from += 4; \
+ _to += 4; \
+ } \
+ if ((_count % 4) >= 2) { \
+ __const_memcpy_toio_aligned##bits(_to, _from, 2); \
+ _from += 2; \
+ _to += 2; \
+ } \
+ if (_count % 2) \
+ __const_memcpy_toio_aligned##bits(_to, _from, 1); \
+ })
+
+void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count)
{
- while (count && !IS_ALIGNED((unsigned long)from, 8)) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- *(u64 *)to = __raw_readq(from);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
+ memcpy_toio_aligned(to, from, count, 64);
+ dgh();
}
-EXPORT_SYMBOL(__memcpy_fromio);
+EXPORT_SYMBOL(__iowrite64_copy_full);
-/*
- * Copy data from "real" memory space to IO memory space.
- */
-void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count)
{
- while (count && !IS_ALIGNED((unsigned long)to, 8)) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(*(u64 *)from, to);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-/*
- * "memset" on IO memory space.
- */
-void __memset_io(volatile void __iomem *dst, int c, size_t count)
-{
- u64 qc = (u8)c;
-
- qc |= qc << 8;
- qc |= qc << 16;
- qc |= qc << 32;
-
- while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(qc, dst);
- dst += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
+ memcpy_toio_aligned(to, from, count, 32);
+ dgh();
}
-EXPORT_SYMBOL(__memset_io);
+EXPORT_SYMBOL(__iowrite32_copy_full);
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index faf88ec9c48e..b345425193d2 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -7,11 +7,12 @@
*/
#include <linux/kernel.h>
#include <linux/jump_label.h>
+#include <linux/smp.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)jump_entry_code(entry);
u32 insn;
@@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
}
aarch64_insn_patch_text_nosync(addr, insn);
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ kick_all_cpus_sync();
}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 12c7f3c8ba76..1da3e25f9d9e 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -16,9 +16,7 @@ bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
{
- if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
- arm64_sw_feature_override.mask,
- ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
+ if (kaslr_disabled_cmdline()) {
pr_info("KASLR disabled on command line\n");
return;
}
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 4e1f983df3d1..f3c4d3a8a20f 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,7 +17,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index b38aae5b488d..6f121a0164a4 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage)
BUG(); /* Should never get here. */
}
-static void machine_kexec_mask_interrupts(void)
-{
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
- int ret;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
/**
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
*/
@@ -255,7 +224,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
pr_info("Starting crashdump kernel...\n");
}
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
/*
* To preserve the crash dump kernel image, the relevant memory segments
* should be mapped again around the hibernation.
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 0e017358f4ba..af1ca875c52c 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -39,6 +39,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
return kexec_image_post_load_cleanup_default(image);
}
+#ifdef CONFIG_CRASH_DUMP
static int prepare_elf_headers(void **addr, unsigned long *sz)
{
struct crash_mem *cmem;
@@ -80,6 +81,7 @@ out:
kfree(cmem);
return ret;
}
+#endif
/*
* Tries to add the initrd and DTB to the image. If it is not possible to find
@@ -93,8 +95,8 @@ int load_other_segments(struct kimage *image,
char *cmdline)
{
struct kexec_buf kbuf;
- void *headers, *dtb = NULL;
- unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+ void *dtb = NULL;
+ unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
int ret = 0;
@@ -102,7 +104,10 @@ int load_other_segments(struct kimage *image,
/* not allocate anything below the kernel */
kbuf.buf_min = kernel_load_addr + kernel_size;
+#ifdef CONFIG_CRASH_DUMP
/* load elf core header */
+ void *headers;
+ unsigned long headers_sz;
if (image->type == KEXEC_TYPE_CRASH) {
ret = prepare_elf_headers(&headers, &headers_sz);
if (ret) {
@@ -130,6 +135,7 @@ int load_other_segments(struct kimage *image,
kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
}
+#endif
/* load initrd */
if (initrd) {
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index dd851297596e..06bb680bfe97 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -12,144 +12,18 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
-#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/random.h>
#include <linux/scs.h>
-#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
-
-/*
- * Choose a random page-aligned base address for a window of 'size' bytes which
- * entirely contains the interval [start, end - 1].
- */
-static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
-{
- u64 max_pgoff, pgoff;
-
- if ((end - start) >= size)
- return 0;
-
- max_pgoff = (size - (end - start)) / PAGE_SIZE;
- pgoff = get_random_u32_inclusive(0, max_pgoff);
-
- return start - pgoff * PAGE_SIZE;
-}
-
-/*
- * Modules may directly reference data and text anywhere within the kernel
- * image and other modules. References using PREL32 relocations have a +/-2G
- * range, and so we need to ensure that the entire kernel image and all modules
- * fall within a 2G window such that these are always within range.
- *
- * Modules may directly branch to functions and code within the kernel text,
- * and to functions and code within other modules. These branches will use
- * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
- * that the entire kernel text and all module text falls within a 128M window
- * such that these are always within range. With PLTs, we can expand this to a
- * 2G window.
- *
- * We chose the 128M region to surround the entire kernel image (rather than
- * just the text) as using the same bounds for the 128M and 2G regions ensures
- * by construction that we never select a 128M region that is not a subset of
- * the 2G region. For very large and unusual kernel configurations this means
- * we may fall back to PLTs where they could have been avoided, but this keeps
- * the logic significantly simpler.
- */
-static int __init module_init_limits(void)
-{
- u64 kernel_end = (u64)_end;
- u64 kernel_start = (u64)_text;
- u64 kernel_size = kernel_end - kernel_start;
-
- /*
- * The default modules region is placed immediately below the kernel
- * image, and is large enough to use the full 2G relocation range.
- */
- BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
- BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
-
- if (!kaslr_enabled()) {
- if (kernel_size < SZ_128M)
- module_direct_base = kernel_end - SZ_128M;
- if (kernel_size < SZ_2G)
- module_plt_base = kernel_end - SZ_2G;
- } else {
- u64 min = kernel_start;
- u64 max = kernel_end;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
- } else {
- module_direct_base = random_bounding_box(SZ_128M, min, max);
- if (module_direct_base) {
- min = module_direct_base;
- max = module_direct_base + SZ_128M;
- }
- }
-
- module_plt_base = random_bounding_box(SZ_2G, min, max);
- }
-
- pr_info("%llu pages in range for non-PLT usage",
- module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
- pr_info("%llu pages in range for PLT usage",
- module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
-
- return 0;
-}
-subsys_initcall(module_init_limits);
-
-void *module_alloc(unsigned long size)
-{
- void *p = NULL;
-
- /*
- * Where possible, prefer to allocate within direct branch range of the
- * kernel such that no PLTs are necessary.
- */
- if (module_direct_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_direct_base,
- module_direct_base + SZ_128M,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p && module_plt_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_plt_base,
- module_plt_base + SZ_2G,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p) {
- pr_warn_ratelimited("%s: unable to allocate memory\n",
- __func__);
- }
-
- if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
- vfree(p);
- return NULL;
- }
-
- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(p);
-}
-
enum aarch64_reloc_op {
RELOC_OP_NONE,
RELOC_OP_ABS,
@@ -588,14 +462,20 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s;
+ int ret;
+
s = find_section(hdr, sechdrs, ".altinstructions");
if (s)
apply_alternatives_module((void *)s->sh_addr, s->sh_size);
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
- if (s)
- scs_patch((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size);
+ if (ret)
+ pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
+ me->name, ret);
+ }
}
return module_init_ftrace_plt(hdr, sechdrs, me);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index a41ef3213e1e..2fbfd27ff5f2 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -38,7 +38,24 @@ EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
- unsigned int i;
+ struct folio *folio = page_folio(page);
+ unsigned long i;
+
+ if (folio_test_hugetlb(folio)) {
+ unsigned long nr = folio_nr_pages(folio);
+
+ /* Hugetlb MTE flags are set for head page only */
+ if (folio_try_hugetlb_mte_tagging(folio)) {
+ for (i = 0; i < nr; i++, page++)
+ mte_clear_page_tags(page_address(page));
+ folio_set_hugetlb_mte_tagged(folio);
+ }
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
+
+ return;
+ }
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
@@ -67,7 +84,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
/*
* If the page content is identical but at least one of the pages is
* tagged, return non-zero to avoid KSM merging. If only one of the
- * pages is tagged, set_pte_at() may zero or change the tags of the
+ * pages is tagged, __set_ptes() may zero or change the tags of the
* other page via mte_sync_tags().
*/
if (page_mte_tagged(page1) || page_mte_tagged(page2))
@@ -410,6 +427,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
void *maddr;
struct page *page = get_user_page_vma_remote(mm, addr,
gup_flags, &vma);
+ struct folio *folio;
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -428,7 +446,12 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
put_page(page);
break;
}
- WARN_ON_ONCE(!page_mte_tagged(page));
+
+ folio = page_folio(page);
+ if (folio_test_hugetlb(folio))
+ WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
+ else
+ WARN_ON_ONCE(!page_mte_tagged(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
@@ -582,12 +605,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl);
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
const char __user *end = uaddr + size;
- int err = 0;
char val;
- __raw_get_user(val, uaddr, err);
- if (err)
- return size;
+ __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
while (uaddr < end) {
@@ -595,12 +615,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size)
* A read is sufficient for mte, the caller should have probed
* for the pte write permission if required.
*/
- __raw_get_user(val, uaddr, err);
- if (err)
- return end - uaddr;
+ __raw_get_user(val, uaddr, efault);
uaddr += MTE_GRANULE_SIZE;
}
(void)val;
return 0;
+
+efault:
+ return end - uaddr;
}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..1041bc67a3ee 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -10,7 +10,7 @@
#include <asm/fixmap.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/sections.h>
static DEFINE_RAW_SPINLOCK(patch_lock);
@@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr)
static void __kprobes *patch_map(void *addr, int fixmap)
{
- unsigned long uintaddr = (uintptr_t) addr;
- bool image = is_image_text(uintaddr);
- struct page *page;
+ phys_addr_t phys;
- if (image)
- page = phys_to_page(__pa_symbol(addr));
- else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
- page = vmalloc_to_page(addr);
- else
- return addr;
+ if (is_image_text((unsigned long)addr)) {
+ phys = __pa_symbol(addr);
+ } else {
+ struct page *page = vmalloc_to_page(addr);
+ BUG_ON(!page);
+ phys = page_to_phys(page) + offset_in_page(addr);
+ }
- BUG_ON(!page);
- return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
+ return (void *)set_fixmap_offset(fixmap, phys);
}
static void __kprobes patch_unmap(int fixmap)
@@ -105,6 +102,81 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
return ret;
}
+typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len)
+{
+ unsigned long flags;
+ size_t patched = 0;
+ size_t size;
+ void *waddr;
+ void *ptr;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+
+ while (patched < len) {
+ ptr = addr + patched;
+ size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
+ len - patched);
+
+ waddr = patch_map(ptr, FIX_TEXT_POKE0);
+ func(waddr, src, patched, size);
+ patch_unmap(FIX_TEXT_POKE0);
+
+ patched += size;
+ }
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
+
+ return addr;
+}
+
+static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len)
+{
+ copy_to_kernel_nofault(dst, src + patched, len);
+}
+
+static void text_poke_memset(void *dst, void *src, size_t patched, size_t len)
+{
+ u32 c = *(u32 *)src;
+
+ memset32(dst, c, len / 4);
+}
+
+/**
+ * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
+ * @dst: address to modify
+ * @src: source of the copy
+ * @len: length to copy
+ *
+ * Useful for JITs to dump new code blocks into unused regions of RX memory.
+ */
+noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len)
+{
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memcpy, dst, src, len);
+}
+
+/**
+ * aarch64_insn_set - memset for RX memory regions.
+ * @dst: address to modify
+ * @insn: value to set
+ * @len: length of memory region.
+ *
+ * Useful for JITs to fill regions of RX memory with illegal instructions.
+ */
+noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len)
+{
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memset, dst, &insn, len);
+}
+
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
u32 *tp = addr;
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index f872c57e9909..fd9a7bed83ce 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -6,28 +6,7 @@
* Copyright (C) 2014 ARM Ltd.
*/
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
-#include <linux/slab.h>
-
-#ifdef CONFIG_ACPI
-/*
- * Try to assign the IRQ number when probing a new device
- */
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
- if (!acpi_disabled)
- acpi_pci_irq_enable(dev);
-
- return 0;
-}
-#endif
/*
* raw_pci_read/write - Platform-specific PCI config space access.
@@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-
-#ifdef CONFIG_ACPI
-
-struct acpi_pci_generic_root_info {
- struct acpi_pci_root_info common;
- struct pci_config_window *cfg; /* config space mapping */
-};
-
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
-{
- struct pci_config_window *cfg = bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
-
- return root->segment;
-}
-
-int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
-{
- struct pci_config_window *cfg;
- struct acpi_device *adev;
- struct device *bus_dev;
-
- if (acpi_disabled)
- return 0;
-
- cfg = bridge->bus->sysdata;
-
- /*
- * On Hyper-V there is no corresponding ACPI device for a root bridge,
- * therefore ->parent is set as NULL by the driver. And set 'adev' as
- * NULL in this case because there is no proper ACPI device.
- */
- if (!cfg->parent)
- adev = NULL;
- else
- adev = to_acpi_device(cfg->parent);
-
- bus_dev = &bridge->bus->dev;
-
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-
- return 0;
-}
-
-static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
-{
- struct resource_entry *entry, *tmp;
- int status;
-
- status = acpi_pci_probe_root_resources(ci);
- resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
- if (!(entry->res->flags & IORESOURCE_WINDOW))
- resource_list_destroy_entry(entry);
- }
- return status;
-}
-
-/*
- * Lookup the bus range for the domain in MCFG, and set up config space
- * mapping.
- */
-static struct pci_config_window *
-pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
-{
- struct device *dev = &root->device->dev;
- struct resource *bus_res = &root->secondary;
- u16 seg = root->segment;
- const struct pci_ecam_ops *ecam_ops;
- struct resource cfgres;
- struct acpi_device *adev;
- struct pci_config_window *cfg;
- int ret;
-
- ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
- if (ret) {
- dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
- return NULL;
- }
-
- adev = acpi_resource_consumer(&cfgres);
- if (adev)
- dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
- dev_name(&adev->dev));
- else
- dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
- &cfgres);
-
- cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
- if (IS_ERR(cfg)) {
- dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
- PTR_ERR(cfg));
- return NULL;
- }
-
- return cfg;
-}
-
-/* release_info: free resources allocated by init_info */
-static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
-{
- struct acpi_pci_generic_root_info *ri;
-
- ri = container_of(ci, struct acpi_pci_generic_root_info, common);
- pci_ecam_free(ri->cfg);
- kfree(ci->ops);
- kfree(ri);
-}
-
-/* Interface called from ACPI code to setup PCI host controller */
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- struct acpi_pci_generic_root_info *ri;
- struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
- struct pci_host_bridge *host;
-
- ri = kzalloc(sizeof(*ri), GFP_KERNEL);
- if (!ri)
- return NULL;
-
- root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops) {
- kfree(ri);
- return NULL;
- }
-
- ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg) {
- kfree(ri);
- kfree(root_ops);
- return NULL;
- }
-
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
- if (!bus)
- return NULL;
-
- /* If we must preserve the resource configuration, claim now */
- host = pci_find_host_bridge(bus);
- if (host->preserve_config)
- pci_bus_claim_resources(bus);
-
- /*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
- */
- pci_assign_unassigned_root_bus_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
-
- return bus;
-}
-
-void pcibios_add_bus(struct pci_bus *bus)
-{
- acpi_pci_add_bus(bus);
-}
-
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- acpi_pci_remove_bus(bus);
-}
-
-#endif
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 6d157f32187b..9b7f26b128b5 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -10,94 +10,12 @@
#include <asm/pointer_auth.h>
-struct frame_tail {
- struct frame_tail __user *fp;
- unsigned long lr;
-} __attribute__((packed));
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static struct frame_tail __user *
-user_backtrace(struct frame_tail __user *tail,
- struct perf_callchain_entry_ctx *entry)
-{
- struct frame_tail buftail;
- unsigned long err;
- unsigned long lr;
-
- /* Also check accessibility of one struct frame_tail beyond */
- if (!access_ok(tail, sizeof(buftail)))
- return NULL;
-
- pagefault_disable();
- err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
- pagefault_enable();
-
- if (err)
- return NULL;
-
- lr = ptrauth_strip_user_insn_pac(buftail.lr);
-
- perf_callchain_store(entry, lr);
-
- /*
- * Frame pointers should strictly progress back up the stack
- * (towards higher addresses).
- */
- if (tail >= buftail.fp)
- return NULL;
-
- return buftail.fp;
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct compat_frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct compat_frame_tail {
- compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
- u32 sp;
- u32 lr;
-} __attribute__((packed));
-
-static struct compat_frame_tail __user *
-compat_user_backtrace(struct compat_frame_tail __user *tail,
- struct perf_callchain_entry_ctx *entry)
+static bool callchain_trace(void *data, unsigned long pc)
{
- struct compat_frame_tail buftail;
- unsigned long err;
-
- /* Also check accessibility of one struct frame_tail beyond */
- if (!access_ok(tail, sizeof(buftail)))
- return NULL;
-
- pagefault_disable();
- err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
- pagefault_enable();
-
- if (err)
- return NULL;
-
- perf_callchain_store(entry, buftail.lr);
-
- /*
- * Frame pointers should strictly progress back up the stack
- * (towards higher addresses).
- */
- if (tail + 1 >= (struct compat_frame_tail __user *)
- compat_ptr(buftail.fp))
- return NULL;
+ struct perf_callchain_entry_ctx *entry = data;
- return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+ return perf_callchain_store(entry, pc) == 0;
}
-#endif /* CONFIG_COMPAT */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
@@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
return;
}
- perf_callchain_store(entry, regs->pc);
-
- if (!compat_user_mode(regs)) {
- /* AARCH64 mode */
- struct frame_tail __user *tail;
-
- tail = (struct frame_tail __user *)regs->regs[29];
-
- while (entry->nr < entry->max_stack &&
- tail && !((unsigned long)tail & 0x7))
- tail = user_backtrace(tail, entry);
- } else {
-#ifdef CONFIG_COMPAT
- /* AARCH32 compat mode */
- struct compat_frame_tail __user *tail;
-
- tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
-
- while ((entry->nr < entry->max_stack) &&
- tail && !((unsigned long)tail & 0x3))
- tail = compat_user_backtrace(tail, entry);
-#endif
- }
-}
-
-static bool callchain_trace(void *data, unsigned long pc)
-{
- struct perf_callchain_entry_ctx *entry = data;
- return perf_callchain_store(entry, pc) == 0;
+ arch_stack_walk_user(callchain_trace, entry, regs);
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
@@ -148,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
arch_stack_walk(callchain_trace, entry, current, regs);
}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
- return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
- unsigned int guest_state = perf_guest_state();
- int misc = 0;
-
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
-
- return misc;
-}
diff --git a/arch/arm64/kernel/pi/.gitignore b/arch/arm64/kernel/pi/.gitignore
new file mode 100644
index 000000000000..efb29b663e85
--- /dev/null
+++ b/arch/arm64/kernel/pi/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+relacheck
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index c844a0546d7f..4d11a8c29181 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -11,25 +11,34 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# this code may run with the MMU off so disable unaligned accesses
+CFLAGS_map_range.o += -mstrict-align
+
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+hostprogs := relacheck
+
+quiet_cmd_piobjcopy = $(quiet_cmd_objcopy)
+ cmd_piobjcopy = $(cmd_objcopy) && $(obj)/relacheck $(@) $(<)
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
- --remove-section=.note.gnu.property \
- --prefix-alloc-sections=.init
-$(obj)/%.pi.o: $(obj)/%.o FORCE
- $(call if_changed,objcopy)
+ --remove-section=.note.gnu.property
+$(obj)/%.pi.o: $(obj)/%.o $(obj)/relacheck FORCE
+ $(call if_changed,piobjcopy)
+
+# ensure that all the lib- code ends up as __init code and data
+$(obj)/lib-%.pi.o: OBJCOPYFLAGS += --prefix-alloc-sections=.init
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+obj-y := idreg-override.pi.o \
+ map_kernel.pi.o map_range.pi.o \
+ lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
+obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
+extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
index e30fd9e32ef3..c6b185b885f7 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -14,6 +14,8 @@
#include <asm/cpufeature.h>
#include <asm/setup.h>
+#include "pi.h"
+
#define FTR_DESC_NAME_LEN 20
#define FTR_DESC_FIELD_LEN 10
#define FTR_ALIAS_NAME_LEN 30
@@ -21,15 +23,6 @@
static u64 __boot_status __initdata;
-// temporary __prel64 related definitions
-// to be removed when this code is moved under pi/
-
-#define __prel64_initconst __initconst
-
-#define PREL64(type, name) union { type *name; }
-
-#define prel64_pointer(__d) (__d)
-
typedef bool filter_t(u64 val);
struct ftr_set_desc {
@@ -45,6 +38,15 @@ struct ftr_set_desc {
#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+static const struct ftr_set_desc mmfr0 __prel64_initconst = {
+ .name = "id_aa64mmfr0",
+ .override = &id_aa64mmfr0_override,
+ .fields = {
+ FIELD("ecv", ID_AA64MMFR0_EL1_ECV_SHIFT, NULL),
+ {}
+ },
+};
+
static bool __init mmfr1_vh_filter(u64 val)
{
/*
@@ -66,6 +68,44 @@ static const struct ftr_set_desc mmfr1 __prel64_initconst = {
},
};
+
+static bool __init mmfr2_varange_filter(u64 val)
+{
+ int __maybe_unused feat;
+
+ if (val)
+ return false;
+
+#ifdef CONFIG_ARM64_LPA2
+ feat = cpuid_feature_extract_signed_field(read_sysreg(id_aa64mmfr0_el1),
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ if (feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2) {
+ id_aa64mmfr0_override.val |=
+ (ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+
+ /*
+ * Override PARange to 48 bits - the override will just be
+ * ignored if the actual PARange is smaller, but this is
+ * unlikely to be the case for LPA2 capable silicon.
+ */
+ id_aa64mmfr0_override.val |=
+ ID_AA64MMFR0_EL1_PARANGE_48 << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+ }
+#endif
+ return true;
+}
+
+static const struct ftr_set_desc mmfr2 __prel64_initconst = {
+ .name = "id_aa64mmfr2",
+ .override = &id_aa64mmfr2_override,
+ .fields = {
+ FIELD("varange", ID_AA64MMFR2_EL1_VARange_SHIFT, mmfr2_varange_filter),
+ {}
+ },
+};
+
static bool __init pfr0_sve_filter(u64 val)
{
/*
@@ -86,6 +126,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = {
.override = &id_aa64pfr0_override,
.fields = {
FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
+ FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
{}
},
};
@@ -110,6 +151,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = {
.override = &id_aa64pfr1_override,
.fields = {
FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+ FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
{}
@@ -166,13 +208,16 @@ static const struct ftr_set_desc sw_features __prel64_initconst = {
.fields = {
FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
+ FIELD("rodataoff", ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF, NULL),
{}
},
};
static const
PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr0 },
{ &mmfr1 },
+ { &mmfr2 },
{ &pfr0 },
{ &pfr1 },
{ &isar1 },
@@ -185,11 +230,12 @@ static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
- { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" },
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nogcs", "id_aa64pfr1.gcs=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
"id_aa64isar1.api=0 id_aa64isar1.apa=0 "
@@ -197,6 +243,9 @@ static const struct {
{ "arm64.nomops", "id_aa64isar2.mops=0" },
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "arm64_sw.nokaslr=1" },
+ { "rodata=off", "arm64_sw.rodataoff=1" },
+ { "arm64.nolva", "id_aa64mmfr2.varange=0" },
+ { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
};
static int __init parse_hexdigit(const char *p, u64 *v)
@@ -313,42 +362,35 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
} while (1);
}
-static __init const u8 *get_bootargs_cmdline(void)
+static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
{
+ static char const bootargs[] __initconst = "bootargs";
const u8 *prop;
- void *fdt;
- int node;
- fdt = get_early_fdt_ptr();
- if (!fdt)
- return NULL;
-
- node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return NULL;
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ prop = fdt_getprop(fdt, node, bootargs, NULL);
if (!prop)
return NULL;
return strlen(prop) ? prop : NULL;
}
-static __init void parse_cmdline(void)
+static __init void parse_cmdline(const void *fdt, int chosen)
{
- const u8 *prop = get_bootargs_cmdline();
+ static char const cmdline[] __initconst = CONFIG_CMDLINE;
+ const u8 *prop = get_bootargs_cmdline(fdt, chosen);
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
- __parse_cmdline(CONFIG_CMDLINE, true);
+ __parse_cmdline(cmdline, true);
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
__parse_cmdline(prop, true);
}
-/* Keep checkers quiet */
-void init_feature_override(u64 boot_status);
-
-asmlinkage void __init init_feature_override(u64 boot_status)
+void __init init_feature_override(u64 boot_status, const void *fdt,
+ int chosen)
{
struct arm64_ftr_override *override;
const struct ftr_set_desc *reg;
@@ -364,7 +406,7 @@ asmlinkage void __init init_feature_override(u64 boot_status)
__boot_status = boot_status;
- parse_cmdline();
+ parse_cmdline(fdt, chosen);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
reg = prel64_pointer(regs[i].reg);
@@ -373,3 +415,10 @@ asmlinkage void __init init_feature_override(u64 boot_status)
(unsigned long)(override + 1));
}
}
+
+char * __init skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index 17bff6e399e4..0257b43819db 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -14,69 +14,23 @@
#include <asm/archrandom.h>
#include <asm/memory.h>
+#include <asm/pgtable.h>
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
- size_t l1, l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- l1 = strlen(s1);
- while (l1 >= l2) {
- l1--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = __strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
+#include "pi.h"
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
+extern u16 memstart_offset_seed;
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
+static u64 __init get_kaslr_seed(void *fdt, int node)
{
- int node, len;
+ static char const seed_str[] __initconst = "kaslr-seed";
fdt64_t *prop;
u64 ret;
+ int len;
- node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ prop = fdt_getprop_w(fdt, node, seed_str, &len);
if (!prop || len != sizeof(u64))
return 0;
@@ -85,26 +39,28 @@ static u64 get_kaslr_seed(void *fdt)
return ret;
}
-asmlinkage u64 kaslr_early_init(void *fdt)
+u64 __init kaslr_early_init(void *fdt, int chosen)
{
- u64 seed;
+ u64 seed, range;
- if (is_kaslr_disabled_cmdline(fdt))
+ if (kaslr_disabled_cmdline())
return 0;
- seed = get_kaslr_seed(fdt);
+ seed = get_kaslr_seed(fdt, chosen);
if (!seed) {
if (!__early_cpu_has_rndr() ||
!__arm64_rndr((unsigned long *)&seed))
return 0;
}
+ memstart_offset_seed = seed & U16_MAX;
+
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
+ * 'middle' half of the VMALLOC area, and stay clear of the lower and
+ * upper quarters to avoid colliding with other allocations.
*/
- return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+ range = (VMALLOC_END - KIMAGE_VADDR) / 2;
+ return range / 2 + (((__uint128_t)range * seed) >> 64);
}
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
new file mode 100644
index 000000000000..e57b043f324b
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "pi.h"
+
+extern const u8 __eh_frame_start[], __eh_frame_end[];
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
+ void *start, void *end, pgprot_t prot,
+ bool may_use_cont, int root_level)
+{
+ map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
+}
+
+static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
+ void *end, int root_level)
+{
+ map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
+ false, root_level);
+}
+
+static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
+{
+ bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
+ bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
+ u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+ pgprot_t prot;
+
+ /*
+ * External debuggers may need to write directly to the text mapping to
+ * install SW breakpoints. Allow this (only) when explicitly requested
+ * with rodata=off.
+ */
+ if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
+ text_prot = PAGE_KERNEL_EXEC;
+
+ /*
+ * We only enable the shadow call stack dynamically if we are running
+ * on a system that does not implement PAC or BTI. PAC and SCS provide
+ * roughly the same level of protection, and BTI relies on the PACIASP
+ * instructions serving as landing pads, preventing us from patching
+ * those instructions into something else.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
+ enable_scs = false;
+
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
+ enable_scs = false;
+
+ /*
+ * If we have a CPU that supports BTI and a kernel built for
+ * BTI then mark the kernel executable text as guarded pages
+ * now so we don't have to rewrite the page tables later.
+ */
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+ }
+
+ /* Map all code read-write on the first pass if needed */
+ twopass |= enable_scs;
+ prot = twopass ? data_prot : text_prot;
+
+ map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+ !twopass, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
+ __inittext_begin, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
+ __inittext_end, prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
+ __initdata_end, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
+ true, root_level);
+ dsb(ishst);
+
+ idmap_cpu_replace_ttbr1(init_pg_dir);
+
+ if (twopass) {
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate_kernel(kaslr_offset);
+
+ if (enable_scs) {
+ scs_patch(__eh_frame_start + va_offset,
+ __eh_frame_end - __eh_frame_start);
+ asm("ic ialluis");
+
+ dynamic_scs_is_enabled = true;
+ }
+
+ /*
+ * Unmap the text region before remapping it, to avoid
+ * potential TLB conflicts when creating the contiguous
+ * descriptors.
+ */
+ unmap_segment(init_pg_dir, va_offset, _stext, _etext,
+ root_level);
+ dsb(ishst);
+ isb();
+ __tlbi(vmalle1);
+ isb();
+
+ /*
+ * Remap these segments with different permissions
+ * No new page table allocations should be needed
+ */
+ map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
+ text_prot, true, root_level);
+ map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
+ __inittext_end, text_prot, false, root_level);
+ }
+
+ /* Copy the root page table to its final location */
+ memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
+ dsb(ishst);
+ idmap_cpu_replace_ttbr1(swapper_pg_dir);
+}
+
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
+{
+ u64 sctlr = read_sysreg(sctlr_el1);
+ u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+ u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+
+ tcr &= ~TCR_IPS_MASK;
+ tcr |= parange << TCR_IPS_SHIFT;
+
+ asm(" msr sctlr_el1, %0 ;"
+ " isb ;"
+ " msr ttbr0_el1, %1 ;"
+ " msr tcr_el1, %2 ;"
+ " isb ;"
+ " tlbi vmalle1 ;"
+ " dsb nsh ;"
+ " isb ;"
+ " msr sctlr_el1, %3 ;"
+ " isb ;"
+ :: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
+}
+
+static void __init remap_idmap_for_lpa2(void)
+{
+ /* clear the bits that change meaning once LPA2 is turned on */
+ pteval_t mask = PTE_SHARED;
+
+ /*
+ * We have to clear bits [9:8] in all block or page descriptors in the
+ * initial ID map, as otherwise they will be (mis)interpreted as
+ * physical address bits once we flick the LPA2 switch (TCR.DS). Since
+ * we cannot manipulate live descriptors in that way without creating
+ * potential TLB conflicts, let's create another temporary ID map in a
+ * LPA2 compatible fashion, and update the initial ID map while running
+ * from that.
+ */
+ create_init_idmap(init_pg_dir, mask);
+ dsb(ishst);
+ set_ttbr0_for_lpa2((u64)init_pg_dir);
+
+ /*
+ * Recreate the initial ID map with the same granularity as before.
+ * Don't bother with the FDT, we no longer need it after this.
+ */
+ memset(init_idmap_pg_dir, 0,
+ (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
+
+ create_init_idmap(init_idmap_pg_dir, mask);
+ dsb(ishst);
+
+ /* switch back to the updated initial ID map */
+ set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
+
+ /* wipe the temporary ID map from memory */
+ memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+}
+
+static void __init map_fdt(u64 fdt)
+{
+ static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
+ u64 efdt = fdt + MAX_FDT_SIZE;
+ u64 ptep = (u64)ptes;
+
+ /*
+ * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
+ * the kernel image.
+ */
+ map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
+ fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
+ (pte_t *)init_idmap_pg_dir, false, 0);
+ dsb(ishst);
+}
+
+asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
+{
+ static char const chosen_str[] __initconst = "/chosen";
+ u64 va_base, pa_base = (u64)&_text;
+ u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
+ int root_level = 4 - CONFIG_PGTABLE_LEVELS;
+ int va_bits = VA_BITS;
+ int chosen;
+
+ map_fdt((u64)fdt);
+
+ /* Clear BSS and the initial page tables */
+ memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
+
+ /* Parse the command line for CPU feature overrides */
+ chosen = fdt_path_offset(fdt, chosen_str);
+ init_feature_override(boot_status, fdt, chosen);
+
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
+ va_bits = VA_BITS_MIN;
+ } else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
+ va_bits = VA_BITS_MIN;
+ root_level++;
+ }
+
+ if (va_bits > VA_BITS_MIN)
+ sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
+
+ /*
+ * The virtual KASLR displacement modulo 2MiB is decided by the
+ * physical placement of the image, as otherwise, we might not be able
+ * to create the early kernel mapping using 2 MiB block descriptors. So
+ * take the low bits of the KASLR offset from the physical address, and
+ * fill in the high bits from the seed.
+ */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ u64 kaslr_seed = kaslr_early_init(fdt, chosen);
+
+ if (kaslr_seed && kaslr_requires_kpti())
+ arm64_use_ng_mappings = true;
+
+ kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
+ remap_idmap_for_lpa2();
+
+ va_base = KIMAGE_VADDR + kaslr_offset;
+ map_kernel(kaslr_offset, va_base - pa_base, root_level);
+}
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
new file mode 100644
index 000000000000..2b69e3beeef8
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+#include "pi.h"
+
+/**
+ * map_range - Map a contiguous range of physical pages into virtual memory
+ *
+ * @pte: Address of physical pointer to array of pages to
+ * allocate page tables from
+ * @start: Virtual address of the start of the range
+ * @end: Virtual address of the end of the range (exclusive)
+ * @pa: Physical address of the start of the range
+ * @prot: Access permissions of the range
+ * @level: Translation level for the mapping
+ * @tbl: The level @level page table to create the mappings in
+ * @may_use_cont: Whether the use of the contiguous attribute is allowed
+ * @va_offset: Offset between a physical page and its current mapping
+ * in the VA space
+ */
+void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+{
+ u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+ pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+ start &= PAGE_MASK;
+ pa &= PAGE_MASK;
+
+ /* Advance tbl to the entry that covers start */
+ tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
+
+ /*
+ * Set the right block/page bits for this level unless we are
+ * clearing the mapping
+ */
+ if (protval)
+ protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+ while (start < end) {
+ u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
+
+ if (level < 3 && (start | next | pa) & lmask) {
+ /*
+ * This chunk needs a finer grained mapping. Create a
+ * table mapping if necessary and recurse.
+ */
+ if (pte_none(*tbl)) {
+ *tbl = __pte(__phys_to_pte_val(*pte) |
+ PMD_TYPE_TABLE | PMD_TABLE_UXN);
+ *pte += PTRS_PER_PTE * sizeof(pte_t);
+ }
+ map_range(pte, start, next, pa, prot, level + 1,
+ (pte_t *)(__pte_to_phys(*tbl) + va_offset),
+ may_use_cont, va_offset);
+ } else {
+ /*
+ * Start a contiguous range if start and pa are
+ * suitably aligned
+ */
+ if (((start | pa) & cmask) == 0 && may_use_cont)
+ protval |= PTE_CONT;
+
+ /*
+ * Clear the contiguous attribute if the remaining
+ * range does not cover a contiguous block
+ */
+ if ((end & ~cmask) <= start)
+ protval &= ~PTE_CONT;
+
+ /* Put down a block or page mapping */
+ *tbl = __pte(__phys_to_pte_val(pa) | protval);
+ }
+ pa += next - start;
+ start = next;
+ tbl++;
+ }
+}
+
+asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
+{
+ u64 ptep = (u64)pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+
+ pgprot_val(text_prot) &= ~clrmask;
+ pgprot_val(data_prot) &= ~clrmask;
+
+ map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
+ text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
+ data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+
+ return ptep;
+}
diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index a1fe4b4ff591..55d0cd64ef71 100644
--- a/arch/arm64/kernel/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -4,16 +4,17 @@
* Author: Ard Biesheuvel <ardb@google.com>
*/
-#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/printk.h>
#include <linux/types.h>
-#include <asm/cacheflush.h>
#include <asm/scs.h>
+#include "pi.h"
+
+bool dynamic_scs_is_enabled;
+
//
// This minimal DWARF CFI parser is partially based on the code in
// arch/arc/kernel/unwind.c, and on the document below:
@@ -49,7 +50,9 @@
#define DW_CFA_GNU_negative_offset_extended 0x2f
#define DW_CFA_hi_user 0x3f
-extern const u8 __eh_frame_start[], __eh_frame_end[];
+#define DW_EH_PE_sdata4 0x0b
+#define DW_EH_PE_sdata8 0x0c
+#define DW_EH_PE_pcrel 0x10
enum {
PACIASP = 0xd503233f,
@@ -81,7 +84,11 @@ static void __always_inline scs_patch_loc(u64 loc)
*/
return;
}
- dcache_clean_pou(loc, loc + sizeof(u32));
+ if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_CLEAN_CACHE))
+ asm("dc civac, %0" :: "r"(loc));
+ else
+ asm(ALTERNATIVE("dc cvau, %0", "nop", ARM64_HAS_CACHE_IDC)
+ :: "r"(loc));
}
/*
@@ -117,7 +124,12 @@ struct eh_frame {
union {
struct { // CIE
u8 version;
- u8 augmentation_string[];
+ u8 augmentation_string[3];
+ u8 code_alignment_factor;
+ u8 data_alignment_factor;
+ u8 return_address_register;
+ u8 augmentation_data_size;
+ u8 fde_pointer_format;
};
struct { // FDE
@@ -125,29 +137,38 @@ struct eh_frame {
s32 range;
u8 opcodes[];
};
+
+ struct { // FDE
+ s64 initial_loc64;
+ s64 range64;
+ u8 opcodes64[];
+ };
};
};
-static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
- bool fde_has_augmentation_data,
- int code_alignment_factor,
- bool dry_run)
+static int scs_handle_fde_frame(const struct eh_frame *frame,
+ int code_alignment_factor,
+ bool use_sdata8,
+ bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
const u8 *opcode = frame->opcodes;
+ int l;
- if (fde_has_augmentation_data) {
- int l;
+ if (use_sdata8) {
+ loc = (u64)&frame->initial_loc64 + frame->initial_loc64;
+ opcode = frame->opcodes64;
+ size -= 8;
+ }
- // assume single byte uleb128_t
- if (WARN_ON(*opcode & BIT(7)))
- return -ENOEXEC;
+ // assume single byte uleb128_t for augmentation data size
+ if (*opcode & BIT(7))
+ return EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE;
- l = *opcode++;
- opcode += l;
- size -= l + 1;
- }
+ l = *opcode++;
+ opcode += l;
+ size -= l + 1;
/*
* Starting from 'loc', apply the CFA opcodes that advance the location
@@ -198,21 +219,20 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
default:
- pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
- return -ENOEXEC;
+ return EDYNSCS_INVALID_CFA_OPCODE;
}
}
return 0;
}
-int noinstr scs_patch(const u8 eh_frame[], int size)
+int scs_patch(const u8 eh_frame[], int size)
{
+ int code_alignment_factor = 1;
+ bool fde_use_sdata8 = false;
const u8 *p = eh_frame;
while (size > 4) {
const struct eh_frame *frame = (const void *)p;
- bool fde_has_augmentation_data = true;
- int code_alignment_factor = 1;
int ret;
if (frame->size == 0 ||
@@ -221,28 +241,47 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
break;
if (frame->cie_id_or_pointer == 0) {
- const u8 *p = frame->augmentation_string;
-
- /* a 'z' in the augmentation string must come first */
- fde_has_augmentation_data = *p == 'z';
+ /*
+ * Require presence of augmentation data (z) with a
+ * specifier for the size of the FDE initial_loc and
+ * range fields (R), and nothing else.
+ */
+ if (strcmp(frame->augmentation_string, "zR"))
+ return EDYNSCS_INVALID_CIE_HEADER;
/*
* The code alignment factor is a uleb128 encoded field
* but given that the only sensible values are 1 or 4,
- * there is no point in decoding the whole thing.
+ * there is no point in decoding the whole thing. Also
+ * sanity check the size of the data alignment factor
+ * field, and the values of the return address register
+ * and augmentation data size fields.
*/
- p += strlen(p) + 1;
- if (!WARN_ON(*p & BIT(7)))
- code_alignment_factor = *p;
+ if ((frame->code_alignment_factor & BIT(7)) ||
+ (frame->data_alignment_factor & BIT(7)) ||
+ frame->return_address_register != 30 ||
+ frame->augmentation_data_size != 1)
+ return EDYNSCS_INVALID_CIE_HEADER;
+
+ code_alignment_factor = frame->code_alignment_factor;
+
+ switch (frame->fde_pointer_format) {
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata4:
+ fde_use_sdata8 = false;
+ break;
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata8:
+ fde_use_sdata8 = true;
+ break;
+ default:
+ return EDYNSCS_INVALID_CIE_SDATA_SIZE;
+ }
} else {
- ret = scs_handle_fde_frame(frame,
- fde_has_augmentation_data,
- code_alignment_factor,
- true);
+ ret = scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, true);
if (ret)
return ret;
- scs_handle_fde_frame(frame, fde_has_augmentation_data,
- code_alignment_factor, false);
+ scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, false);
}
p += sizeof(frame->size) + frame->size;
@@ -250,13 +289,3 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
}
return 0;
}
-
-asmlinkage void __init scs_patch_vmlinux(void)
-{
- if (!should_patch_pac_into_scs())
- return;
-
- WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
- icache_inval_all_pou();
- isb();
-}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
new file mode 100644
index 000000000000..c91e5e965cd3
--- /dev/null
+++ b/arch/arm64/kernel/pi/pi.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+
+#define __prel64_initconst __section(".init.rodata.prel64")
+
+#define PREL64(type, name) union { type *name; prel64_t name ## _prel; }
+
+#define prel64_pointer(__d) (typeof(__d))prel64_to_pointer(&__d##_prel)
+
+typedef volatile signed long prel64_t;
+
+static inline void *prel64_to_pointer(const prel64_t *offset)
+{
+ if (!*offset)
+ return NULL;
+ return (void *)offset + *offset;
+}
+
+extern bool dynamic_scs_is_enabled;
+
+extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
+
+void init_feature_override(u64 boot_status, const void *fdt, int chosen);
+u64 kaslr_early_init(void *fdt, int chosen);
+void relocate_kernel(u64 offset);
+int scs_patch(const u8 eh_frame[], int size);
+
+void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+
+asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
+
+asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
diff --git a/arch/arm64/kernel/pi/relacheck.c b/arch/arm64/kernel/pi/relacheck.c
new file mode 100644
index 000000000000..b0cd4d0d275b
--- /dev/null
+++ b/arch/arm64/kernel/pi/relacheck.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 - Google LLC
+ * Author: Ard Biesheuvel <ardb@google.com>
+ */
+
+#include <elf.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HOST_ORDER ELFDATA2LSB
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HOST_ORDER ELFDATA2MSB
+#endif
+
+static Elf64_Ehdr *ehdr;
+static Elf64_Shdr *shdr;
+static const char *strtab;
+static bool swap;
+
+static uint64_t swab_elfxword(uint64_t val)
+{
+ return swap ? __builtin_bswap64(val) : val;
+}
+
+static uint32_t swab_elfword(uint32_t val)
+{
+ return swap ? __builtin_bswap32(val) : val;
+}
+
+static uint16_t swab_elfhword(uint16_t val)
+{
+ return swap ? __builtin_bswap16(val) : val;
+}
+
+int main(int argc, char *argv[])
+{
+ struct stat stat;
+ int fd, ret;
+
+ if (argc < 3) {
+ fprintf(stderr, "file arguments missing\n");
+ exit(EXIT_FAILURE);
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "failed to open %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = fstat(fd, &stat);
+ if (ret < 0) {
+ fprintf(stderr, "failed to stat() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ehdr = mmap(0, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ehdr == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ swap = ehdr->e_ident[EI_DATA] != HOST_ORDER;
+ shdr = (void *)ehdr + swab_elfxword(ehdr->e_shoff);
+ strtab = (void *)ehdr +
+ swab_elfxword(shdr[swab_elfhword(ehdr->e_shstrndx)].sh_offset);
+
+ for (int i = 0; i < swab_elfhword(ehdr->e_shnum); i++) {
+ unsigned long info, flags;
+ bool prel64 = false;
+ Elf64_Rela *rela;
+ int numrela;
+
+ if (swab_elfword(shdr[i].sh_type) != SHT_RELA)
+ continue;
+
+ /* only consider RELA sections operating on data */
+ info = swab_elfword(shdr[i].sh_info);
+ flags = swab_elfxword(shdr[info].sh_flags);
+ if ((flags & (SHF_ALLOC | SHF_EXECINSTR)) != SHF_ALLOC)
+ continue;
+
+ /*
+ * We generally don't permit ABS64 relocations in the code that
+ * runs before relocation processing occurs. If statically
+ * initialized absolute symbol references are unavoidable, they
+ * may be emitted into a *.rodata.prel64 section and they will
+ * be converted to place-relative 64-bit references. This
+ * requires special handling in the referring code.
+ */
+ if (strstr(strtab + swab_elfword(shdr[info].sh_name),
+ ".rodata.prel64")) {
+ prel64 = true;
+ }
+
+ rela = (void *)ehdr + swab_elfxword(shdr[i].sh_offset);
+ numrela = swab_elfxword(shdr[i].sh_size) / sizeof(*rela);
+
+ for (int j = 0; j < numrela; j++) {
+ uint64_t info = swab_elfxword(rela[j].r_info);
+
+ if (ELF64_R_TYPE(info) != R_AARCH64_ABS64)
+ continue;
+
+ if (prel64) {
+ /* convert ABS64 into PREL64 */
+ info ^= R_AARCH64_ABS64 ^ R_AARCH64_PREL64;
+ rela[j].r_info = swab_elfxword(info);
+ } else {
+ fprintf(stderr,
+ "Unexpected absolute relocations detected in %s\n",
+ argv[2]);
+ close(fd);
+ unlink(argv[1]);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ close(fd);
+ return 0;
+}
diff --git a/arch/arm64/kernel/pi/relocate.c b/arch/arm64/kernel/pi/relocate.c
new file mode 100644
index 000000000000..2407d2696398
--- /dev/null
+++ b/arch/arm64/kernel/pi/relocate.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Authors: Ard Biesheuvel <ardb@google.com>
+// Peter Collingbourne <pcc@google.com>
+
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pi.h"
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+void __init relocate_kernel(u64 offset)
+{
+ u64 *place = NULL;
+
+ for (const Elf64_Rela *rela = rela_start; rela < rela_end; rela++) {
+ if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+ continue;
+ *(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
+ }
+
+ if (!IS_ENABLED(CONFIG_RELR) || !offset)
+ return;
+
+ /*
+ * Apply RELR relocations.
+ *
+ * RELR is a compressed format for storing relative relocations. The
+ * encoded sequence of entries looks like:
+ * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+ *
+ * i.e. start with an address, followed by any number of bitmaps. The
+ * address entry encodes 1 relocation. The subsequent bitmap entries
+ * encode up to 63 relocations each, at subsequent offsets following
+ * the last address entry.
+ *
+ * The bitmap entries must have 1 in the least significant bit. The
+ * assumption here is that an address cannot have 1 in lsb. Odd
+ * addresses are not supported. Any odd addresses are stored in the
+ * RELA section, which is handled above.
+ *
+ * With the exception of the least significant bit, each bit in the
+ * bitmap corresponds with a machine word that follows the base address
+ * word, and the bit value indicates whether or not a relocation needs
+ * to be applied to it. The second least significant bit represents the
+ * machine word immediately following the initial address, and each bit
+ * that follows represents the next word, in linear order. As such, a
+ * single bitmap can encode up to 63 relocations in a 64-bit object.
+ */
+ for (const u64 *relr = relr_start; relr < relr_end; relr++) {
+ if ((*relr & 1) == 0) {
+ place = (u64 *)(*relr + offset);
+ *place++ += offset;
+ } else {
+ for (u64 *p = place, r = *relr >> 1; r; p++, r >>= 1)
+ if (r & 1)
+ *p += offset;
+ place += 63;
+ }
+ }
+}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 968d5fffe233..6438bf62e753 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* Instructions which load PC relative literals are not going to work
* when executed from an XOL slot. Instructions doing an exclusive
* load/store are not going to complete successfully when single-step
- * exception handling happens in the middle of the sequence.
+ * exception handling happens in the middle of the sequence. Memory
+ * copy/set instructions require that all three instructions be placed
+ * consecutively in memory.
*/
if (aarch64_insn_uses_literal(insn) ||
- aarch64_insn_is_exclusive(insn))
+ aarch64_insn_is_exclusive(insn) ||
+ aarch64_insn_is_mops(insn))
return false;
return true;
@@ -73,9 +76,18 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api)
{
/*
+ * While 'nop' instruction can execute in the out-of-line slot,
+ * simulating them in breakpoint handling offers better performance.
+ */
+ if (aarch64_insn_is_nop(insn)) {
+ api->handler = simulate_nop;
+ return INSN_GOOD_NO_SLOT;
+ }
+
+ /*
* Instructions reading or modifying the PC won't work from the XOL
* slot.
*/
@@ -99,10 +111,6 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
api->handler = simulate_br_blr_ret;
- } else if (aarch64_insn_is_ldr_lit(insn)) {
- api->handler = simulate_ldr_literal;
- } else if (aarch64_insn_is_ldrsw_lit(insn)) {
- api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -137,9 +145,20 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
enum probe_insn decoded;
- probe_opcode_t insn = le32_to_cpu(*addr);
- probe_opcode_t *scan_end = NULL;
+ u32 insn = le32_to_cpu(*addr);
+ kprobe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
+ struct arch_probe_insn *api = &asi->api;
+
+ if (aarch64_insn_is_ldr_lit(insn)) {
+ api->handler = simulate_ldr_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ api->handler = simulate_ldrsw_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else {
+ decoded = arm_probe_decode_insn(insn, &asi->api);
+ }
/*
* If there's a symbol defined in front of and near enough to
@@ -157,7 +176,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index 8b758c5a2062..0e4195de8206 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -28,6 +28,6 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
#endif
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 70b91a8c6bb3..d9e462eafb95 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -27,7 +27,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/irq.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/system_misc.h>
@@ -43,7 +43,7 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
- kprobe_opcode_t *addr = p->ainsn.api.insn;
+ kprobe_opcode_t *addr = p->ainsn.xol_insn;
/*
* Prepare insn slot, Mark Rutland points out it depends on a coupe of
@@ -64,20 +64,20 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
* the BRK exception handler, so it is unnecessary to generate
* Contex-Synchronization-Event via ISB again.
*/
- aarch64_insn_patch_text_nosync(addr, p->opcode);
+ aarch64_insn_patch_text_nosync(addr, le32_to_cpu(p->opcode));
aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.api.restore = (unsigned long) p->addr +
+ p->ainsn.xol_restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instructions is not executed xol. No need to adjust the PC */
- p->ainsn.api.restore = 0;
+ p->ainsn.xol_restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
@@ -85,7 +85,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (p->ainsn.api.handler)
- p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
+ p->ainsn.api.handler(le32_to_cpu(p->opcode), (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(p, kcb, regs);
@@ -99,7 +99,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
/* copy instruction */
- p->opcode = le32_to_cpu(*p->addr);
+ p->opcode = *p->addr;
if (search_exception_tables(probe_addr))
return -EINVAL;
@@ -110,18 +110,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.api.insn = NULL;
+ p->ainsn.xol_insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.api.insn = get_insn_slot();
- if (!p->ainsn.api.insn)
+ p->ainsn.xol_insn = get_insn_slot();
+ if (!p->ainsn.xol_insn)
return -ENOMEM;
break;
}
/* prepare the instruction */
- if (p->ainsn.api.insn)
+ if (p->ainsn.xol_insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
@@ -149,15 +142,16 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
void *addr = p->addr;
+ u32 insn = le32_to_cpu(p->opcode);
- aarch64_insn_patch_text(&addr, &p->opcode, 1);
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.api.insn) {
- free_insn_slot(p->ainsn.api.insn, 0);
- p->ainsn.api.insn = NULL;
+ if (p->ainsn.xol_insn) {
+ free_insn_slot(p->ainsn.xol_insn, 0);
+ p->ainsn.xol_insn = NULL;
}
}
@@ -212,9 +206,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.api.insn) {
+ if (p->ainsn.xol_insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.api.insn;
+ slot = (unsigned long)p->ainsn.xol_insn;
kprobes_save_local_irqflag(kcb, regs);
instruction_pointer_set(regs, slot);
@@ -252,8 +246,8 @@ static void __kprobes
post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
{
/* return addr restore if non-branching insn */
- if (cur->ainsn.api.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.api.restore);
+ if (cur->ainsn.xol_restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.xol_restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -355,7 +349,7 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
struct kprobe *cur = kprobe_running();
if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
- ((unsigned long)&cur->ainsn.api.insn[1] == addr)) {
+ ((unsigned long)&cur->ainsn.xol_insn[1] == addr)) {
kprobes_restore_local_irqflag(kcb, regs);
post_kprobe_handler(cur, kcb, regs);
@@ -371,6 +365,21 @@ static struct break_hook kprobes_break_ss_hook = {
.fn = kprobe_breakpoint_ss_handler,
};
+static int __kprobes
+kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+{
+ if (regs->pc != (unsigned long)__kretprobe_trampoline)
+ return DBG_HOOK_ERROR;
+
+ regs->pc = kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
+ return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook kretprobes_break_hook = {
+ .imm = KRETPROBES_BRK_IMM,
+ .fn = kretprobe_breakpoint_handler,
+};
+
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@@ -396,11 +405,6 @@ int __init arch_populate_kprobe_blacklist(void)
return ret;
}
-void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
-{
- return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
-}
-
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -420,6 +424,7 @@ int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
register_kernel_break_hook(&kprobes_break_ss_hook);
+ register_kernel_break_hook(&kretprobes_break_hook);
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 9a6499bed58b..a362f3dbb3d1 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -4,83 +4,17 @@
*/
#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
+#include <asm/asm-bug.h>
#include <asm/assembler.h>
.text
- .macro save_all_base_regs
- stp x0, x1, [sp, #S_X0]
- stp x2, x3, [sp, #S_X2]
- stp x4, x5, [sp, #S_X4]
- stp x6, x7, [sp, #S_X6]
- stp x8, x9, [sp, #S_X8]
- stp x10, x11, [sp, #S_X10]
- stp x12, x13, [sp, #S_X12]
- stp x14, x15, [sp, #S_X14]
- stp x16, x17, [sp, #S_X16]
- stp x18, x19, [sp, #S_X18]
- stp x20, x21, [sp, #S_X20]
- stp x22, x23, [sp, #S_X22]
- stp x24, x25, [sp, #S_X24]
- stp x26, x27, [sp, #S_X26]
- stp x28, x29, [sp, #S_X28]
- add x0, sp, #PT_REGS_SIZE
- stp lr, x0, [sp, #S_LR]
- /*
- * Construct a useful saved PSTATE
- */
- mrs x0, nzcv
- mrs x1, daif
- orr x0, x0, x1
- mrs x1, CurrentEL
- orr x0, x0, x1
- mrs x1, SPSel
- orr x0, x0, x1
- stp xzr, x0, [sp, #S_PC]
- .endm
-
- .macro restore_all_base_regs
- ldr x0, [sp, #S_PSTATE]
- and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
- msr nzcv, x0
- ldp x0, x1, [sp, #S_X0]
- ldp x2, x3, [sp, #S_X2]
- ldp x4, x5, [sp, #S_X4]
- ldp x6, x7, [sp, #S_X6]
- ldp x8, x9, [sp, #S_X8]
- ldp x10, x11, [sp, #S_X10]
- ldp x12, x13, [sp, #S_X12]
- ldp x14, x15, [sp, #S_X14]
- ldp x16, x17, [sp, #S_X16]
- ldp x18, x19, [sp, #S_X18]
- ldp x20, x21, [sp, #S_X20]
- ldp x22, x23, [sp, #S_X22]
- ldp x24, x25, [sp, #S_X24]
- ldp x26, x27, [sp, #S_X26]
- ldp x28, x29, [sp, #S_X28]
- .endm
-
SYM_CODE_START(__kretprobe_trampoline)
- sub sp, sp, #PT_REGS_SIZE
-
- save_all_base_regs
-
- /* Setup a frame pointer. */
- add x29, sp, #S_FP
-
- mov x0, sp
- bl trampoline_probe_handler
/*
- * Replace trampoline address in lr with actual orig_ret_addr return
- * address.
+ * Trigger a breakpoint exception. The PC will be adjusted by
+ * kretprobe_breakpoint_handler(), and no subsequent instructions will
+ * be executed from the trampoline.
*/
- mov lr, x0
-
- /* The frame pointer (x29) is restored with other registers. */
- restore_all_base_regs
-
- add sp, sp, #PT_REGS_SIZE
- ret
-
+ brk #KRETPROBES_BRK_IMM
+ ASM_BUG()
SYM_CODE_END(__kretprobe_trampoline)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 22d0b3252476..4c6d2d712fbd 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -171,17 +171,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- u64 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (u64 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
if (opcode & (1 << 30)) /* x0-x30 */
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr));
else /* w0-w30 */
- set_w_reg(regs, xn, *load_addr);
+ set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
@@ -189,14 +187,18 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- s32 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (s32 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
+
+void __kprobes
+simulate_nop(u32 opcode, long addr, struct pt_regs *regs)
+{
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+}
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
index e065dc92218e..efb2803ec943 100644
--- a/arch/arm64/kernel/probes/simulate-insn.h
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -16,5 +16,6 @@ void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_nop(u32 opcode, long addr, struct pt_regs *regs);
#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index d49aef2657cd..cb3d05af36e3 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -17,12 +17,20 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *xol_page_kaddr = kmap_atomic(page);
void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+ /*
+ * Initial cache maintenance of the xol page done via set_pte_at().
+ * Subsequent CMOs only needed if the xol slot changes.
+ */
+ if (!memcmp(dst, src, len))
+ goto done;
+
/* Initialize the slot */
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
+done:
kunmap_atomic(xol_page_kaddr);
}
@@ -34,7 +42,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long addr)
{
- probe_opcode_t insn;
+ u32 insn;
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
@@ -42,7 +50,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
switch (arm_probe_decode_insn(insn, &auprobe->api)) {
case INSN_REJECTED:
@@ -102,13 +110,13 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- probe_opcode_t insn;
+ u32 insn;
unsigned long addr;
if (!auprobe->simulate)
return false;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
addr = instruction_pointer(regs);
if (auprobe->api.handler)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7387b68c745b..42faebb7b712 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,11 +43,13 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -226,7 +228,7 @@ void __show_regs(struct pt_regs *regs)
printk("sp : %016llx\n", sp);
if (system_uses_irq_prio_masking())
- printk("pmr_save: %08llx\n", regs->pmr_save);
+ printk("pmr: %08x\n", regs->pmr);
i = top_reg;
@@ -271,17 +273,73 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+#ifdef CONFIG_ARM64_GCS
+
+static void flush_gcs(void)
+{
+ if (!system_supports_gcs())
+ return;
+
+ gcs_free(current);
+ current->thread.gcs_el0_mode = 0;
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+ write_sysreg_s(0, SYS_GCSPR_EL0);
+}
+
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ unsigned long gcs;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ p->thread.gcs_base = 0;
+ p->thread.gcs_size = 0;
+
+ gcs = gcs_alloc_thread_stack(p, args);
+ if (IS_ERR_VALUE(gcs))
+ return PTR_ERR((void *)gcs);
+
+ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
+ return 0;
+}
+
+#else
+
+static void flush_gcs(void) { }
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ return 0;
+}
+
+#endif
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
+ flush_gcs();
}
void arch_release_task_struct(struct task_struct *tsk)
{
fpsimd_release_task(tsk);
+ gcs_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -290,9 +348,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
fpsimd_preserve_current_state();
*dst = *src;
- /* We rely on the above assignment to initialize dst's thread_flags: */
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
-
/*
* Detach src's sve_state (if any) from dst so that it does not
* get erroneously used or freed prematurely. dst's copies
@@ -348,6 +403,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
unsigned long stack_start = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ int ret;
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -374,6 +430,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (system_supports_tpidr2())
p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
@@ -389,6 +448,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.uw.tp_value = tls;
p->thread.tpidr2_el0 = 0;
}
+
+ ret = copy_thread_gcs(p, args);
+ if (ret != 0)
+ return ret;
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
@@ -399,9 +462,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
*/
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
+ childregs->stackframe.type = FRAME_META_TYPE_FINAL;
p->thread.cpu_context.x19 = (unsigned long)args->fn;
p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
+
+ if (system_supports_poe())
+ p->thread.por_el0 = POR_EL0_INIT;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
@@ -409,7 +476,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* For the benefit of the unwinder, set up childregs->stackframe
* as the final frame for the new task.
*/
- p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
+ p->thread.cpu_context.fp = (unsigned long)&childregs->stackframe;
ptrace_hw_copy_thread(p);
@@ -429,7 +496,7 @@ static void tls_thread_switch(struct task_struct *next)
if (is_compat_thread(task_thread_info(next)))
write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
- else if (!arm64_kernel_unmapped_at_el0())
+ else
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
@@ -474,28 +541,104 @@ static void entry_task_switch(struct task_struct *next)
__this_cpu_write(__entry_task, next);
}
+#ifdef CONFIG_ARM64_GCS
+
+void gcs_preserve_current_state(void)
+{
+ current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+}
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+ if (!system_supports_gcs())
+ return;
+
+ /* GCSPR_EL0 is always readable */
+ gcs_preserve_current_state();
+ write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0);
+
+ if (current->thread.gcs_el0_mode != next->thread.gcs_el0_mode)
+ gcs_set_el0_mode(next);
+
+ /*
+ * Ensure that GCS memory effects of the 'prev' thread are
+ * ordered before other memory accesses with release semantics
+ * (or preceded by a DMB) on the current PE. In addition, any
+ * memory accesses with acquire semantics (or succeeded by a
+ * DMB) are ordered before GCS memory effects of the 'next'
+ * thread. This will ensure that the GCS memory effects are
+ * visible to other PEs in case of migration.
+ */
+ if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next))
+ gcsb_dsync();
+}
+
+#else
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+}
+
+#endif
+
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ }
}
/*
@@ -531,8 +674,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
+ gcs_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -648,7 +793,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -714,7 +859,7 @@ long get_tagged_addr_ctrl(struct task_struct *task)
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static struct ctl_table tagged_addr_sysctl_table[] = {
+static const struct ctl_table tagged_addr_sysctl_table[] = {
{
.procname = "tagged_addr_disabled",
.mode = 0644,
@@ -757,3 +902,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 6268a13a1d58..da53722f95d4 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -558,6 +558,18 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
set_pstate_ssbs(0);
+
+ /*
+ * SSBS is self-synchronizing and is intended to affect subsequent
+ * speculative instructions, but some CPUs can speculate with a stale
+ * value of SSBS.
+ *
+ * Mitigate this with an unconditional speculation barrier, as CPUs
+ * could mis-speculate branches and bypass a conditional barrier.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386))
+ spec_bar();
+
return SPECTRE_MITIGATED;
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 29a8e444db83..fabd732d0a2d 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
{
phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry);
int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry);
- if (err)
+ if (err && err != -EPERM)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
return err;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e3bef38fc2e2..f79b0d5f71ac 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -34,6 +34,7 @@
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mte.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
@@ -174,7 +175,6 @@ static void ptrace_hbptriggered(struct perf_event *bp,
struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
const char *desc = "Hardware breakpoint trap (ptrace)";
-#ifdef CONFIG_COMPAT
if (is_compat_task()) {
int si_errno = 0;
int i;
@@ -196,7 +196,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
desc);
return;
}
-#endif
+
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
}
@@ -698,6 +698,41 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
+static int fpmr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_fpmr())
+ return -EINVAL;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
+ return membuf_store(&to, target->thread.uw.fpmr);
+}
+
+static int fpmr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ unsigned long fpmr;
+
+ if (!system_supports_fpmr())
+ return -EINVAL;
+
+ fpmr = target->thread.uw.fpmr;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
+ if (ret)
+ return ret;
+
+ target->thread.uw.fpmr = fpmr;
+
+ fpsimd_flush_task_state(target);
+
+ return 0;
+}
+
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
@@ -729,7 +764,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
@@ -745,12 +779,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
@@ -758,7 +790,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
}
if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
@@ -869,7 +901,11 @@ static int sve_set_common(struct task_struct *target,
if (ret)
goto out;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC VL not the
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_vl(target, type));
/* Enter/exit streaming mode */
@@ -1096,7 +1132,11 @@ static int za_set(struct task_struct *target,
if (ret)
goto out;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC rather than
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_sme_vl(target));
/* Ensure there is some SVE storage for streaming mode */
@@ -1389,7 +1429,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target,
{
long ctrl = get_tagged_addr_ctrl(target);
- if (IS_ERR_VALUE(ctrl))
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
return ctrl;
return membuf_write(&to, &ctrl, sizeof(ctrl));
@@ -1403,6 +1443,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
int ret;
long ctrl;
+ ctrl = get_tagged_addr_ctrl(target);
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
+ return ctrl;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
if (ret)
return ret;
@@ -1411,6 +1455,101 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ctrl = target->thread.por_el0;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void task_gcs_to_user(struct user_gcs *user_gcs,
+ const struct task_struct *target)
+{
+ user_gcs->features_enabled = target->thread.gcs_el0_mode;
+ user_gcs->features_locked = target->thread.gcs_el0_locked;
+ user_gcs->gcspr_el0 = target->thread.gcspr_el0;
+}
+
+static void task_gcs_from_user(struct task_struct *target,
+ const struct user_gcs *user_gcs)
+{
+ target->thread.gcs_el0_mode = user_gcs->features_enabled;
+ target->thread.gcs_el0_locked = user_gcs->features_locked;
+ target->thread.gcspr_el0 = user_gcs->gcspr_el0;
+}
+
+static int gcs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (target == current)
+ gcs_preserve_current_state();
+
+ task_gcs_to_user(&user_gcs, target);
+
+ return membuf_write(&to, &user_gcs, sizeof(user_gcs));
+}
+
+static int gcs_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ task_gcs_to_user(&user_gcs, target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1);
+ if (ret)
+ return ret;
+
+ if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ task_gcs_from_user(target, &user_gcs);
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1419,6 +1558,7 @@ enum aarch64_regset {
REGSET_HW_BREAK,
REGSET_HW_WATCH,
#endif
+ REGSET_FPMR,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
@@ -1439,6 +1579,12 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE,
+#endif
+#ifdef CONFIG_ARM64_GCS
+ REGSET_GCS,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1497,6 +1643,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = system_call_get,
.set = system_call_set,
},
+ [REGSET_FPMR] = {
+ .core_note_type = NT_ARM_FPMR,
+ .n = 1,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = fpmr_get,
+ .set = fpmr_set,
+ },
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
.core_note_type = NT_ARM_SVE,
@@ -1590,6 +1744,26 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ .core_note_type = NT_ARM_POE,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ [REGSET_GCS] = {
+ .core_note_type = NT_ARM_GCS,
+ .n = sizeof(struct user_gcs) / sizeof(u64),
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = gcs_get,
+ .set = gcs_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
@@ -1597,7 +1771,6 @@ static const struct user_regset_view user_aarch64_view = {
.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets)
};
-#ifdef CONFIG_COMPAT
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
@@ -1854,6 +2027,7 @@ static const struct user_regset_view user_aarch32_ptrace_view = {
.regsets = aarch32_ptrace_regsets, .n = ARRAY_SIZE(aarch32_ptrace_regsets)
};
+#ifdef CONFIG_COMPAT
static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t __user *ret)
{
@@ -2115,7 +2289,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
/*
* Core dumping of 32-bit tasks or compat ptrace requests must use the
* user_aarch32_view compatible with arm32. Native ptrace requests on
@@ -2126,7 +2299,7 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
return &user_aarch32_view;
else if (is_compat_thread(task_thread_info(task)))
return &user_aarch32_ptrace_view;
-#endif
+
return &user_aarch64_view;
}
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index 99f2ffe9fc05..5b0891146054 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void)
module_init(reloc_test_init);
module_exit(reloc_test_exit);
+MODULE_DESCRIPTION("Relocation testing module");
MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
new file mode 100644
index 000000000000..ce4778141ec7
--- /dev/null
+++ b/arch/arm64/kernel/rsi.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/psci.h>
+#include <linux/swiotlb.h>
+#include <linux/cc_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/mem_encrypt.h>
+#include <asm/rsi.h>
+
+static struct realm_config config;
+
+unsigned long prot_ns_shared;
+EXPORT_SYMBOL(prot_ns_shared);
+
+DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
+EXPORT_SYMBOL(rsi_present);
+
+bool cc_platform_has(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_MEM_ENCRYPT:
+ return is_realm_world();
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
+
+static bool rsi_version_matches(void)
+{
+ unsigned long ver_lower, ver_higher;
+ unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
+ &ver_lower,
+ &ver_higher);
+
+ if (ret == SMCCC_RET_NOT_SUPPORTED)
+ return false;
+
+ if (ret != RSI_SUCCESS) {
+ pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
+ RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower),
+ RSI_ABI_VERSION_GET_MAJOR(ver_higher),
+ RSI_ABI_VERSION_GET_MINOR(ver_higher));
+ return false;
+ }
+
+ pr_info("RME: Using RSI version %lu.%lu\n",
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower));
+
+ return true;
+}
+
+static void __init arm64_rsi_setup_memory(void)
+{
+ u64 i;
+ phys_addr_t start, end;
+
+ /*
+ * Iterate over the available memory ranges and convert the state to
+ * protected memory. We should take extra care to ensure that we DO NOT
+ * permit any "DESTROYED" pages to be converted to "RAM".
+ *
+ * panic() is used because if the attempt to switch the memory to
+ * protected has failed here, then future accesses to the memory are
+ * simply going to be reflected as a SEA (Synchronous External Abort)
+ * which we can't handle. Bailing out early prevents the guest limping
+ * on and dying later.
+ */
+ for_each_mem_range(i, &start, &end) {
+ if (rsi_set_memory_range_protected_safe(start, end)) {
+ panic("Failed to set memory range to protected: %pa-%pa",
+ &start, &end);
+ }
+ }
+}
+
+bool __arm64_is_protected_mmio(phys_addr_t base, size_t size)
+{
+ enum ripas ripas;
+ phys_addr_t end, top;
+
+ /* Overflow ? */
+ if (WARN_ON(base + size <= base))
+ return false;
+
+ end = ALIGN(base + size, RSI_GRANULE_SIZE);
+ base = ALIGN_DOWN(base, RSI_GRANULE_SIZE);
+
+ while (base < end) {
+ if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top)))
+ break;
+ if (WARN_ON(top <= base))
+ break;
+ if (ripas != RSI_RIPAS_DEV)
+ break;
+ base = top;
+ }
+
+ return base >= end;
+}
+EXPORT_SYMBOL(__arm64_is_protected_mmio);
+
+static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot)
+{
+ if (__arm64_is_protected_mmio(phys, size))
+ *prot = pgprot_encrypted(*prot);
+ else
+ *prot = pgprot_decrypted(*prot);
+
+ return 0;
+}
+
+void __init arm64_rsi_init(void)
+{
+ if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC)
+ return;
+ if (!rsi_version_matches())
+ return;
+ if (WARN_ON(rsi_get_realm_config(&config)))
+ return;
+ prot_ns_shared = BIT(config.ipa_bits - 1);
+
+ if (arm64_ioremap_prot_hook_register(realm_ioremap_hook))
+ return;
+
+ if (realm_register_memory_enc_ops())
+ return;
+
+ arm64_rsi_setup_memory();
+
+ static_branch_enable(&rsi_present);
+}
+
+static struct platform_device rsi_dev = {
+ .name = RSI_PDEV_NAME,
+ .id = PLATFORM_DEVID_NONE
+};
+
+static int __init arm64_create_dummy_rsi_dev(void)
+{
+ if (is_realm_world() &&
+ platform_device_register(&rsi_dev))
+ pr_err("failed to register rsi platform device\n");
+ return 0;
+}
+
+arch_initcall(arm64_create_dummy_rsi_dev)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 42c690bb2d60..85104587f849 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/numa.h>
+#include <asm/rsi.h>
#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -166,21 +167,6 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
-static void *early_fdt_ptr __initdata;
-
-void __init *get_early_fdt_ptr(void)
-{
- return early_fdt_ptr;
-}
-
-asmlinkage void __init early_fdt_map(u64 dt_phys)
-{
- int fdt_size;
-
- early_fixmap_init();
- early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
-}
-
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
@@ -190,7 +176,11 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
if (dt_virt)
memblock_reserve(dt_phys, size);
- if (!dt_virt || !early_init_dt_scan(dt_virt)) {
+ /*
+ * dt_virt is a fixmap address, hence __pa(dt_virt) can't be used.
+ * Pass dt_phys directly.
+ */
+ if (!early_init_dt_scan(dt_virt, dt_phys)) {
pr_crit("\n"
"Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n"
"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
@@ -233,9 +223,7 @@ static void __init request_standard_resources(void)
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
- standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES);
- if (!standard_resources)
- panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
+ standard_resources = memblock_alloc_or_panic(res_size, SMP_CACHE_BYTES);
for_each_mem_region(region) {
res = &standard_resources[i++];
@@ -298,13 +286,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
kaslr_init();
- /*
- * If know now we are going to need KPTI then use non-global
- * mappings from the start, avoiding the cost of rewriting
- * everything later.
- */
- arm64_use_ng_mappings = kaslr_requires_kpti();
-
early_fixmap_init();
early_ioremap_init();
@@ -320,9 +301,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
dynamic_scs_init();
/*
- * Unmask asynchronous aborts and fiq after bringing up possible
- * earlycon. (Report possible System Errors once we can report this
- * occurred).
+ * The primary CPU enters the kernel with all DAIF exceptions masked.
+ *
+ * We must unmask Debug and SError before preemption or scheduling is
+ * possible to ensure that these are consistently unmasked across
+ * threads, and we want to unmask SError as soon as possible after
+ * initializing earlycon so that we can report any SErrors immediately.
+ *
+ * IRQ and FIQ will be unmasked after the root irqchip has been
+ * detected and initialized.
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -367,13 +354,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
else
psci_acpi_init();
+ arm64_rsi_init();
+
init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
- /* Init percpu seeds for random tags after cpus are set up. */
- kasan_init_sw_tags();
-
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 425b1bc17a3f..99ea26d400ff 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -16,15 +16,17 @@
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
-#include <linux/resume_user_mode.h>
#include <linux/ratelimit.h>
+#include <linux/rseq.h>
#include <linux/syscalls.h>
+#include <linux/pkeys.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/exception.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
@@ -34,6 +36,8 @@
#include <asm/traps.h>
#include <asm/vdso.h>
+#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)
+
/*
* Do a signal return; undo the signal stack. These are aligned to 128-bit.
*/
@@ -42,11 +46,6 @@ struct rt_sigframe {
struct ucontext uc;
};
-struct frame_record {
- u64 fp;
- u64 lr;
-};
-
struct rt_sigframe_user_layout {
struct rt_sigframe __user *sigframe;
struct frame_record __user *next_frame;
@@ -56,18 +55,73 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long gcs_offset;
unsigned long sve_offset;
unsigned long tpidr2_offset;
unsigned long za_offset;
unsigned long zt_offset;
+ unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
-#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
+/*
+ * Holds any EL0-controlled state that influences unprivileged memory accesses.
+ * This includes both accesses done in userspace and uaccess done in the kernel.
+ *
+ * This state needs to be carefully managed to ensure that it doesn't cause
+ * uaccess to fail when setting up the signal frame, and the signal handler
+ * itself also expects a well-defined state when entered.
+ */
+struct user_access_state {
+ u64 por_el0;
+};
+
#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+/*
+ * Save the user access state into ua_state and reset it to disable any
+ * restrictions.
+ */
+static void save_reset_user_access_state(struct user_access_state *ua_state)
+{
+ if (system_supports_poe()) {
+ u64 por_enable_all = 0;
+
+ for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
+ por_enable_all |= POE_RXW << (pkey * POR_BITS_PER_PKEY);
+
+ ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ write_sysreg_s(por_enable_all, SYS_POR_EL0);
+ /* Ensure that any subsequent uaccess observes the updated value */
+ isb();
+ }
+}
+
+/*
+ * Set the user access state for invoking the signal handler.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void set_handler_user_access_state(void)
+{
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+/*
+ * Restore the user access state to the values saved in ua_state.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void restore_user_access_state(const struct user_access_state *ua_state)
+{
+ if (system_supports_poe())
+ write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
+}
+
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
const size_t reserved_size =
@@ -182,6 +236,12 @@ struct user_ctxs {
u32 za_size;
struct zt_context __user *zt;
u32 zt_size;
+ struct fpmr_context __user *fpmr;
+ u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
+ struct gcs_context __user *gcs;
+ u32 gcs_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -227,6 +287,61 @@ static int restore_fpsimd_context(struct user_ctxs *user)
return err ? -EFAULT : 0;
}
+static int preserve_fpmr_context(struct fpmr_context __user *ctx)
+{
+ int err = 0;
+
+ current->thread.uw.fpmr = read_sysreg_s(SYS_FPMR);
+
+ __put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
+
+ return err;
+}
+
+static int restore_fpmr_context(struct user_ctxs *user)
+{
+ u64 fpmr;
+ int err = 0;
+
+ if (user->fpmr_size != sizeof(*user->fpmr))
+ return -EINVAL;
+
+ __get_user_error(fpmr, &user->fpmr->fpmr, err);
+ if (!err)
+ write_sysreg_s(fpmr, SYS_FPMR);
+
+ return err;
+}
+
+static int preserve_poe_context(struct poe_context __user *ctx,
+ const struct user_access_state *ua_state)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(ua_state->por_el0, &ctx->por_el0, err);
+
+ return err;
+}
+
+static int restore_poe_context(struct user_ctxs *user,
+ struct user_access_state *ua_state)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ ua_state->por_el0 = por_el0;
+
+ return err;
+}
#ifdef CONFIG_ARM64_SVE
@@ -574,6 +689,82 @@ extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
+#ifdef CONFIG_ARM64_GCS
+
+static int preserve_gcs_context(struct gcs_context __user *ctx)
+{
+ int err = 0;
+ u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * If GCS is enabled we will add a cap token to the frame,
+ * include it in the GCSPR_EL0 we report to support stack
+ * switching via sigreturn if GCS is enabled. We do not allow
+ * enabling via sigreturn so the token is only relevant for
+ * threads with GCS enabled.
+ */
+ if (task_gcs_el0_enabled(current))
+ gcspr -= 8;
+
+ __put_user_error(GCS_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(gcspr, &ctx->gcspr, err);
+ __put_user_error(0, &ctx->reserved, err);
+ __put_user_error(current->thread.gcs_el0_mode,
+ &ctx->features_enabled, err);
+
+ return err;
+}
+
+static int restore_gcs_context(struct user_ctxs *user)
+{
+ u64 gcspr, enabled;
+ int err = 0;
+
+ if (user->gcs_size != sizeof(*user->gcs))
+ return -EINVAL;
+
+ __get_user_error(gcspr, &user->gcs->gcspr, err);
+ __get_user_error(enabled, &user->gcs->features_enabled, err);
+ if (err)
+ return err;
+
+ /* Don't allow unknown modes */
+ if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ err = gcs_check_locked(current, enabled);
+ if (err != 0)
+ return err;
+
+ /* Don't allow enabling */
+ if (!task_gcs_el0_enabled(current) &&
+ (enabled & PR_SHADOW_STACK_ENABLE))
+ return -EINVAL;
+
+ /* If we are disabling disable everything */
+ if (!(enabled & PR_SHADOW_STACK_ENABLE))
+ enabled = 0;
+
+ current->thread.gcs_el0_mode = enabled;
+
+ /*
+ * We let userspace set GCSPR_EL0 to anything here, we will
+ * validate later in gcs_restore_signal().
+ */
+ write_sysreg_s(gcspr, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else /* ! CONFIG_ARM64_GCS */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_gcs_context(void __user *ctx);
+extern int restore_gcs_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_GCS */
+
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
@@ -590,6 +781,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->tpidr2 = NULL;
user->za = NULL;
user->zt = NULL;
+ user->fpmr = NULL;
+ user->poe = NULL;
+ user->gcs = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -640,6 +834,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -684,6 +889,28 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->zt_size = size;
break;
+ case FPMR_MAGIC:
+ if (!system_supports_fpmr())
+ goto invalid;
+
+ if (user->fpmr)
+ goto invalid;
+
+ user->fpmr = (struct fpmr_context __user *)head;
+ user->fpmr_size = size;
+ break;
+
+ case GCS_MAGIC:
+ if (!system_supports_gcs())
+ goto invalid;
+
+ if (user->gcs)
+ goto invalid;
+
+ user->gcs = (struct gcs_context __user *)head;
+ user->gcs_size = size;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -767,7 +994,8 @@ invalid:
}
static int restore_sigframe(struct pt_regs *regs,
- struct rt_sigframe __user *sf)
+ struct rt_sigframe __user *sf,
+ struct user_access_state *ua_state)
{
sigset_t set;
int i, err;
@@ -803,22 +1031,84 @@ static int restore_sigframe(struct pt_regs *regs,
err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_gcs() && user.gcs)
+ err = restore_gcs_context(&user);
+
if (err == 0 && system_supports_tpidr2() && user.tpidr2)
err = restore_tpidr2_context(&user);
+ if (err == 0 && system_supports_fpmr() && user.fpmr)
+ err = restore_fpmr_context(&user);
+
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user, ua_state);
+
return err;
}
+#ifdef CONFIG_ARM64_GCS
+static int gcs_restore_signal(void)
+{
+ u64 gcspr_el0, cap;
+ int ret;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
+ return 0;
+
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Ensure that any changes to the GCS done via GCS operations
+ * are visible to the normal reads we do to validate the
+ * token.
+ */
+ gcsb_dsync();
+
+ /*
+ * GCSPR_EL0 should be pointing at a capped GCS, read the cap.
+ * We don't enforce that this is in a GCS page, if it is not
+ * then faults will be generated on GCS operations - the main
+ * concern is to protect GCS pages.
+ */
+ ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0,
+ sizeof(cap));
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * Check that the cap is the actual GCS before replacing it.
+ */
+ if (cap != GCS_SIGNAL_CAP(gcspr_el0))
+ return -EINVAL;
+
+ /* Invalidate the token to prevent reuse */
+ put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret);
+ if (ret != 0)
+ return -EFAULT;
+
+ write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else
+static int gcs_restore_signal(void) { return 0; }
+#endif
+
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -835,12 +1125,17 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
- if (restore_sigframe(regs, frame))
+ if (restore_sigframe(regs, frame, &ua_state))
+ goto badframe;
+
+ if (gcs_restore_signal())
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ restore_user_access_state(&ua_state);
+
return regs->regs[0];
badframe:
@@ -875,6 +1170,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+#ifdef CONFIG_ARM64_GCS
+ if (system_supports_gcs() && (add_all || current->thread.gcspr_el0)) {
+ err = sigframe_alloc(user, &user->gcs_offset,
+ sizeof(struct gcs_context));
+ if (err)
+ return err;
+ }
+#endif
+
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
@@ -928,11 +1232,26 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}
}
+ if (system_supports_fpmr()) {
+ err = sigframe_alloc(user, &user->fpmr_offset,
+ sizeof(struct fpmr_context));
+ if (err)
+ return err;
+ }
+
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
static int setup_sigframe(struct rt_sigframe_user_layout *user,
- struct pt_regs *regs, sigset_t *set)
+ struct pt_regs *regs, sigset_t *set,
+ const struct user_access_state *ua_state)
{
int i, err = 0;
struct rt_sigframe __user *sf = user->sigframe;
@@ -968,6 +1287,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ if (system_supports_gcs() && err == 0 && user->gcs_offset) {
+ struct gcs_context __user *gcs_ctx =
+ apply_user_offset(user, user->gcs_offset);
+ err |= preserve_gcs_context(gcs_ctx);
+ }
+
/* Scalable Vector Extension state (including streaming), if present */
if ((system_supports_sve() || system_supports_sme()) &&
err == 0 && user->sve_offset) {
@@ -983,6 +1308,20 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_tpidr2_context(tpidr2_ctx);
}
+ /* FPMR if supported */
+ if (system_supports_fpmr() && err == 0) {
+ struct fpmr_context __user *fpmr_ctx =
+ apply_user_offset(user, user->fpmr_offset);
+ err |= preserve_fpmr_context(fpmr_ctx);
+ }
+
+ if (system_supports_poe() && err == 0) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx, ua_state);
+ }
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1071,15 +1410,81 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
return 0;
}
-static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+#ifdef CONFIG_ARM64_GCS
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ u64 gcspr_el0;
+ int ret = 0;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!task_gcs_el0_enabled(current))
+ return 0;
+
+ /*
+ * We are entering a signal handler, current register state is
+ * active.
+ */
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Push a cap and the GCS entry for the trampoline onto the GCS.
+ */
+ put_user_gcs((unsigned long)sigtramp,
+ (unsigned long __user *)(gcspr_el0 - 16), &ret);
+ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8),
+ (unsigned long __user *)(gcspr_el0 - 8), &ret);
+ if (ret != 0)
+ return ret;
+
+ gcspr_el0 -= 16;
+ write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0);
+
+ return 0;
+}
+#else
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ return 0;
+}
+
+#endif
+
+static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
+ int err;
+
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ sigtramp = ksig->ka.sa.sa_restorer;
+ else
+ sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+ err = gcs_signal_entry(sigtramp, ksig);
+ if (err)
+ return err;
+
+ /*
+ * We must not fail from this point onwards. We are going to update
+ * registers, including SP, in order to invoke the signal handler. If
+ * we failed and attempted to deliver a nested SIGSEGV to a handler
+ * after that point, the subsequent sigreturn would end up restoring
+ * the (partial) state for the original signal handler.
+ */
regs->regs[0] = usig;
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ regs->regs[1] = (unsigned long)&user->sigframe->info;
+ regs->regs[2] = (unsigned long)&user->sigframe->uc;
+ }
regs->sp = (unsigned long)user->sigframe;
regs->regs[29] = (unsigned long)&user->next_frame->fp;
- regs->pc = (unsigned long)ka->sa.sa_handler;
+ regs->regs[30] = (unsigned long)sigtramp;
+ regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
/*
* Signal delivery is a (wacky) indirect function call in
@@ -1119,12 +1524,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
- if (ka->sa.sa_flags & SA_RESTORER)
- sigtramp = ka->sa.sa_restorer;
- else
- sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
-
- regs->regs[30] = (unsigned long)sigtramp;
+ return 0;
}
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
@@ -1132,6 +1532,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
{
struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
int err = 0;
fpsimd_signal_preserve_current_state();
@@ -1139,21 +1540,29 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
if (get_sigframe(&user, ksig, regs))
return 1;
+ save_reset_user_access_state(&ua_state);
frame = user.sigframe;
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(&user, regs, set);
- if (err == 0) {
- setup_return(regs, &ksig->ka, &user, usig);
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- regs->regs[1] = (unsigned long)&frame->info;
- regs->regs[2] = (unsigned long)&frame->uc;
- }
- }
+ err |= setup_sigframe(&user, regs, set, &ua_state);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+ if (err == 0)
+ err = setup_return(regs, ksig, &user, usig);
+
+ /*
+ * We must not fail if setup_return() succeeded - see comment at the
+ * beginning of setup_return().
+ */
+
+ if (err == 0)
+ set_handler_user_access_state();
+ else
+ restore_user_access_state(&ua_state);
return err;
}
@@ -1207,7 +1616,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
-static void do_signal(struct pt_regs *regs)
+void do_signal(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
@@ -1278,41 +1687,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
-{
- do {
- if (thread_flags & _TIF_NEED_RESCHED) {
- /* Unmask Debug and SError for the next task */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- schedule();
- } else {
- local_daif_restore(DAIF_PROCCTX);
-
- if (thread_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
- clear_thread_flag(TIF_MTE_ASYNC_FAULT);
- send_sig_fault(SIGSEGV, SEGV_MTEAERR,
- (void __user *)NULL, current);
- }
-
- if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- do_signal(regs);
-
- if (thread_flags & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
- }
-
- local_daif_mask();
- thread_flags = read_thread_flags();
- } while (thread_flags & _TIF_WORK_MASK);
-}
-
unsigned long __ro_after_init signal_minsigstksz;
/*
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index bbd542704730..81e798b6dada 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -17,7 +17,7 @@
#include <asm/signal32.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
#include <asm/vdso.h>
struct compat_vfp_sigframe {
@@ -451,7 +451,7 @@ int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
void compat_setup_restart_syscall(struct pt_regs *regs)
{
- regs->regs[7] = __NR_compat_restart_syscall;
+ regs->regs[7] = __NR_compat32_restart_syscall;
}
/*
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
index ccbd4aab4ba4..6f486b95b413 100644
--- a/arch/arm64/kernel/sigreturn32.S
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -13,7 +13,7 @@
* need two 16-bit instructions.
*/
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
.section .rodata
.globl __aarch32_sigret_code_start
@@ -22,26 +22,26 @@ __aarch32_sigret_code_start:
/*
* ARM Code
*/
- .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x27 // svc #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0xdf // mov r7, #__NR_compat32_sigreturn
/*
* ARM code
*/
- .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_rt_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x27 // svc #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0xdf // mov r7, #__NR_compat32_rt_sigreturn
.globl __aarch32_sigret_code_end
__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 2aa5129d8253..f093cdf71be1 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -102,9 +102,6 @@ SYM_CODE_START(cpu_resume)
mov x0, xzr
bl init_kernel_el
mov x19, x0 // preserve boot mode
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
-#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 487381164ff6..2def9d0dd3dd 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,48 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
-#include <asm/thread_info.h>
-
-/*
- * If we have SMCCC v1.3 and (as is likely) no SVE state in
- * the registers then set the SMCCC hint bit to say there's no
- * need to preserve it. Do this by directly adjusting the SMCCC
- * function value which is already stored in x0 ready to be called.
- */
-SYM_FUNC_START(__arm_smccc_sve_check)
-
- ldr_l x16, smccc_has_sve_hint
- cbz x16, 2f
-
- get_current_task x16
- ldr x16, [x16, #TSK_TI_FLAGS]
- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
-
-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
-
-2: ret
-SYM_FUNC_END(__arm_smccc_sve_check)
-EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-alternative_if ARM64_SVE
- bl __arm_smccc_sve_check
-alternative_else_nop_endif
\instr #0
- ldr x4, [sp, #16]
+ ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #24]
+ ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ldp x29, x30, [sp], #16
- ret
+1: ret
.endm
/*
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4ced34f62dab..3b3f6b56e733 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -55,9 +55,6 @@
#include <trace/events/ipi.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -71,7 +68,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
+ IPI_CPU_STOP_NMI,
IPI_TIMER,
IPI_IRQ_WORK,
NR_IPI,
@@ -88,6 +85,8 @@ static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+static bool crash_stop;
+
static void ipi_setup(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
@@ -132,7 +131,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
if (ret) {
- pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+ if (ret != -EPERM)
+ pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
@@ -264,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
+ /*
+ * Secondary CPUs enter the kernel with all DAIF exceptions masked.
+ *
+ * As with setup_arch() we must unmask Debug and SError exceptions, and
+ * as the root irqchip has already been detected and initialized we can
+ * unmask IRQ and FIQ at the same time.
+ */
local_daif_restore(DAIF_PROCCTX);
/*
@@ -462,6 +469,8 @@ void __init smp_prepare_boot_cpu(void)
init_gic_priority_masking();
kasan_init_hw_tags();
+ /* Init percpu seeds for random tags after cpus are set up. */
+ kasan_init_sw_tags();
}
/*
@@ -503,6 +512,59 @@ static int __init smp_cpu_setup(int cpu)
static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;
+int arch_register_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+ if (!acpi_disabled && !acpi_handle &&
+ IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
+ return -EPROBE_DEFER;
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+ /* For now block anything that looks like physical CPU Hotplug */
+ if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return -ENODEV;
+ }
+#endif
+
+ /*
+ * Availability of the acpi handle is sufficient to establish
+ * that _STA has aleady been checked. No need to recheck here.
+ */
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
+
+ return register_cpu(c, cpu);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+void arch_unregister_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ acpi_status status;
+ unsigned long long sta;
+
+ if (!acpi_handle) {
+ pr_err_once("Removing a CPU without associated ACPI handle\n");
+ return;
+ }
+
+ status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
+ if (ACPI_FAILURE(status))
+ return;
+
+ /* For now do not allow anything that looks like physical CPU HP */
+ if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return;
+ }
+
+ unregister_cpu(c);
+}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -523,7 +585,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!acpi_gicc_is_usable(processor)) {
+ if (!(processor->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -742,8 +805,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
- per_cpu(cpu_number, cpu) = cpu;
-
if (cpu == smp_processor_id())
continue;
@@ -760,13 +821,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
+ [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -777,7 +840,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
- for (i = 0; i < NR_IPI; i++) {
+ for (i = 0; i < MAX_IPI; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
@@ -806,9 +869,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -822,21 +885,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -901,14 +969,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -963,8 +1029,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
return false;
switch (ipi) {
- case IPI_CPU_STOP:
- case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_STOP_NMI:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
@@ -1021,12 +1086,12 @@ void __init set_smp_ipi_range(int ipi_base, int n)
if (ipi_should_be_nmi(i)) {
err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
+ "IPI", &irq_stat);
WARN(err, "Could not request IPI %d as NMI, err=%d\n",
i, err);
} else {
err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
+ "IPI", &irq_stat);
WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
i, err);
}
@@ -1077,79 +1142,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
- /* Wait up to one second for other CPUs to stop */
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
-
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
+ smp_send_stop();
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
-
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b2a60e0bcfd2..1d9d51d7627f 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -7,6 +7,7 @@
#include <linux/kernel.h>
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
#include <linux/sched.h>
@@ -19,11 +20,28 @@
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
+enum kunwind_source {
+ KUNWIND_SOURCE_UNKNOWN,
+ KUNWIND_SOURCE_FRAME,
+ KUNWIND_SOURCE_CALLER,
+ KUNWIND_SOURCE_TASK,
+ KUNWIND_SOURCE_REGS_PC,
+};
+
+union unwind_flags {
+ unsigned long all;
+ struct {
+ unsigned long fgraph : 1,
+ kretprobe : 1;
+ };
+};
+
/*
* Kernel unwind state
*
* @common: Common unwind state.
* @task: The task being unwound.
+ * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding.
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
* associated with the most recently encountered replacement lr
* value.
@@ -31,9 +49,13 @@
struct kunwind_state {
struct unwind_state common;
struct task_struct *task;
+ int graph_idx;
#ifdef CONFIG_KRETPROBES
struct llist_node *kr_cur;
#endif
+ enum kunwind_source source;
+ union unwind_flags flags;
+ struct pt_regs *regs;
};
static __always_inline void
@@ -42,6 +64,9 @@ kunwind_init(struct kunwind_state *state,
{
unwind_init_common(&state->common);
state->task = task;
+ state->source = KUNWIND_SOURCE_UNKNOWN;
+ state->flags.all = 0;
+ state->regs = NULL;
}
/*
@@ -57,8 +82,10 @@ kunwind_init_from_regs(struct kunwind_state *state,
{
kunwind_init(state, current);
+ state->regs = regs;
state->common.fp = regs->regs[29];
state->common.pc = regs->pc;
+ state->source = KUNWIND_SOURCE_REGS_PC;
}
/*
@@ -76,6 +103,7 @@ kunwind_init_from_caller(struct kunwind_state *state)
state->common.fp = (unsigned long)__builtin_frame_address(1);
state->common.pc = (unsigned long)__builtin_return_address(0);
+ state->source = KUNWIND_SOURCE_CALLER;
}
/*
@@ -96,6 +124,7 @@ kunwind_init_from_task(struct kunwind_state *state,
state->common.fp = thread_saved_fp(task);
state->common.pc = thread_saved_pc(task);
+ state->source = KUNWIND_SOURCE_TASK;
}
static __always_inline int
@@ -105,12 +134,15 @@ kunwind_recover_return_address(struct kunwind_state *state)
if (state->task->ret_stack &&
(state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->common.pc,
(void *)state->common.fp);
- if (WARN_ON_ONCE(state->common.pc == orig_pc))
+ if (state->common.pc == orig_pc) {
+ WARN_ON_ONCE(state->task == current);
return -EINVAL;
+ }
state->common.pc = orig_pc;
+ state->flags.fgraph = 1;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -121,12 +153,95 @@ kunwind_recover_return_address(struct kunwind_state *state)
(void *)state->common.fp,
&state->kr_cur);
state->common.pc = orig_pc;
+ state->flags.kretprobe = 1;
}
#endif /* CONFIG_KRETPROBES */
return 0;
}
+static __always_inline
+int kunwind_next_regs_pc(struct kunwind_state *state)
+{
+ struct stack_info *info;
+ unsigned long fp = state->common.fp;
+ struct pt_regs *regs;
+
+ regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp);
+
+ info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs));
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(&state->common, info, (unsigned long)regs,
+ sizeof(*regs));
+
+ state->regs = regs;
+ state->common.pc = regs->pc;
+ state->common.fp = regs->regs[29];
+ state->regs = NULL;
+ state->source = KUNWIND_SOURCE_REGS_PC;
+ return 0;
+}
+
+static __always_inline int
+kunwind_next_frame_record_meta(struct kunwind_state *state)
+{
+ struct task_struct *tsk = state->task;
+ unsigned long fp = state->common.fp;
+ struct frame_record_meta *meta;
+ struct stack_info *info;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*meta));
+ if (!info)
+ return -EINVAL;
+
+ meta = (struct frame_record_meta *)fp;
+ switch (READ_ONCE(meta->type)) {
+ case FRAME_META_TYPE_FINAL:
+ if (meta == &task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ case FRAME_META_TYPE_PT_REGS:
+ return kunwind_next_regs_pc(state);
+ default:
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ }
+}
+
+static __always_inline int
+kunwind_next_frame_record(struct kunwind_state *state)
+{
+ unsigned long fp = state->common.fp;
+ struct frame_record *record;
+ struct stack_info *info;
+ unsigned long new_fp, new_pc;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*record));
+ if (!info)
+ return -EINVAL;
+
+ record = (struct frame_record *)fp;
+ new_fp = READ_ONCE(record->fp);
+ new_pc = READ_ONCE(record->lr);
+
+ if (!new_fp && !new_pc)
+ return kunwind_next_frame_record_meta(state);
+
+ unwind_consume_stack(&state->common, info, fp, sizeof(*record));
+
+ state->common.fp = new_fp;
+ state->common.pc = new_pc;
+ state->source = KUNWIND_SOURCE_FRAME;
+
+ return 0;
+}
+
/*
* Unwind from one frame record (A) to the next frame record (B).
*
@@ -137,15 +252,21 @@ kunwind_recover_return_address(struct kunwind_state *state)
static __always_inline int
kunwind_next(struct kunwind_state *state)
{
- struct task_struct *tsk = state->task;
- unsigned long fp = state->common.fp;
int err;
- /* Final frame; nothing to unwind */
- if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
- return -ENOENT;
+ state->flags.all = 0;
+
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME:
+ case KUNWIND_SOURCE_CALLER:
+ case KUNWIND_SOURCE_TASK:
+ case KUNWIND_SOURCE_REGS_PC:
+ err = kunwind_next_frame_record(state);
+ break;
+ default:
+ err = -EINVAL;
+ }
- err = unwind_next_frame_record(&state->common);
if (err)
return err;
@@ -266,10 +387,57 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
-static bool dump_backtrace_entry(void *arg, unsigned long where)
+struct bpf_unwind_consume_entry_data {
+ bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
+ void *cookie;
+};
+
+static bool
+arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct bpf_unwind_consume_entry_data *data = cookie;
+
+ return data->consume_entry(data->cookie, state->common.pc, 0,
+ state->common.fp);
+}
+
+noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp,
+ u64 fp), void *cookie)
{
+ struct bpf_unwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
+}
+
+static const char *state_source_string(const struct kunwind_state *state)
+{
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME: return NULL;
+ case KUNWIND_SOURCE_CALLER: return "C";
+ case KUNWIND_SOURCE_TASK: return "T";
+ case KUNWIND_SOURCE_REGS_PC: return "P";
+ default: return "U";
+ }
+}
+
+static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg)
+{
+ const char *source = state_source_string(state);
+ union unwind_flags flags = state->flags;
+ bool has_info = source || flags.all;
char *loglvl = arg;
- printk("%s %pSb\n", loglvl, (void *)where);
+
+ printk("%s %pSb%s%s%s%s%s\n", loglvl,
+ (void *)state->common.pc,
+ has_info ? " (" : "",
+ source ? source : "",
+ flags.fgraph ? "F" : "",
+ flags.kretprobe ? "K" : "",
+ has_info ? ")" : "");
+
return true;
}
@@ -288,7 +456,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
return;
printk("%sCall trace:\n", loglvl);
- arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
+ kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
put_task_stack(tsk);
}
@@ -298,3 +466,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
dump_backtrace(NULL, tsk, loglvl);
barrier();
}
+
+/*
+ * The struct defined for userspace stack frame in AARCH64 mode.
+ */
+struct frame_tail {
+ struct frame_tail __user *fp;
+ unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+unwind_user_frame(struct frame_tail __user *tail, void *cookie,
+ stack_trace_consume_fn consume_entry)
+{
+ struct frame_tail buftail;
+ unsigned long err;
+ unsigned long lr;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ lr = ptrauth_strip_user_insn_pac(buftail.lr);
+
+ if (!consume_entry(cookie, lr))
+ return NULL;
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail >= buftail.fp)
+ return NULL;
+
+ return buftail.fp;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+ compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
+ u32 sp;
+ u32 lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie,
+ stack_trace_consume_fn consume_entry)
+{
+ struct compat_frame_tail buftail;
+ unsigned long err;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ if (!consume_entry(cookie, buftail.lr))
+ return NULL;
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail + 1 >= (struct compat_frame_tail __user *)
+ compat_ptr(buftail.fp))
+ return NULL;
+
+ return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ if (!consume_entry(cookie, regs->pc))
+ return;
+
+ if (!compat_user_mode(regs)) {
+ /* AARCH64 mode */
+ struct frame_tail __user *tail;
+
+ tail = (struct frame_tail __user *)regs->regs[29];
+ while (tail && !((unsigned long)tail & 0x7))
+ tail = unwind_user_frame(tail, cookie, consume_entry);
+ } else {
+#ifdef CONFIG_COMPAT
+ /* AARCH32 compat mode */
+ struct compat_frame_tail __user *tail;
+
+ tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+ while (tail && !((unsigned long)tail & 0x3))
+ tail = unwind_compat_user_frame(tail, cookie, consume_entry);
+#endif
+ }
+}
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index d5ffaaab31a7..f08408b6e826 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -48,14 +48,16 @@ asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused)
*/
#define __arm64_sys_personality __arm64_sys_arm64_personality
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
const syscall_fn_t sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
};
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index fc40386afb1b..96bcfb907443 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -5,17 +5,12 @@
* Copyright (C) 2015 ARM Ltd.
*/
-/*
- * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and
- * asm/unistd32.h.
- */
-#define __COMPAT_SYSCALL_NR
-
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/syscalls.h>
#include <asm/syscall.h>
+#include <asm/unistd_compat_32.h>
asmlinkage long compat_sys_sigreturn(void);
asmlinkage long compat_sys_rt_sigreturn(void);
@@ -122,14 +117,16 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode,
return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
}
+#define __SYSCALL_WITH_COMPAT(nr, sym, compat) __SYSCALL(nr, compat)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd32.h>
+#include <asm/syscall_table_32.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
-const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = {
- [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd32.h>
+const syscall_fn_t compat_sys_call_table[__NR_compat32_syscalls] = {
+ [0 ... __NR_compat32_syscalls - 1] = __arm64_sys_ni_syscall,
+#include <asm/syscall_table_32.h>
};
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 9a70d9746b66..c442fcec6b9e 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -14,20 +14,18 @@
#include <asm/syscall.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
long compat_arm_syscall(struct pt_regs *regs, int scno);
long sys_ni_syscall(void);
static long do_ni_syscall(struct pt_regs *regs, int scno)
{
-#ifdef CONFIG_COMPAT
- long ret;
if (is_compat_task()) {
- ret = compat_arm_syscall(regs, scno);
+ long ret = compat_arm_syscall(regs, scno);
if (ret != -ENOSYS)
return ret;
}
-#endif
return sys_ni_syscall();
}
@@ -56,17 +54,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_set_return_value(current, regs, 0, ret);
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for arm64 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 9 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints: the AAPCS mandates a
- * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte aligned SP at function boundaries, which will remove the
+ * 4 low bits from any entropy chosen here.
*
- * The resulting 5 bits of entropy is seen in SP[8:4].
+ * The resulting 6 bits of entropy is seen in SP[9:4].
*/
- choose_random_kstack_offset(get_random_u16() & 0x1FF);
+ choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
@@ -158,7 +154,7 @@ void do_el0_svc(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
void do_el0_svc_compat(struct pt_regs *regs)
{
- el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
+ el0_svc_common(regs, regs->regs[7], __NR_compat32_syscalls,
compat_sys_call_table);
}
#endif
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 1a2c72f3e7f8..cb180684d10d 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -194,12 +194,19 @@ static void amu_fie_setup(const struct cpumask *cpus)
int cpu;
/* We are already set since the last insmod of cpufreq driver */
- if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ if (cpumask_available(amu_fie_cpus) &&
+ unlikely(cpumask_subset(cpus, amu_fie_cpus)))
return;
- for_each_cpu(cpu, cpus) {
+ for_each_cpu(cpu, cpus)
if (!freq_counters_valid(cpu))
return;
+
+ if (!cpumask_available(amu_fie_cpus) &&
+ !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+ WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n",
+ cpumask_pr_args(cpus));
+ return;
}
cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
@@ -237,17 +244,8 @@ static struct notifier_block init_amu_fie_notifier = {
static int __init init_amu_fie(void)
{
- int ret;
-
- if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
- return -ENOMEM;
-
- ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ return cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
- if (ret)
- free_cpumask_var(amu_fie_cpus);
-
- return ret;
}
core_initcall(init_amu_fie);
diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h
index 6c40f58b844a..c51b547b583e 100644
--- a/arch/arm64/kernel/trace-events-emulation.h
+++ b/arch/arm64/kernel/trace-events-emulation.h
@@ -18,7 +18,7 @@ TRACE_EVENT(instruction_emulation,
),
TP_fast_assign(
- __assign_str(instr, instr);
+ __assign_str(instr);
__entry->addr = addr;
),
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 215e6d7f2df8..4e26bd356a48 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -41,7 +41,7 @@
#include <asm/extable.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -500,6 +506,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr)
die("Oops - BTI", regs, esr);
}
+void do_el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0);
+}
+
+void do_el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ die("Oops - GCS", regs, esr);
+}
+
void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
{
force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
@@ -525,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
user_fastforward_single_step(current);
}
+void do_el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_mops_reset_regs(&regs->user_regs, esr);
+
+ kernel_fastforward_single_step(regs);
+}
+
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
@@ -601,18 +624,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)
@@ -838,6 +869,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_MOPS] = "MOPS",
[ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
[ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
+ [ESR_ELx_EC_GCS] = "Guarded Control Stack",
[ESR_ELx_EC_SERROR] = "SError",
[ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)",
[ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)",
@@ -1105,8 +1137,6 @@ static struct break_hook ubsan_break_hook = {
};
#endif
-#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
-
/*
* Initial handler for AArch64 BRK exceptions
* This handler only used until debug_traps_init().
@@ -1115,15 +1145,15 @@ int __init early_brk64(unsigned long addr, unsigned long esr,
struct pt_regs *regs)
{
#ifdef CONFIG_CFI_CLANG
- if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE)
+ if (esr_is_cfi_brk(esr))
return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
#ifdef CONFIG_UBSAN_TRAP
- if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
+ if ((esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 5562daf38a22..e8ed8e5b713b 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -19,7 +19,6 @@
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
-#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -34,19 +33,11 @@ enum vdso_abi {
VDSO_ABI_AA32,
};
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
unsigned long vdso_pages;
- /* Data Mapping */
- struct vm_special_mapping *dm;
/* Code Mapping */
struct vm_special_mapping *cm;
};
@@ -69,10 +60,7 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = {
/*
* The vDSO data page.
*/
-static union {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
+static union vdso_data_store vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
static int vdso_mremap(const struct vm_special_mapping *sm,
@@ -122,6 +110,8 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page)
return (struct vdso_data *)(vvar_page);
}
+static const struct vm_special_mapping vvar_map;
+
/*
* The vvar mapping contains data for a specific time namespace, so when a task
* changes namespace we must unmap its vvar data for the old namespace.
@@ -138,12 +128,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+ if (vma_is_special_mapping(vma, &vvar_map))
zap_vma_pages(vma);
-#ifdef CONFIG_COMPAT_VDSO
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
- zap_vma_pages(vma);
-#endif
}
mmap_read_unlock(mm);
@@ -185,6 +171,11 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
return vmf_insert_pfn(vma, vmf->address, pfn);
}
+static const struct vm_special_mapping vvar_map = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+};
+
static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
@@ -208,7 +199,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
VM_READ|VM_MAYREAD|VM_PFNMAP,
- vdso_info[abi].dm);
+ &vvar_map);
if (IS_ERR(ret))
goto up_fail;
@@ -238,7 +229,6 @@ up_fail:
enum aarch32_map {
AA32_MAP_VECTORS, /* kuser helpers */
AA32_MAP_SIGPAGE,
- AA32_MAP_VVAR,
AA32_MAP_VDSO,
};
@@ -263,10 +253,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
.pages = &aarch32_sig_page,
.mremap = aarch32_sigpage_mremap,
},
- [AA32_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
[AA32_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
@@ -316,7 +302,6 @@ static int __init __aarch32_alloc_vdso_pages(void)
if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
return 0;
- vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
return __vdso_init(VDSO_ABI_AA32);
@@ -411,26 +396,14 @@ out:
}
#endif /* CONFIG_COMPAT */
-enum aarch64_map {
- AA64_MAP_VVAR,
- AA64_MAP_VDSO,
-};
-
-static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
- [AA64_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
- [AA64_MAP_VDSO] = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
- },
+static struct vm_special_mapping aarch64_vdso_map __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
};
static int __init vdso_init(void)
{
- vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
- vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_map;
return __vdso_init(VDSO_ABI_AA64);
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 8818287f1095..35685c036044 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -9,7 +9,7 @@
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
-obj-vdso := vgettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
# potential future proofing if we end up with internal calls to the exported
# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
# preparation in build-time C")).
-ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ldflags-y := -shared -soname=linux-vdso.so.1 \
-Bsymbolic --build-id=sha1 -n $(btildflags-y)
ifdef CONFIG_LD_ORPHAN_WARN
@@ -34,26 +34,27 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
# -Wmissing-prototypes and -Wmissing-declarations are removed from
-# the CFLAGS of vgettimeofday.c to make possible to build the
-# kernel with CONFIG_WERROR enabled.
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
- $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
- -Wmissing-prototypes -Wmissing-declarations
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD := y
-KCOV_INSTRUMENT := n
-
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled.
+CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ -Wmissing-prototypes -Wmissing-declarations
+
+CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
+CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
+
+CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO)
+CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO)
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
-# Disable gcov profiling for VDSO code
-GCOV_PROFILE := n
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
@@ -68,7 +69,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 45354f2ddf70..47ad6944f9f0 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -11,7 +11,9 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm/vdso/vsyscall.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
@@ -19,10 +21,11 @@ OUTPUT_ARCH(aarch64)
SECTIONS
{
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+ PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
#ifdef CONFIG_TIME_NS
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -38,6 +41,7 @@ SECTIONS
*/
/DISCARD/ : {
*(.note.GNU-stack .note.gnu.property)
+ *(.ARM.attributes)
}
.note : { *(.note.*) } :text :note
@@ -102,6 +106,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..67890b445309
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/cache.h>
+#include <asm/assembler.h>
+
+ .text
+
+#define state0 v0
+#define state1 v1
+#define state2 v2
+#define state3 v3
+#define copy0 v4
+#define copy0_q q4
+#define copy1 v5
+#define copy2 v6
+#define copy3 v7
+#define copy3_d d7
+#define one_d d16
+#define one_q q16
+#define one_v v16
+#define tmp v17
+#define rot8 v18
+
+/*
+ * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-bytes
+ * counter. Importantly does not spill to the stack.
+ *
+ * This implementation avoids d8-d15 because they are callee-save in user
+ * space.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ *
+ * x0: output bytes
+ * x1: 32-byte key input
+ * x2: 8-byte counter input/output
+ * x3: number of 64-byte block to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+ /* copy0 = "expand 32-byte k" */
+ mov_q x8, 0x3320646e61707865
+ mov_q x9, 0x6b20657479622d32
+ mov copy0.d[0], x8
+ mov copy0.d[1], x9
+
+ /* copy1,copy2 = key */
+ ld1 { copy1.4s, copy2.4s }, [x1]
+ /* copy3 = counter || zero nonce */
+ ld1 { copy3.2s }, [x2]
+
+ movi one_v.2s, #1
+ uzp1 one_v.4s, one_v.4s, one_v.4s
+
+.Lblock:
+ /* copy state to auxiliary vectors for the final add after the permute. */
+ mov state0.16b, copy0.16b
+ mov state1.16b, copy1.16b
+ mov state2.16b, copy2.16b
+ mov state3.16b, copy3.16b
+
+ mov w4, 20
+.Lpermute:
+ /*
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers state0-state3. It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ */
+
+.Ldoubleround:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ ext state1.16b, state1.16b, state1.16b, #4
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #12
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ ext state1.16b, state1.16b, state1.16b, #12
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #4
+
+ subs w4, w4, #2
+ b.ne .Ldoubleround
+
+ /* output0 = state0 + state0 */
+ add state0.4s, state0.4s, copy0.4s
+ /* output1 = state1 + state1 */
+ add state1.4s, state1.4s, copy1.4s
+ /* output2 = state2 + state2 */
+ add state2.4s, state2.4s, copy2.4s
+ /* output2 = state3 + state3 */
+ add state3.4s, state3.4s, copy3.4s
+ st1 { state0.16b - state3.16b }, [x0]
+
+ /*
+ * ++copy3.counter, the 'add' clears the upper half of the SIMD register
+ * which is the expected behaviour here.
+ */
+ add copy3_d, copy3_d, one_d
+
+ /* output += 64, --nblocks */
+ add x0, x0, 64
+ subs x3, x3, #1
+ b.ne .Lblock
+
+ /* counter = copy3.counter */
+ st1 { copy3.2s }, [x2]
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+ movi state0.16b, #0
+ movi state1.16b, #0
+ movi state2.16b, #0
+ movi state3.16b, #0
+ movi copy1.16b, #0
+ movi copy2.16b, #0
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..832fe195292b
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <uapi/asm-generic/errno.h>
+
+typeof(__cvdso_getrandom) __kernel_getrandom;
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (alternative_has_cap_likely(ARM64_HAS_FPSIMD))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index f5f80fdce0fe..25a2cb6317f3 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -98,7 +98,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
# From arm vDSO Makefile
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
-VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += -shared --build-id=sha1
VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
@@ -136,7 +136,7 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
$(call if_changed,vdsomunge)
# Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+$(obj)/vdso.so.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_vdso_check)
# Compilation rules for the vDSO sources
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 8d95d7d35057..732702a187e9 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -22,7 +22,7 @@ SECTIONS
#ifdef CONFIG_TIME_NS
PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/vmcore_info.c
index 66cde752cd74..b19d5d6cb8b3 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/vmcore_info.c
@@ -4,7 +4,7 @@
* Copyright (C) Huawei Futurewei Technologies.
*/
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
#include <asm/cpufeature.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
@@ -23,7 +23,6 @@ void arch_crash_save_vmcoreinfo(void)
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
- vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..e73326bd3ff7 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -126,9 +126,9 @@ jiffies = jiffies_64;
#ifdef CONFIG_UNWIND_TABLES
#define UNWIND_DATA_SECTIONS \
.eh_frame : { \
- __eh_frame_start = .; \
+ __pi___eh_frame_start = .; \
*(.eh_frame) \
- __eh_frame_end = .; \
+ __pi___eh_frame_end = .; \
}
#else
#define UNWIND_DATA_SECTIONS
@@ -162,6 +162,7 @@ SECTIONS
/DISCARD/ : {
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
+ *(.ARM.attributes)
}
. = KIMAGE_VADDR;
@@ -264,27 +265,32 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
- __rela_start = .;
+ __pi_rela_start = .;
*(.rela .rela*)
- __rela_end = .;
+ __pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
- __relr_start = .;
+ __pi_relr_start = .;
*(.relr.dyn)
- __relr_end = .;
+ __pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
+
_data = .;
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
@@ -311,12 +317,17 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
+ /* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
. = ALIGN(PAGE_SIZE);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
+ /* end of zero-init region */
+
+ . += SZ_4K; /* stack for the early C runtime */
+ early_init_stack = .;
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
@@ -336,9 +347,6 @@ SECTIONS
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
}
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-
- .data.rel.ro : { *(.data.rel.ro) }
- ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"