summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile16
-rw-r--r--arch/arm64/kernel/Makefile.syscalls6
-rw-r--r--arch/arm64/kernel/acpi.c145
-rw-r--r--arch/arm64/kernel/acpi_numa.c13
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c2
-rw-r--r--arch/arm64/kernel/alternative.c19
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c13
-rw-r--r--arch/arm64/kernel/asm-offsets.c70
-rw-r--r--arch/arm64/kernel/cacheinfo.c12
-rw-r--r--arch/arm64/kernel/compat_alignment.c2
-rw-r--r--arch/arm64/kernel/cpu_errata.c228
-rw-r--r--arch/arm64/kernel/cpufeature.c1351
-rw-r--r--arch/arm64/kernel/cpuidle.c74
-rw-r--r--arch/arm64/kernel/cpuinfo.c175
-rw-r--r--arch/arm64/kernel/debug-monitors.c271
-rw-r--r--arch/arm64/kernel/efi-header.S6
-rw-r--r--arch/arm64/kernel/efi.c82
-rw-r--r--arch/arm64/kernel/elfcore.c3
-rw-r--r--arch/arm64/kernel/entry-common.c590
-rw-r--r--arch/arm64/kernel/entry-ftrace.S34
-rw-r--r--arch/arm64/kernel/entry.S51
-rw-r--r--arch/arm64/kernel/fpsimd.c848
-rw-r--r--arch/arm64/kernel/ftrace.c110
-rw-r--r--arch/arm64/kernel/head.S497
-rw-r--r--arch/arm64/kernel/hibernate.c8
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c67
-rw-r--r--arch/arm64/kernel/hyp-stub.S11
-rw-r--r--arch/arm64/kernel/idle.c4
-rw-r--r--arch/arm64/kernel/idreg-override.c338
-rw-r--r--arch/arm64/kernel/image-vars.h67
-rw-r--r--arch/arm64/kernel/io.c113
-rw-r--r--arch/arm64/kernel/irq.c22
-rw-r--r--arch/arm64/kernel/jump_label.c13
-rw-r--r--arch/arm64/kernel/kaslr.c13
-rw-r--r--arch/arm64/kernel/kexec_image.c6
-rw-r--r--arch/arm64/kernel/kgdb.c41
-rw-r--r--arch/arm64/kernel/machine_kexec.c61
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c24
-rw-r--r--arch/arm64/kernel/module-plts.c25
-rw-r--r--arch/arm64/kernel/module.c265
-rw-r--r--arch/arm64/kernel/mte.c77
-rw-r--r--arch/arm64/kernel/patching.c98
-rw-r--r--arch/arm64/kernel/pci.c191
-rw-r--r--arch/arm64/kernel/perf_callchain.c146
-rw-r--r--arch/arm64/kernel/pi/.gitignore3
-rw-r--r--arch/arm64/kernel/pi/Makefile34
-rw-r--r--arch/arm64/kernel/pi/idreg-override.c427
-rw-r--r--arch/arm64/kernel/pi/kaslr_early.c76
-rw-r--r--arch/arm64/kernel/pi/map_kernel.c289
-rw-r--r--arch/arm64/kernel/pi/map_range.c109
-rw-r--r--arch/arm64/kernel/pi/patch-scs.c (renamed from arch/arm64/kernel/patch-scs.c)127
-rw-r--r--arch/arm64/kernel/pi/pi.h38
-rw-r--r--arch/arm64/kernel/pi/relacheck.c130
-rw-r--r--arch/arm64/kernel/pi/relocate.c64
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c45
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h2
-rw-r--r--arch/arm64/kernel/probes/kprobes.c97
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S78
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c74
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.h4
-rw-r--r--arch/arm64/kernel/probes/uprobes.c75
-rw-r--r--arch/arm64/kernel/process.c347
-rw-r--r--arch/arm64/kernel/proton-pack.c288
-rw-r--r--arch/arm64/kernel/psci.c2
-rw-r--r--arch/arm64/kernel/ptrace.c435
-rw-r--r--arch/arm64/kernel/reloc_test_core.c1
-rw-r--r--arch/arm64/kernel/rsi.c175
-rw-r--r--arch/arm64/kernel/sdei.c12
-rw-r--r--arch/arm64/kernel/setup.c77
-rw-r--r--arch/arm64/kernel/signal.c649
-rw-r--r--arch/arm64/kernel/signal32.c17
-rw-r--r--arch/arm64/kernel/sigreturn32.S18
-rw-r--r--arch/arm64/kernel/sleep.S3
-rw-r--r--arch/arm64/kernel/smccc-call.S35
-rw-r--r--arch/arm64/kernel/smp.c465
-rw-r--r--arch/arm64/kernel/stacktrace.c501
-rw-r--r--arch/arm64/kernel/suspend.c16
-rw-r--r--arch/arm64/kernel/sys.c6
-rw-r--r--arch/arm64/kernel/sys32.c17
-rw-r--r--arch/arm64/kernel/sys_compat.c2
-rw-r--r--arch/arm64/kernel/syscall.c28
-rw-r--r--arch/arm64/kernel/topology.c227
-rw-r--r--arch/arm64/kernel/trace-events-emulation.h2
-rw-r--r--arch/arm64/kernel/traps.c215
-rw-r--r--arch/arm64/kernel/vdso.c136
-rw-r--r--arch/arm64/kernel/vdso/Makefile50
-rw-r--r--arch/arm64/kernel/vdso/vdso.lds.S12
-rw-r--r--arch/arm64/kernel/vdso/vgetrandom-chacha.S172
-rw-r--r--arch/arm64/kernel/vdso/vgetrandom.c15
-rw-r--r--arch/arm64/kernel/vdso32/Makefile48
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S9
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c2
-rw-r--r--arch/arm64/kernel/vmcore_info.c (renamed from arch/arm64/kernel/crash_core.c)5
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S54
-rw-r--r--arch/arm64/kernel/watchdog_hld.c58
95 files changed, 7209 insertions, 4770 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d95b3d6b471a..76f32e424065 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,8 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
- syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ syscall.o proton-pack.o idle.o patching.o pi/ \
+ rsi.o jump_label.o
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
sys_compat.o
@@ -47,8 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o
obj-$(CONFIG_PCI) += pci.o
@@ -57,7 +55,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
@@ -66,14 +64,12 @@ obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_CRASH_CORE) += crash_core.o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
-obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
-CFLAGS_patch-scs.o += -mbranch-protection=none
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
@@ -81,10 +77,10 @@ $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
obj-y += probes/
obj-y += head.o
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
-AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\""
endif
# for cleaning
diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls
new file mode 100644
index 000000000000..0542a718871a
--- /dev/null
+++ b/arch/arm64/kernel/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 +=
+syscall_abis_64 += renameat rlimit memfd_secret
+
+syscalltbl = arch/arm64/tools/syscall_%.tbl
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index dba8fcec7f33..af90128cfed5 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -26,9 +26,11 @@
#include <linux/libfdt.h>
#include <linux/smp.h>
#include <linux/serial_core.h>
+#include <linux/suspend.h>
#include <linux/pgtable.h>
#include <acpi/ghes.h>
+#include <acpi/processor.h>
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
@@ -44,6 +46,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
+static bool param_acpi_nospcr __initdata;
static int __init parse_acpi(char *arg)
{
@@ -57,6 +60,8 @@ static int __init parse_acpi(char *arg)
param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
+ else if (strcmp(arg, "nospcr") == 0) /* disable SPCR as default console */
+ param_acpi_nospcr = true;
else
return -EINVAL; /* Core will print when we return error */
@@ -128,7 +133,7 @@ static int __init acpi_fadt_sanity_check(void)
/*
* FADT is required on arm64; retrieve it to check its presence
- * and carry out revision and ACPI HW reduced compliancy tests
+ * and carry out revision and ACPI HW reduced compliance tests
*/
status = acpi_get_table(ACPI_SIG_FADT, 0, &table);
if (ACPI_FAILURE(status)) {
@@ -227,7 +232,27 @@ done:
if (earlycon_acpi_spcr_enable)
early_init_dt_scan_chosen_stdout();
} else {
- acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
+#ifdef CONFIG_HIBERNATION
+ struct acpi_table_header *facs = NULL;
+ acpi_get_table(ACPI_SIG_FACS, 1, &facs);
+ if (facs) {
+ swsusp_hardware_signature =
+ ((struct acpi_table_facs *)facs)->hardware_signature;
+ acpi_put_table(facs);
+ }
+#endif
+
+ /*
+ * For varying privacy and security reasons, sometimes need
+ * to completely silence the serial console output, and only
+ * enable it when needed.
+ * But there are many existing systems that depend on this
+ * behaviour, use acpi=nospcr to disable console in ACPI SPCR
+ * table as default serial console.
+ */
+ acpi_parse_spcr(earlycon_acpi_spcr_enable,
+ !param_acpi_nospcr);
+
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
}
@@ -352,7 +377,7 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
prot = __acpi_get_writethrough_mem_attribute();
}
}
- return ioremap_prot(phys, size, pgprot_val(prot));
+ return ioremap_prot(phys, size, prot);
}
/*
@@ -376,7 +401,7 @@ int apei_claim_sea(struct pt_regs *regs)
return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags());
if (regs)
- return_to_irqs_enabled = interrupts_enabled(regs);
+ return_to_irqs_enabled = !regs_irqs_disabled(regs);
/*
* SEA can interrupt SError, mask it and describe this as an NMI so
@@ -398,7 +423,7 @@ int apei_claim_sea(struct pt_regs *regs)
irq_work_run();
__irq_exit();
} else {
- pr_warn_ratelimited("APEI work queued but not completed");
+ pr_warn_ratelimited("APEI work queued but not completed\n");
err = -EINPROGRESS;
}
}
@@ -413,107 +438,23 @@ void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
memblock_mark_nomap(addr, size);
}
-#ifdef CONFIG_ACPI_FFH
-/*
- * Implements ARM64 specific callbacks to support ACPI FFH Operation Region as
- * specified in https://developer.arm.com/docs/den0048/latest
- */
-struct acpi_ffh_data {
- struct acpi_ffh_info info;
- void (*invoke_ffh_fn)(unsigned long a0, unsigned long a1,
- unsigned long a2, unsigned long a3,
- unsigned long a4, unsigned long a5,
- unsigned long a6, unsigned long a7,
- struct arm_smccc_res *args,
- struct arm_smccc_quirk *res);
- void (*invoke_ffh64_fn)(const struct arm_smccc_1_2_regs *args,
- struct arm_smccc_1_2_regs *res);
-};
-
-int acpi_ffh_address_space_arch_setup(void *handler_ctxt, void **region_ctxt)
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 apci_id,
+ int *pcpu)
{
- enum arm_smccc_conduit conduit;
- struct acpi_ffh_data *ffh_ctxt;
-
- if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2)
- return -EOPNOTSUPP;
-
- conduit = arm_smccc_1_1_get_conduit();
- if (conduit == SMCCC_CONDUIT_NONE) {
- pr_err("%s: invalid SMCCC conduit\n", __func__);
- return -EOPNOTSUPP;
+ /* If an error code is passed in this stub can't fix it */
+ if (*pcpu < 0) {
+ pr_warn_once("Unable to map CPU to valid ID\n");
+ return *pcpu;
}
- ffh_ctxt = kzalloc(sizeof(*ffh_ctxt), GFP_KERNEL);
- if (!ffh_ctxt)
- return -ENOMEM;
-
- if (conduit == SMCCC_CONDUIT_SMC) {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_smc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_smc;
- } else {
- ffh_ctxt->invoke_ffh_fn = __arm_smccc_hvc;
- ffh_ctxt->invoke_ffh64_fn = arm_smccc_1_2_hvc;
- }
-
- memcpy(ffh_ctxt, handler_ctxt, sizeof(ffh_ctxt->info));
-
- *region_ctxt = ffh_ctxt;
- return AE_OK;
-}
-
-static bool acpi_ffh_smccc_owner_allowed(u32 fid)
-{
- int owner = ARM_SMCCC_OWNER_NUM(fid);
-
- if (owner == ARM_SMCCC_OWNER_STANDARD ||
- owner == ARM_SMCCC_OWNER_SIP || owner == ARM_SMCCC_OWNER_OEM)
- return true;
-
- return false;
+ return 0;
}
+EXPORT_SYMBOL(acpi_map_cpu);
-int acpi_ffh_address_space_arch_handler(acpi_integer *value, void *region_context)
+int acpi_unmap_cpu(int cpu)
{
- int ret = 0;
- struct acpi_ffh_data *ffh_ctxt = region_context;
-
- if (ffh_ctxt->info.offset == 0) {
- /* SMC/HVC 32bit call */
- struct arm_smccc_res res;
- u32 a[8] = { 0 }, *ptr = (u32 *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(*ptr) || ARM_SMCCC_IS_64(*ptr) ||
- !acpi_ffh_smccc_owner_allowed(*ptr) ||
- ffh_ctxt->info.length > 32) {
- ret = AE_ERROR;
- } else {
- int idx, len = ffh_ctxt->info.length >> 2;
-
- for (idx = 0; idx < len; idx++)
- a[idx] = *(ptr + idx);
-
- ffh_ctxt->invoke_ffh_fn(a[0], a[1], a[2], a[3], a[4],
- a[5], a[6], a[7], &res, NULL);
- memcpy(value, &res, sizeof(res));
- }
-
- } else if (ffh_ctxt->info.offset == 1) {
- /* SMC/HVC 64bit call */
- struct arm_smccc_1_2_regs *r = (struct arm_smccc_1_2_regs *)value;
-
- if (!ARM_SMCCC_IS_FAST_CALL(r->a0) || !ARM_SMCCC_IS_64(r->a0) ||
- !acpi_ffh_smccc_owner_allowed(r->a0) ||
- ffh_ctxt->info.length > sizeof(*r)) {
- ret = AE_ERROR;
- } else {
- ffh_ctxt->invoke_ffh64_fn(r, r);
- memcpy(value, r, ffh_ctxt->info.length);
- }
- } else {
- ret = AE_ERROR;
- }
-
- return ret;
+ return 0;
}
-#endif /* CONFIG_ACPI_FFH */
+EXPORT_SYMBOL(acpi_unmap_cpu);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index e51535a5f939..2465f291c7e1 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -27,24 +27,13 @@
#include <asm/numa.h>
-static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE };
+static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE };
int __init acpi_numa_get_nid(unsigned int cpu)
{
return acpi_early_node_map[cpu];
}
-static inline int get_cpu_for_acpi_id(u32 uid)
-{
- int cpu;
-
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- if (uid == get_acpi_id_for_cpu(cpu))
- return cpu;
-
- return -EINVAL;
-}
-
static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index b1990e38aed0..e1be29e608b7 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -103,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
&mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
- arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+ arch_send_wakeup_ipi(cpu);
return 0;
}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af496..f5ec7e7c1d3f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
-static void __apply_alternatives(const struct alt_region *region,
- bool is_module,
- unsigned long *cpucap_mask)
+static int __apply_alternatives(const struct alt_region *region,
+ bool is_module,
+ unsigned long *cpucap_mask)
{
struct alt_instr *alt;
__le32 *origptr, *updptr;
@@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region,
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- if (ALT_HAS_CB(alt))
+ if (ALT_HAS_CB(alt)) {
alt_cb = ALT_REPL_PTR(alt);
- else
+ if (is_module && !core_kernel_text((unsigned long)alt_cb))
+ return -ENOEXEC;
+ } else {
alt_cb = patch_alternative;
+ }
alt_cb(alt, origptr, updptr, nr_inst);
@@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region,
bitmap_and(applied_alternatives, applied_alternatives,
system_cpucaps, ARM64_NCAPS);
}
+
+ return 0;
}
static void __init apply_alternatives_vdso(void)
@@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void)
}
#ifdef CONFIG_MODULES
-void apply_alternatives_module(void *start, size_t length)
+int apply_alternatives_module(void *start, size_t length)
{
struct alt_region region = {
.begin = start,
@@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
- __apply_alternatives(&region, true, &all_capabilities[0]);
+ return __apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index e459cfd33711..e737c6295ec7 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -52,10 +52,8 @@ struct insn_emulation {
int min;
int max;
- /*
- * sysctl for this emulation + a sentinal entry.
- */
- struct ctl_table sysctl[2];
+ /* sysctl for this emulation */
+ struct ctl_table sysctl;
};
#define ARM_OPCODE_CONDTEST_FAIL 0
@@ -464,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu)
for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
struct insn_emulation *insn = insn_emulations[i];
bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
+ if (insn->status == INSN_UNAVAILABLE)
+ continue;
+
if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
pr_warn("CPU[%u] cannot support the emulation of %s",
cpu, insn->name);
@@ -506,7 +507,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
return ret;
}
-static int emulation_proc_handler(struct ctl_table *table, int write,
+static int emulation_proc_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -558,7 +559,7 @@ static void __init register_insn_emulation(struct insn_emulation *insn)
update_insn_emulation_mode(insn, INSN_UNDEF);
if (insn->status != INSN_UNAVAILABLE) {
- sysctl = &insn->sysctl[0];
+ sysctl = &insn->sysctl;
sysctl->mode = 0644;
sysctl->maxlen = sizeof(int);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ff1942b04fc..b6367ff3a49c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -6,21 +6,19 @@
* 2001-2002 Keith Owens
* Copyright (C) 2012 ARM Ltd.
*/
+#define COMPILE_OFFSETS
#include <linux/arm_sdei.h>
#include <linux/sched.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/mm.h>
-#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
-#include <linux/preempt.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
-#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <linux/kbuild.h>
@@ -28,8 +26,6 @@
int main(void)
{
- DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
- BLANK();
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
@@ -75,51 +71,31 @@ int main(void)
DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
- DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_PC, offsetof(struct pt_regs, pc));
+ DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
- DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
+ DEFINE(S_PMR, offsetof(struct pt_regs, pmr));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
+ DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
- DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0]));
- DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2]));
- DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4]));
- DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6]));
- DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8]));
- DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp));
- DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr));
- DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp));
- DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc));
+ DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0]));
+ DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2]));
+ DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4]));
+ DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6]));
+ DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8]));
+ DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp));
+ DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr));
+ DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp));
+ DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc));
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
- DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp));
+ DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp));
#endif
- DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
+ DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs));
BLANK();
#endif
-#ifdef CONFIG_COMPAT
- DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
- DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
- BLANK();
-#endif
- DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
- BLANK();
- DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
- DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
- BLANK();
- DEFINE(VM_EXEC, VM_EXEC);
- BLANK();
- DEFINE(PAGE_SZ, PAGE_SIZE);
- BLANK();
- DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
- DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
- BLANK();
- DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
- DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
- DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
- BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val));
@@ -130,6 +106,7 @@ int main(void)
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_ELR_EL2, offsetof(struct kvm_cpu_context, sys_regs[ELR_EL2]));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
@@ -147,6 +124,7 @@ int main(void)
DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
+ DEFINE(NVHE_INIT_TMP, offsetof(struct kvm_nvhe_init_params, tmp));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
@@ -202,20 +180,10 @@ int main(void)
DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
#endif
BLANK();
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0]));
- DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1]));
- DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2]));
- DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3]));
- DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4]));
- DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5]));
- DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6]));
- DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7]));
- DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp));
- DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
#endif
+ DEFINE(PIE_E0_ASM, PIE_E0);
+ DEFINE(PIE_E1_ASM, PIE_E1);
return 0;
}
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index d9c9218fa1fd..309942b06c5b 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu)
unsigned int level, idx;
enum cache_type type;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ struct cacheinfo *infos = this_cpu_ci->info_list;
for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
- idx < this_cpu_ci->num_leaves; idx++, level++) {
+ idx < this_cpu_ci->num_leaves; level++) {
type = get_cache_type(level);
if (type == CACHE_TYPE_SEPARATE) {
- ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
- ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ if (idx + 1 >= this_cpu_ci->num_leaves)
+ break;
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level);
+ ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level);
} else {
- ci_leaf_init(this_leaf++, type, level);
+ ci_leaf_init(&infos[idx++], type, level);
}
}
return 0;
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
index deff21bfa680..b68e1d328d4c 100644
--- a/arch/arm64/kernel/compat_alignment.c
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
return 1;
}
+ if (!handler)
+ return 1;
type = handler(addr, instr, regs);
if (type == TYPE_ERROR || type == TYPE_FAULT)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index be66e94a21bd..8cb3b575a031 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -14,31 +14,85 @@
#include <asm/kvm_asm.h>
#include <asm/smp_plat.h>
+static u64 target_impl_cpu_num;
+static struct target_impl_cpu *target_impl_cpus;
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus)
+{
+ if (target_impl_cpu_num || !num || !impl_cpus)
+ return false;
+
+ target_impl_cpu_num = num;
+ target_impl_cpus = impl_cpus;
+ return true;
+}
+
+static inline bool is_midr_in_range(struct midr_range const *range)
+{
+ int i;
+
+ if (!target_impl_cpu_num)
+ return midr_is_cpu_model_range(read_cpuid_id(), range->model,
+ range->rv_min, range->rv_max);
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (midr_is_cpu_model_range(target_impl_cpus[i].midr,
+ range->model,
+ range->rv_min, range->rv_max))
+ return true;
+ }
+ return false;
+}
+
+bool is_midr_in_range_list(struct midr_range const *ranges)
+{
+ while (ranges->model)
+ if (is_midr_in_range(ranges++))
+ return true;
+ return false;
+}
+EXPORT_SYMBOL_GPL(is_midr_in_range_list);
+
static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+__is_affected_midr_range(const struct arm64_cpu_capabilities *entry,
+ u32 midr, u32 revidr)
{
const struct arm64_midr_revidr *fix;
- u32 midr = read_cpuid_id(), revidr;
-
- WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- if (!is_midr_in_range(midr, &entry->midr_range))
+ if (!is_midr_in_range(&entry->midr_range))
return false;
midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
- revidr = read_cpuid(REVIDR_EL1);
for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++)
if (midr == fix->midr_rv && (revidr & fix->revidr_mask))
return false;
-
return true;
}
static bool __maybe_unused
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ int i;
+
+ if (!target_impl_cpu_num) {
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return __is_affected_midr_range(entry, read_cpuid_id(),
+ read_cpuid(REVIDR_EL1));
+ }
+
+ for (i = 0; i < target_impl_cpu_num; i++) {
+ if (__is_affected_midr_range(entry, target_impl_cpus[i].midr,
+ target_impl_cpus[i].midr))
+ return true;
+ }
+ return false;
+}
+
+static bool __maybe_unused
is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
int scope)
{
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
+ return is_midr_in_range_list(entry->midr_range_list);
}
static bool __maybe_unused
@@ -121,22 +175,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
}
-static DEFINE_RAW_SPINLOCK(reg_user_mask_modification);
-static void __maybe_unused
-cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused)
-{
- struct arm64_ftr_reg *regp;
-
- regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
- if (!regp)
- return;
-
- raw_spin_lock(&reg_user_mask_modification);
- if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK)
- regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
- raw_spin_unlock(&reg_user_mask_modification);
-}
-
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
.matches = is_affected_midr_range, \
.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -202,12 +240,48 @@ static bool __maybe_unused
has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
int scope)
{
- u32 midr = read_cpuid_id();
bool has_dic = read_cpuid_cachetype() & BIT(CTR_EL0_DIC_SHIFT);
const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range(midr, &range) && has_dic;
+ return is_midr_in_range(&range) && has_dic;
+}
+
+static const struct midr_range impdef_pmuv3_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+};
+
+static bool has_impdef_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
+
+ if (!is_kernel_in_hyp_mode())
+ return false;
+
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
+
+ return is_midr_in_range_list(impdef_pmuv3_cpus);
+}
+
+static void cpu_enable_impdef_pmuv3_traps(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set_s(SYS_HACR_EL2, 0, BIT(56));
}
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
@@ -261,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
-const struct midr_range cavium_erratum_27456_cpus[] = {
+static const struct midr_range cavium_erratum_27456_cpus[] = {
/* Cavium ThunderX, T88 pass 1.x - 2.1 */
MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
/* Cavium ThunderX, T81 pass 1.0 */
@@ -390,6 +464,7 @@ static const struct midr_range erratum_1463225[] = {
static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2139208
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -403,6 +478,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
static const struct midr_range tsb_flush_fail_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2067961
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2054223
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -415,6 +491,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = {
static struct midr_range trbe_write_out_of_range_cpus[] = {
#ifdef CONFIG_ARM64_ERRATUM_2253138
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
@@ -432,6 +509,63 @@ static struct midr_range broken_aarch32_aes[] = {
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+static const struct midr_range erratum_spec_unpriv_load_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_3117295
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ /* Cortex-A520 r0p0 to r0p1 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+#endif
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_ERRATUM_3194386
+static const struct midr_range erratum_spec_ssbs_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X4),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
+ MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3AE),
+ {}
+};
+#endif
+
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+static const struct midr_range erratum_ac04_cpu_23_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -727,16 +861,52 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
/* Cortex-A510 r0p0 - r1p1 */
ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
- .cpu_enable = cpu_clear_bf16_from_user_emulation,
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_3194386
+ {
+ .desc = "SSBS not fully self-synchronizing",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_SSBS,
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
+ },
+#endif
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ {
+ .desc = "ARM errata 2966298, 3117295",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
+ /* Cortex-A520 r0p0 - r0p1 */
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
},
#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
+ },
+#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC04_CPU_23
+ {
+ .desc = "AmpereOne erratum AC04_CPU_23",
+ .capability = ARM64_WORKAROUND_AMPERE_AC04_CPU_23,
+ ERRATA_MIDR_RANGE_LIST(erratum_ac04_cpu_23_list),
},
#endif
{
+ .desc = "Broken CNTVOFF_EL2",
+ .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+ ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+ MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+ {}
+ })),
+ },
+ {
+ .desc = "Apple IMPDEF PMUv3 Traps",
+ .capability = ARM64_WORKAROUND_PMUV3_IMPDEF_TRAPS,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_impdef_pmuv3,
+ .cpu_enable = cpu_enable_impdef_pmuv3_traps,
+ },
+ {
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a5f533f63b60..c840a93b9ef9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@
#include <linux/cpu.h>
#include <linux/kasan.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
@@ -83,8 +84,10 @@
#include <asm/hwcap.h>
#include <asm/insn.h>
#include <asm/kvm_host.h>
+#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
+#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
@@ -92,6 +95,7 @@
#include <asm/vectors.h>
#include <asm/virt.h>
+#include <asm/spectre.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
@@ -103,6 +107,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
+unsigned int compat_elf_hwcap3 __read_mostly;
#endif
DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
@@ -111,7 +116,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC
DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
-bool arm64_use_ng_mappings = false;
+/*
+ * arm64_use_ng_mappings must be placed in the .data section, otherwise it
+ * ends up in the .bss section where it is initialized in early_map_kernel()
+ * after the MMU (with the idmap) was enabled. create_init_idmap() - which
+ * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG -
+ * may end up generating an incorrect idmap page table attributes.
+ */
+bool arm64_use_ng_mappings __read_mostly = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors;
@@ -140,12 +152,42 @@ void dump_cpu_features(void)
pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
}
+#define __ARM64_MAX_POSITIVE(reg, field) \
+ ((reg##_##field##_SIGNED ? \
+ BIT(reg##_##field##_WIDTH - 1) : \
+ BIT(reg##_##field##_WIDTH)) - 1)
+
+#define __ARM64_MIN_NEGATIVE(reg, field) BIT(reg##_##field##_WIDTH - 1)
+
+#define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value) \
+ .sys_reg = SYS_##reg, \
+ .field_pos = reg##_##field##_SHIFT, \
+ .field_width = reg##_##field##_WIDTH, \
+ .sign = reg##_##field##_SIGNED, \
+ .min_field_value = min_value, \
+ .max_field_value = max_value,
+
+/*
+ * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to
+ * an implicit maximum that depends on the sign-ess of the field.
+ *
+ * An unsigned field will be capped at all ones, while a signed field
+ * will be limited to the positive half only.
+ */
#define ARM64_CPUID_FIELDS(reg, field, min_value) \
- .sys_reg = SYS_##reg, \
- .field_pos = reg##_##field##_SHIFT, \
- .field_width = reg##_##field##_WIDTH, \
- .sign = reg##_##field##_SIGNED, \
- .min_field_value = reg##_##field##_##min_value,
+ __ARM64_CPUID_FIELDS(reg, field, \
+ SYS_FIELD_VALUE(reg, field, min_value), \
+ __ARM64_MAX_POSITIVE(reg, field))
+
+/*
+ * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an
+ * implicit minimal value to max_value. This should be used when
+ * matching a non-implemented property.
+ */
+#define ARM64_CPUID_FIELDS_NEG(reg, field, max_value) \
+ __ARM64_CPUID_FIELDS(reg, field, \
+ __ARM64_MIN_NEGATIVE(reg, field), \
+ SYS_FIELD_VALUE(reg, field, max_value))
#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
@@ -198,6 +240,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
@@ -220,9 +263,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
@@ -233,6 +278,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64isar3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FPRCVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_LSFE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
@@ -248,12 +300,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_EL1_IMP),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_EL0_IMP),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
@@ -266,22 +322,35 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F16MM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_B16B16_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_EltPerm_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0),
@@ -292,6 +361,8 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0),
@@ -304,6 +375,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0),
@@ -313,6 +388,34 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SBitPerm_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_AES_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SFEXPA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_STMOP_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
+ FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMOP4_SHIFT, 1, 0),
+ ARM64_FTR_END,
+};
+
+static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM8_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8MM4_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0),
ARM64_FTR_END,
};
@@ -363,6 +466,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
@@ -399,11 +503,20 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_NV_frac_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
@@ -610,15 +723,11 @@ static const struct arm64_ftr_bits ftr_id_dfr1[] = {
ARM64_FTR_END,
};
-static const struct arm64_ftr_bits ftr_zcr[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
- ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_WIDTH, 0), /* LEN */
- ARM64_FTR_END,
-};
-
-static const struct arm64_ftr_bits ftr_smcr[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE,
- SMCR_ELx_LEN_SHIFT, SMCR_ELx_LEN_WIDTH, 0), /* LEN */
+static const struct arm64_ftr_bits ftr_mpamidr[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PMG_MAX_SHIFT, MPAMIDR_EL1_PMG_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_VPMR_MAX_SHIFT, MPAMIDR_EL1_VPMR_MAX_WIDTH, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_HAS_HCR_SHIFT, 1, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, MPAMIDR_EL1_PARTID_MAX_SHIFT, MPAMIDR_EL1_PARTID_MAX_WIDTH, 0),
ARM64_FTR_END,
};
@@ -664,15 +773,17 @@ static const struct arm64_ftr_bits ftr_raz[] = {
#define ARM64_FTR_REG(id, table) \
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
-struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
-struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
-struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64mmfr2_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64pfr1_override;
+struct arm64_ftr_override __read_mostly id_aa64zfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64smfr0_override;
+struct arm64_ftr_override __read_mostly id_aa64isar1_override;
+struct arm64_ftr_override __read_mostly id_aa64isar2_override;
-struct arm64_ftr_override arm64_sw_feature_override;
+struct arm64_ftr_override __read_mostly arm64_sw_feature_override;
static const struct __ftr_reg_entry {
u32 sys_id;
@@ -711,10 +822,12 @@ static const struct __ftr_reg_entry {
&id_aa64pfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
+ ARM64_FTR_REG(SYS_ID_AA64PFR2_EL1, ftr_id_aa64pfr2),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
&id_aa64zfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
&id_aa64smfr0_override),
+ ARM64_FTR_REG(SYS_ID_AA64FPFR0_EL1, ftr_id_aa64fpfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -726,17 +839,20 @@ static const struct __ftr_reg_entry {
&id_aa64isar1_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2,
&id_aa64isar2_override),
+ ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3),
/* Op1 = 0, CRn = 0, CRm = 7 */
- ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0,
+ &id_aa64mmfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
- ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
+ &id_aa64mmfr2_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
- /* Op1 = 0, CRn = 1, CRm = 2 */
- ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
- ARM64_FTR_REG(SYS_SMCR_EL1, ftr_smcr),
+ /* Op1 = 0, CRn = 10, CRm = 4 */
+ ARM64_FTR_REG(SYS_MPAMIDR_EL1, ftr_mpamidr),
/* Op1 = 1, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -887,7 +1003,7 @@ static void __init sort_ftr_regs(void)
/*
* Initialise the CPU feature register from Boot CPU values.
- * Also initiliases the strict_mask for the register.
+ * Also initialises the strict_mask for the register.
* Any bits that are not covered by an arm64_ftr_bits entry are considered
* RES0 for the system-wide value, and must strictly match.
*/
@@ -923,16 +1039,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
/* Override was valid */
ftr_new = tmp;
str = "forced";
- } else if (ftr_ovr == tmp) {
+ } else {
/* Override was the safe value */
str = "already set";
}
- if (str)
- pr_warn("%s[%d:%d]: %s to %llx\n",
- reg->name,
- ftrp->shift + ftrp->width - 1,
- ftrp->shift, str, tmp);
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str,
+ tmp & (BIT(ftrp->width) - 1));
} else if ((ftr_mask & reg->override->val) == ftr_mask) {
reg->override->val &= ~ftr_mask;
pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1012,6 +1128,37 @@ static void init_32bit_cpu_features(struct cpuinfo_32bit *info)
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
}
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+static bool enable_pseudo_nmi;
+
+static int __init early_enable_pseudo_nmi(char *p)
+{
+ return kstrtobool(p, &enable_pseudo_nmi);
+}
+early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi);
+
+static __init void detect_system_supports_pseudo_nmi(void)
+{
+ struct device_node *np;
+
+ if (!enable_pseudo_nmi)
+ return;
+
+ /*
+ * Detect broken MediaTek firmware that doesn't properly save and
+ * restore GIC priorities.
+ */
+ np = of_find_compatible_node(NULL, NULL, "arm,gic-v3");
+ if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) {
+ pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n");
+ enable_pseudo_nmi = false;
+ }
+ of_node_put(np);
+}
+#else /* CONFIG_ARM64_PSEUDO_NMI */
+static inline void detect_system_supports_pseudo_nmi(void) { }
+#endif
+
void __init init_cpu_features(struct cpuinfo_arm64 *info)
{
/* Before we start using the tables, make sure it is sorted */
@@ -1025,52 +1172,47 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2);
+ init_cpu_ftr_reg(SYS_ID_AA64ISAR3_EL1, info->reg_id_aa64isar3);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+ init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
+ init_cpu_ftr_reg(SYS_ID_AA64FPFR0_EL1, info->reg_id_aa64fpfr0);
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
init_32bit_cpu_features(&info->aarch32);
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
- info->reg_zcr = read_zcr_features();
- init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
vec_init_vq_map(ARM64_VEC_SVE);
+
+ cpacr_restore(cpacr);
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
- info->reg_smcr = read_smcr_features();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
- init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
+
vec_init_vq_map(ARM64_VEC_SME);
+
+ cpacr_restore(cpacr);
+ }
+
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ init_cpu_ftr_reg(SYS_MPAMIDR_EL1, info->reg_mpamidr);
}
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
-
- /*
- * Initialize the indirect array of CPU capabilities pointers before we
- * handle the boot CPU below.
- */
- init_cpucap_indirect_list();
-
- /*
- * Detect and enable early CPU capabilities based on the boot CPU,
- * after we have initialised the CPU feature infrastructure.
- */
- setup_boot_cpu_capabilities();
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -1262,6 +1404,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu,
info->reg_id_aa64isar2, boot->reg_id_aa64isar2);
+ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR3_EL1, cpu,
+ info->reg_id_aa64isar3, boot->reg_id_aa64isar3);
/*
* Differing PARange support is fine as long as all peripherals and
@@ -1276,11 +1420,15 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR4_EL1, cpu,
+ info->reg_id_aa64mmfr4, boot->reg_id_aa64mmfr4);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+ taint |= check_update_ftr_reg(SYS_ID_AA64PFR2_EL1, cpu,
+ info->reg_id_aa64pfr2, boot->reg_id_aa64pfr2);
taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu,
info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0);
@@ -1288,32 +1436,36 @@ void update_cpu_features(int cpu,
taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
+ taint |= check_update_ftr_reg(SYS_ID_AA64FPFR0_EL1, cpu,
+ info->reg_id_aa64fpfr0, boot->reg_id_aa64fpfr0);
+
+ /* Probe vector lengths */
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
- info->reg_zcr = read_zcr_features();
- taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
- info->reg_zcr, boot->reg_zcr);
+ if (!system_capabilities_finalized()) {
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
- /* Probe vector lengths */
- if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SVE);
+
+ cpacr_restore(cpacr);
+ }
}
if (IS_ENABLED(CONFIG_ARM64_SME) &&
id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
- info->reg_smcr = read_smcr_features();
- /*
- * We mask out SMPS since even if the hardware
- * supports priorities the kernel does not at present
- * and we block access to them.
- */
- info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
- taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
- info->reg_smcr, boot->reg_smcr);
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
/* Probe vector lengths */
if (!system_capabilities_finalized())
vec_update_vq_map(ARM64_VEC_SME);
+
+ cpacr_restore(cpacr);
+ }
+
+ if (id_aa64pfr0_mpam(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+ info->reg_mpamidr = read_cpuid(MPAMIDR_EL1);
+ taint |= check_update_ftr_reg(SYS_MPAMIDR_EL1, cpu,
+ info->reg_mpamidr, boot->reg_mpamidr);
}
/*
@@ -1398,17 +1550,21 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64PFR0_EL1);
read_sysreg_case(SYS_ID_AA64PFR1_EL1);
+ read_sysreg_case(SYS_ID_AA64PFR2_EL1);
read_sysreg_case(SYS_ID_AA64ZFR0_EL1);
read_sysreg_case(SYS_ID_AA64SMFR0_EL1);
+ read_sysreg_case(SYS_ID_AA64FPFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR0_EL1);
read_sysreg_case(SYS_ID_AA64DFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
+ read_sysreg_case(SYS_ID_AA64ISAR3_EL1);
read_sysreg_case(SYS_CNTFRQ_EL0);
read_sysreg_case(SYS_CTR_EL0);
@@ -1439,11 +1595,28 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
static bool
feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
{
- int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
- entry->field_width,
- entry->sign);
+ int val, min, max;
+ u64 tmp;
- return val >= entry->min_field_value;
+ val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+ entry->field_width,
+ entry->sign);
+
+ tmp = entry->min_field_value;
+ tmp <<= entry->field_pos;
+
+ min = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+ entry->field_width,
+ entry->sign);
+
+ tmp = entry->max_field_value;
+ tmp <<= entry->field_pos;
+
+ max = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+ entry->field_width,
+ entry->sign);
+
+ return val >= min && val <= max;
}
static u64
@@ -1494,6 +1667,11 @@ const struct cpumask *system_32bit_el0_cpumask(void)
return cpu_possible_mask;
}
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, housekeeping_cpumask(HK_TYPE_TICK));
+}
+
static int __init parse_32bit_el0_param(char *str)
{
allow_mismatched_32bit_el0 = true;
@@ -1553,24 +1731,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
return has_sre;
}
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u32 midr = read_cpuid_id();
-
- /* Cavium ThunderX pass 1.x and 2.x */
- return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
- MIDR_CPU_VAR_REV(0, 0),
- MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
-}
-
-static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
- return cpuid_feature_extract_signed_field(pfr0,
- ID_AA64PFR0_EL1_FP_SHIFT) < 0;
-}
-
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -1620,52 +1780,12 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
if (is_kdump_kernel())
return false;
- if (cpus_have_const_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
+ if (cpus_have_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
return false;
return has_cpuid_feature(entry, scope);
}
-/*
- * This check is triggered during the early boot before the cpufeature
- * is initialised. Checking the status on the local CPU allows the boot
- * CPU to detect the need for non-global mappings and thus avoiding a
- * pagetable re-write after all the CPUs are booted. This check will be
- * anyway run on individual CPUs, allowing us to get the consistent
- * state once the SMP CPUs are up and thus make the switch to non-global
- * mappings if required.
- */
-bool kaslr_requires_kpti(void)
-{
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return false;
-
- /*
- * E0PD does a similar job to KPTI so can be used instead
- * where available.
- */
- if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
- u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
- if (cpuid_feature_extract_unsigned_field(mmfr2,
- ID_AA64MMFR2_EL1_E0PD_SHIFT))
- return false;
- }
-
- /*
- * Systems affected by Cavium erratum 24756 are incompatible
- * with KPTI.
- */
- if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
- extern const struct midr_range cavium_erratum_27456_cpus[];
-
- if (is_midr_in_range_list(read_cpuid_id(),
- cavium_erratum_27456_cpus))
- return false;
- }
-
- return kaslr_enabled();
-}
-
static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -1694,7 +1814,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
char const *str = "kpti command line option";
bool meltdown_safe;
- meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list);
+ meltdown_safe = is_midr_in_range_list(kpti_safe_list);
/* Defer to CPU feature registers */
if (has_cpuid_feature(entry, scope))
@@ -1718,7 +1838,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
}
/* Useful for KASLR robustness */
- if (kaslr_requires_kpti()) {
+ if (kaslr_enabled() && kaslr_requires_kpti()) {
if (!__kpti_forced) {
str = "KASLR";
__kpti_forced = 1;
@@ -1745,93 +1865,92 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
-
-extern
-void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
- phys_addr_t size, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(int), int flags);
+static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ /*
+ * Although the Apple M2 family appears to support NV1, the
+ * PTW barfs on the nVHE EL2 S1 page table format. Pretend
+ * that it doesn't support NV1 at all.
+ */
+ static const struct midr_range nv1_ni_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {}
+ };
-static phys_addr_t kpti_ng_temp_alloc;
+ return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
+ !(has_cpuid_feature(entry, scope) ||
+ is_midr_in_range_list(nv1_ni_list)));
+}
-static phys_addr_t kpti_ng_pgd_alloc(int shift)
+#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
+static bool has_lpa2_at_stage1(u64 mmfr0)
{
- kpti_ng_temp_alloc -= PAGE_SIZE;
- return kpti_ng_temp_alloc;
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
}
-static void
-kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
+static bool has_lpa2_at_stage2(u64 mmfr0)
{
- typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
- extern kpti_remap_fn idmap_kpti_install_ng_mappings;
- kpti_remap_fn *remap_fn;
+ unsigned int tgran;
- int cpu = smp_processor_id();
- int levels = CONFIG_PGTABLE_LEVELS;
- int order = order_base_2(levels);
- u64 kpti_ng_temp_pgd_pa = 0;
- pgd_t *kpti_ng_temp_pgd;
- u64 alloc = 0;
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
+}
- if (__this_cpu_read(this_cpu_vector) == vectors) {
- const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 mmfr0;
- __this_cpu_write(this_cpu_vector, v);
- }
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
+}
+#else
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return false;
+}
+#endif
+
+#ifdef CONFIG_HW_PERF_EVENTS
+static bool has_pmuv3(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ unsigned int pmuver;
/*
- * We don't need to rewrite the page-tables if either we've done
- * it already or we have KASLR enabled and therefore have not
- * created any global mappings at all.
+ * PMUVer follows the standard ID scheme for an unsigned field with the
+ * exception of 0xF (IMP_DEF) which is treated specially and implies
+ * FEAT_PMUv3 is not implemented.
+ *
+ * See DDI0487L.a D24.1.3.2 for more details.
*/
- if (arm64_use_ng_mappings)
- return;
+ pmuver = cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ return false;
- remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
-
- if (!cpu) {
- alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
- kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
- kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
-
- //
- // Create a minimal page table hierarchy that permits us to map
- // the swapper page tables temporarily as we traverse them.
- //
- // The physical pages are laid out as follows:
- //
- // +--------+-/-------+-/------ +-\\--------+
- // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
- // +--------+-\-------+-\------ +-//--------+
- // ^
- // The first page is mapped into this hierarchy at a PMD_SHIFT
- // aligned virtual address, so that we can manipulate the PTE
- // level entries while the mapping is active. The first entry
- // covers the PTE[] page itself, the remaining entries are free
- // to be used as a ad-hoc fixmap.
- //
- create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
- KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
- kpti_ng_pgd_alloc, 0);
- }
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP;
+}
+#endif
- cpu_install_idmap();
- remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
- cpu_uninstall_idmap();
+static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap)
+{
+ if (__this_cpu_read(this_cpu_vector) == vectors) {
+ const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
- if (!cpu) {
- free_pages(alloc, order);
- arm64_use_ng_mappings = true;
+ __this_cpu_write(this_cpu_vector, v);
}
+
}
-#else
-static void
-kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
-{
-}
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
static int __init parse_kpti(char *str)
{
@@ -1847,9 +1966,11 @@ static int __init parse_kpti(char *str)
early_param("kpti", parse_kpti);
#ifdef CONFIG_ARM64_HW_AFDBM
+static struct cpumask dbm_cpus __read_mostly;
+
static inline void __cpu_enable_hw_dbm(void)
{
- u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_HD;
write_sysreg(tcr, tcr_el1);
isb();
@@ -1871,7 +1992,7 @@ static bool cpu_has_broken_dbm(void)
{},
};
- return is_midr_in_range_list(read_cpuid_id(), cpus);
+ return is_midr_in_range_list(cpus);
}
static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
@@ -1882,35 +2003,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
{
- if (cpu_can_use_dbm(cap))
+ if (cpu_can_use_dbm(cap)) {
__cpu_enable_hw_dbm();
+ cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
+ }
}
static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
int __unused)
{
- static bool detected = false;
/*
* DBM is a non-conflicting feature. i.e, the kernel can safely
* run a mix of CPUs with and without the feature. So, we
* unconditionally enable the capability to allow any late CPU
* to use the feature. We only enable the control bits on the
- * CPU, if it actually supports.
- *
- * We have to make sure we print the "feature" detection only
- * when at least one CPU actually uses it. So check if this CPU
- * can actually use it and print the message exactly once.
- *
- * This is safe as all CPUs (including secondary CPUs - due to the
- * LOCAL_CPU scope - and the hotplugged CPUs - via verification)
- * goes through the "matches" check exactly once. Also if a CPU
- * matches the criteria, it is guaranteed that the CPU will turn
- * the DBM on, as the capability is unconditionally enabled.
+ * CPU, if it is supported.
*/
- if (!detected && cpu_can_use_dbm(cap)) {
- detected = true;
- pr_info("detected: Hardware dirty bit management\n");
- }
return true;
}
@@ -1943,8 +2051,6 @@ int get_cpu_with_amu_feat(void)
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
- pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
- smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
/* 0 reference values signal broken/disabled counters */
@@ -2003,7 +2109,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
if (kvm_get_mode() != KVM_MODE_NV)
return false;
- if (!has_cpuid_feature(cap, scope)) {
+ if (!cpucap_multi_entry_cap_matches(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
@@ -2014,14 +2120,48 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
int __unused)
{
- u64 val;
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
+}
+
+bool cpu_supports_bbml2_noabort(void)
+{
+ /*
+ * We want to allow usage of BBML2 in as wide a range of kernel contexts
+ * as possible. This list is therefore an allow-list of known-good
+ * implementations that both support BBML2 and additionally, fulfill the
+ * extra constraint of never generating TLB conflict aborts when using
+ * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain
+ * kernel contexts difficult to prove safe against recursive aborts).
+ *
+ * Note that implementations can only be considered "known-good" if their
+ * implementors attest to the fact that the implementation never raises
+ * TLB conflict aborts for BBML2 mapping granularity changes.
+ */
+ static const struct midr_range supports_bbml2_noabort_list[] = {
+ MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3AE, 0, 2, 0xf),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {}
+ };
- val = read_sysreg(id_aa64mmfr1_el1);
- if (!cpuid_feature_extract_unsigned_field(val, ID_AA64MMFR1_EL1_VH_SHIFT))
+ /* Does our cpu guarantee to never raise TLB conflict aborts? */
+ if (!is_midr_in_range_list(supports_bbml2_noabort_list))
return false;
- val = arm64_sw_feature_override.val & arm64_sw_feature_override.mask;
- return cpuid_feature_extract_unsigned_field(val, ARM64_SW_FEATURE_OVERRIDE_HVHE);
+ /*
+ * We currently ignore the ID_AA64MMFR2_EL1 register, and only care
+ * about whether the MIDR check passes.
+ */
+
+ return true;
+}
+
+static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
+{
+ return cpu_supports_bbml2_noabort();
}
#ifdef CONFIG_ARM64_PAN
@@ -2044,6 +2184,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
/* Firmware may have left a deferred SError in this register. */
write_sysreg_s(0, SYS_DISR_EL1);
}
+static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope)
+{
+ const struct arm64_cpu_capabilities rasv1p1_caps[] = {
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1)
+ },
+ };
+
+ return (has_cpuid_feature(&rasv1p1_caps[0], scope) ||
+ (has_cpuid_feature(&rasv1p1_caps[1], scope) &&
+ has_cpuid_feature(&rasv1p1_caps[2], scope)));
+}
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -2098,28 +2256,20 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
{
if (this_cpu_has_cap(ARM64_HAS_E0PD))
- sysreg_clear_set(tcr_el1, 0, TCR_E0PD1);
+ sysreg_clear_set(tcr_el1, 0, TCR_EL1_E0PD1);
}
#endif /* CONFIG_ARM64_E0PD */
#ifdef CONFIG_ARM64_PSEUDO_NMI
-static bool enable_pseudo_nmi;
-
-static int __init early_enable_pseudo_nmi(char *p)
-{
- return kstrtobool(p, &enable_pseudo_nmi);
-}
-early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi);
-
static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2154,6 +2304,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
}
#endif
+static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ static const struct midr_range has_vgic_v3[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+ {},
+ };
+ struct arm_smccc_res res = {};
+
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
+ BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
+ if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
+ !is_midr_in_range_list(has_vgic_v3))
+ return false;
+
+ if (!is_hyp_mode_available())
+ return false;
+
+ if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
+ return true;
+
+ if (is_kernel_in_hyp_mode())
+ res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
+ else
+ arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
+
+ if (res.a0 == HVC_STUB_ERR)
+ return false;
+
+ return res.a1 & ICH_VTR_EL2_TDS;
+}
+
#ifdef CONFIG_ARM64_BTI
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
{
@@ -2172,29 +2365,52 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
}
#endif /* CONFIG_ARM64_MTE */
+static void user_feature_fixup(void)
+{
+ if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
+ struct arm64_ftr_reg *regp;
+
+ regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ if (regp)
+ regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
+ }
+
+ if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_SSBS)) {
+ struct arm64_ftr_reg *regp;
+
+ regp = get_arm64_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ if (regp)
+ regp->user_mask &= ~ID_AA64PFR1_EL1_SSBS_MASK;
+ }
+}
+
static void elf_hwcap_fixup(void)
{
-#ifdef CONFIG_ARM64_ERRATUM_1742098
- if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
+#ifdef CONFIG_COMPAT
+ if (cpus_have_cap(ARM64_WORKAROUND_1742098))
compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
-#endif /* ARM64_ERRATUM_1742098 */
+#endif /* CONFIG_COMPAT */
}
#ifdef CONFIG_KVM
@@ -2219,6 +2435,22 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE);
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused)
+{
+ /* GCSPR_EL0 is always readable */
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2238,6 +2470,45 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
+static bool
+test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!has_cpuid_feature(entry, scope))
+ return false;
+
+ /* Check firmware actually enabled MPAM on this cpu. */
+ return (read_sysreg_s(SYS_MPAM1_EL1) & MPAM1_EL1_MPAMEN);
+}
+
+static void
+cpu_enable_mpam(const struct arm64_cpu_capabilities *entry)
+{
+ /*
+ * Access by the kernel (at EL1) should use the reserved PARTID
+ * which is configured unrestricted. This avoids priority-inversion
+ * where latency sensitive tasks have to wait for a task that has
+ * been throttled to release the lock.
+ */
+ write_sysreg_s(0, SYS_MPAM1_EL1);
+}
+
+static bool
+test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+
+ return idr & MPAMIDR_EL1_HAS_HCR;
+}
+
+static bool
+test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF))
+ return false;
+
+ return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY);
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
@@ -2250,8 +2521,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -2299,12 +2570,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
- .desc = "Software prefetching using PRFM",
- .capability = ARM64_HAS_NO_HW_PREFETCH,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
- .matches = has_no_hw_prefetch,
- },
- {
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
@@ -2316,7 +2581,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
- ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP)
+ .match_list = (const struct arm64_cpu_capabilities []){
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
+ },
+ {
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)
+ },
+ { /* Sentinel */ }
+ },
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
@@ -2350,7 +2625,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
- .cpu_enable = kpti_install_ng_mappings,
+ .cpu_enable = cpu_enable_kpti,
.matches = unmap_kernel_at_el0,
/*
* The ID feature fields below are used to indicate that
@@ -2360,11 +2635,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP)
},
{
- /* FP/SIMD is not implemented */
- .capability = ARM64_HAS_NO_FPSIMD,
- .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
- .min_field_value = 0,
- .matches = has_no_fpsimd,
+ .capability = ARM64_HAS_FPSIMD,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fpsimd,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP)
},
#ifdef CONFIG_ARM64_PMEM
{
@@ -2387,7 +2662,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "Scalable Vector Extension",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SVE,
- .cpu_enable = sve_kernel_enable,
+ .cpu_enable = cpu_enable_sve,
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, SVE, IMP)
},
@@ -2401,19 +2676,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_clear_disr,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
},
+ {
+ .desc = "RASv1p1 Extension Support",
+ .capability = ARM64_HAS_RASV1P1_EXTN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_rasv1p1,
+ },
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
- /*
- * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
- * Therefore, don't provide .desc as we don't want the detection
- * message to be shown until at least one CPU is detected to
- * support the feature.
- */
+ .desc = "Activity Monitors Unit (AMU)",
.capability = ARM64_HAS_AMU_EXTN,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_amu,
.cpu_enable = cpu_amu_enable,
+ .cpus = &amu_cpus,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
},
#endif /* CONFIG_ARM64_AMU_EXTN */
@@ -2453,21 +2730,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
- /*
- * Since we turn this on always, we don't want the user to
- * think that the feature is available when it may not be.
- * So hide the description.
- *
- * .desc = "Hardware pagetable Dirty Bit Management",
- *
- */
+ .desc = "Hardware dirty bit management",
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.capability = ARM64_HW_DBM,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
+ .cpus = &dbm_cpus,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
+#ifdef CONFIG_ARM64_HAFT
+ {
+ .desc = "Hardware managed Access Flag for Table Descriptors",
+ /*
+ * Contrary to the page/block access flag, the table access flag
+ * cannot be emulated in software (no access fault will occur).
+ * Therefore this should be used only if it's supported system
+ * wide.
+ */
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAFT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT)
+ },
+#endif
{
.desc = "CRC32 instructions",
.capability = ARM64_HAS_CRC32,
@@ -2572,6 +2858,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_gic_prio_relaxed_sync,
},
#endif
+ {
+ /*
+ * Depends on having GICv3
+ */
+ .desc = "ICV_DIR_EL1 trapping",
+ .capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = can_trap_icv_dir_el1,
+ },
#ifdef CONFIG_ARM64_E0PD
{
.desc = "E0PD",
@@ -2619,6 +2914,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3)
},
+ {
+ .desc = "FAR on MTE Tag Check Fault",
+ .capability = ARM64_MTE_FAR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP)
+ },
+ {
+ .desc = "Store Only MTE Tag Check",
+ .capability = ARM64_MTE_STORE_ONLY,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTESTOREONLY, IMP)
+ },
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
@@ -2627,13 +2936,27 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
},
+ {
+ .desc = "Fine Grained Traps",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+ },
+ {
+ .desc = "Fine Grained Traps 2",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT2,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, FGT2)
+ },
#ifdef CONFIG_ARM64_SME
{
.desc = "Scalable Matrix Extension",
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME,
.matches = has_cpuid_feature,
- .cpu_enable = sme_kernel_enable,
+ .cpu_enable = cpu_enable_sme,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, IMP)
},
/* FA64 should be sorted after the base SME capability */
@@ -2642,7 +2965,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME_FA64,
.matches = has_cpuid_feature,
- .cpu_enable = fa64_kernel_enable,
+ .cpu_enable = cpu_enable_fa64,
ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP)
},
{
@@ -2650,7 +2973,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.capability = ARM64_SME2,
.matches = has_cpuid_feature,
- .cpu_enable = sme2_kernel_enable,
+ .cpu_enable = cpu_enable_sme2,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2)
},
#endif /* CONFIG_ARM64_SME */
@@ -2711,6 +3034,120 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
+ {
+ .desc = "BBM Level 2 without TLB conflict abort",
+ .capability = ARM64_HAS_BBML2_NOABORT,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = has_bbml2_noabort,
+ },
+ {
+ .desc = "52-bit Virtual Addressing for KVM (LPA2)",
+ .capability = ARM64_HAS_LPA2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
+ {
+ .desc = "FPMR",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FPMR,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_fpmr,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP)
+ },
+#ifdef CONFIG_ARM64_VA_BITS_52
+ {
+ .capability = ARM64_HAS_VA52,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+#ifdef CONFIG_ARM64_64K_PAGES
+ .desc = "52-bit Virtual Addressing (LVA)",
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52)
+#else
+ .desc = "52-bit Virtual Addressing (LPA2)",
+#ifdef CONFIG_ARM64_4K_PAGES
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)
+#else
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)
+#endif
+#endif
+ },
+#endif
+ {
+ .desc = "Memory Partitioning And Monitoring",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM,
+ .matches = test_has_mpam,
+ .cpu_enable = cpu_enable_mpam,
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, MPAM, 1)
+ },
+ {
+ .desc = "Memory Partitioning And Monitoring Virtualisation",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_MPAM_HCR,
+ .matches = test_has_mpam_hcr,
+ },
+ {
+ .desc = "NV1",
+ .capability = ARM64_HAS_HCR_NV1,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_nv1,
+ ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
+ },
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ {
+ .desc = "Guarded Control Stack (GCS)",
+ .capability = ARM64_HAS_GCS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .cpu_enable = cpu_enable_gcs,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP)
+ },
+#endif
+#ifdef CONFIG_HW_PERF_EVENTS
+ {
+ .desc = "PMUv3",
+ .capability = ARM64_HAS_PMUV3,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_pmuv3,
+ },
+#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
+ {
+ .desc = "GICv5 Legacy vCPU interface",
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_LEGACY,
+ .matches = test_has_gicv5_legacy,
+ },
+ {
+ .desc = "XNX",
+ .capability = ARM64_HAS_XNX,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
+ },
{},
};
@@ -2743,6 +3180,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = match, \
}
+#define HWCAP_CAP_MATCH_ID(match, reg, field, min_value, cap_type, cap) \
+ { \
+ __HWCAP_CAP(#cap, cap_type, cap) \
+ HWCAP_CPUID_MATCH(reg, field, min_value) \
+ .matches = match, \
+ }
+
#ifdef CONFIG_ARM64_PTR_AUTH
static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
{
@@ -2771,6 +3215,20 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
};
#endif
+#ifdef CONFIG_ARM64_SVE
+static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sve() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
+#ifdef CONFIG_ARM64_SME
+static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope)
+{
+ return system_supports_sme() && has_user_cpuid_feature(cap, scope);
+}
+#endif
+
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL),
HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES),
@@ -2779,6 +3237,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA512, CAP_HWCAP, KERNEL_HWCAP_SHA512),
HWCAP_CAP(ID_AA64ISAR0_EL1, CRC32, IMP, CAP_HWCAP, KERNEL_HWCAP_CRC32),
HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, IMP, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, FEAT_LSE128, CAP_HWCAP, KERNEL_HWCAP_LSE128),
HWCAP_CAP(ID_AA64ISAR0_EL1, RDM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
HWCAP_CAP(ID_AA64ISAR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA3),
HWCAP_CAP(ID_AA64ISAR0_EL1, SM3, IMP, CAP_HWCAP, KERNEL_HWCAP_SM3),
@@ -2788,38 +3247,53 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
HWCAP_CAP(ID_AA64ISAR1_EL1, FCMA, IMP, CAP_HWCAP, KERNEL_HWCAP_FCMA),
HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, IMP, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC3, CAP_HWCAP, KERNEL_HWCAP_LRCPC3),
HWCAP_CAP(ID_AA64ISAR1_EL1, FRINTTS, IMP, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(ID_AA64ISAR1_EL1, SB, IMP, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_BF16),
HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX),
+ HWCAP_CAP(ID_AA64ISAR3_EL1, LSFE, IMP, CAP_HWCAP, KERNEL_HWCAP_LSFE),
HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
- HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
- HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
- HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
- HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p2, CAP_HWCAP, KERNEL_HWCAP_SVE2P2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, AES, AES2, CAP_HWCAP, KERNEL_HWCAP_SVE_AES2),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, B16B16, BFSCALE, CAP_HWCAP, KERNEL_HWCAP_SVE_BFSCALE),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, F16MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_F16MM),
+ HWCAP_CAP_MATCH_ID(has_sve_feature, ID_AA64ZFR0_EL1, EltPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_ELTPERM),
+#endif
+#ifdef CONFIG_ARM64_GCS
+ HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS),
#endif
HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
@@ -2832,10 +3306,13 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_MTE
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTESTOREONLY, IMP, CAP_HWCAP , KERNEL_HWCAP_MTE_STORE_ONLY),
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, CMPBR, CAP_HWCAP, KERNEL_HWCAP_CMPBR),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
@@ -2843,20 +3320,43 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
- HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
- HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
- HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
- HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP),
+ HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4),
#endif /* CONFIG_ARM64_SME */
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
+ HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
@@ -2967,18 +3467,49 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
+ bool match_all = false;
+ bool caps_set = false;
+ bool boot_cpu = false;
+
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask) ||
- cpus_have_cap(caps->capability) ||
- !caps->matches(caps, cpucap_default_scope(caps)))
+ if (!caps || !(caps->type & scope_mask))
continue;
- if (caps->desc)
+ match_all = cpucap_match_all_early_cpus(caps);
+ caps_set = cpus_have_cap(caps->capability);
+ boot_cpu = scope_mask & SCOPE_BOOT_CPU;
+
+ /*
+ * Unless it's a match-all CPUs feature, avoid probing if
+ * already detected.
+ */
+ if (!match_all && caps_set)
+ continue;
+
+ /*
+ * A match-all CPUs capability is only set when probing the
+ * boot CPU. It may be cleared subsequently if not detected on
+ * secondary ones.
+ */
+ if (match_all && !caps_set && !boot_cpu)
+ continue;
+
+ if (!caps->matches(caps, cpucap_default_scope(caps))) {
+ if (match_all)
+ __clear_bit(caps->capability, system_cpucaps);
+ continue;
+ }
+
+ /*
+ * Match-all CPUs capabilities are logged later when the
+ * system capabilities are finalised.
+ */
+ if (!match_all && caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
- if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
+ if (boot_cpu && (caps->type & SCOPE_BOOT_CPU))
set_bit(caps->capability, boot_cpucaps);
}
}
@@ -3021,13 +3552,9 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
boot_scope = !!(scope_mask & SCOPE_BOOT_CPU);
for (i = 0; i < ARM64_NCAPS; i++) {
- unsigned int num;
-
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask))
- continue;
- num = caps->capability;
- if (!cpus_have_cap(num))
+ if (!caps || !(caps->type & scope_mask) ||
+ !cpus_have_cap(caps->capability))
continue;
if (boot_scope && caps->cpu_enable)
@@ -3145,36 +3672,28 @@ static void verify_local_elf_hwcaps(void)
static void verify_sve_features(void)
{
- u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
- u64 zcr = read_zcr_features();
+ unsigned long cpacr = cpacr_save_enable_kernel_sve();
- unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
- unsigned int len = zcr & ZCR_ELx_LEN_MASK;
-
- if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
+ if (vec_verify_vq_map(ARM64_VEC_SVE)) {
pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
- /* Add checks on other ZCR bits here if necessary */
+ cpacr_restore(cpacr);
}
static void verify_sme_features(void)
{
- u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
- u64 smcr = read_smcr_features();
-
- unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
- unsigned int len = smcr & SMCR_ELx_LEN_MASK;
+ unsigned long cpacr = cpacr_save_enable_kernel_sme();
- if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
+ if (vec_verify_vq_map(ARM64_VEC_SME)) {
pr_crit("CPU%d: SME: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
- /* Add checks on other SMCR bits here if necessary */
+ cpacr_restore(cpacr);
}
static void verify_hyp_capabilities(void)
@@ -3187,7 +3706,7 @@ static void verify_hyp_capabilities(void)
return;
safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
/* Verify VMID bits */
@@ -3208,6 +3727,36 @@ static void verify_hyp_capabilities(void)
}
}
+static void verify_mpam_capabilities(void)
+{
+ u64 cpu_idr = read_cpuid(ID_AA64PFR0_EL1);
+ u64 sys_idr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u16 cpu_partid_max, cpu_pmg_max, sys_partid_max, sys_pmg_max;
+
+ if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, cpu_idr) !=
+ FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, sys_idr)) {
+ pr_crit("CPU%d: MPAM version mismatch\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_idr = read_cpuid(MPAMIDR_EL1);
+ sys_idr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1);
+ if (FIELD_GET(MPAMIDR_EL1_HAS_HCR, cpu_idr) !=
+ FIELD_GET(MPAMIDR_EL1_HAS_HCR, sys_idr)) {
+ pr_crit("CPU%d: Missing MPAM HCR\n", smp_processor_id());
+ cpu_die_early();
+ }
+
+ cpu_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, cpu_idr);
+ cpu_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, cpu_idr);
+ sys_partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, sys_idr);
+ sys_pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, sys_idr);
+ if (cpu_partid_max < sys_partid_max || cpu_pmg_max < sys_pmg_max) {
+ pr_crit("CPU%d: MPAM PARTID/PMG max values are mismatched\n", smp_processor_id());
+ cpu_die_early();
+ }
+}
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@@ -3234,6 +3783,9 @@ static void verify_local_cpu_capabilities(void)
if (is_hyp_mode_available())
verify_hyp_capabilities();
+
+ if (system_supports_mpam())
+ verify_mpam_capabilities();
}
void check_local_cpu_capabilities(void)
@@ -3256,14 +3808,6 @@ void check_local_cpu_capabilities(void)
verify_local_cpu_capabilities();
}
-static void __init setup_boot_cpu_capabilities(void)
-{
- /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
- update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
- /* Enable the SCOPE_BOOT_CPU capabilities alone right away */
- enable_cpu_capabilities(SCOPE_BOOT_CPU);
-}
-
bool this_cpu_has_cap(unsigned int n)
{
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
@@ -3281,7 +3825,6 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap);
* This helper function is used in a narrow window when,
* - The system wide safe registers are set with all the SMP CPUs and,
* - The SYSTEM_FEATURE system_cpucaps may not have been set.
- * In all other cases cpus_have_{const_}cap() should be used.
*/
static bool __maybe_unused __system_matches_cap(unsigned int n)
{
@@ -3320,46 +3863,117 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
+unsigned long cpu_get_elf_hwcap3(void)
+{
+ return elf_hwcap[2];
+}
+
+static void __init setup_boot_cpu_capabilities(void)
+{
+ kvm_arm_target_impl_cpu_init();
+ /*
+ * The boot CPU's feature register values have been recorded. Detect
+ * boot cpucaps and local cpucaps for the boot CPU, then enable and
+ * patch alternatives for the available boot cpucaps.
+ */
+ update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
+ enable_cpu_capabilities(SCOPE_BOOT_CPU);
+ apply_boot_alternatives();
+}
+
+void __init setup_boot_cpu_features(void)
+{
+ /*
+ * Initialize the indirect array of CPU capabilities pointers before we
+ * handle the boot CPU.
+ */
+ init_cpucap_indirect_list();
+
+ /*
+ * Detect broken pseudo-NMI. Must be called _before_ the call to
+ * setup_boot_cpu_capabilities() since it interacts with
+ * can_use_gic_priorities().
+ */
+ detect_system_supports_pseudo_nmi();
+
+ setup_boot_cpu_capabilities();
+}
+
static void __init setup_system_capabilities(void)
{
/*
- * We have finalised the system-wide safe feature
- * registers, finalise the capabilities that depend
- * on it. Also enable all the available capabilities,
- * that are not enabled already.
+ * The system-wide safe feature register values have been finalized.
+ * Detect, enable, and patch alternatives for the available system
+ * cpucaps.
*/
update_cpu_capabilities(SCOPE_SYSTEM);
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
-}
+ apply_alternatives_all();
-void __init setup_cpu_features(void)
-{
- u32 cwg;
+ for (int i = 0; i < ARM64_NCAPS; i++) {
+ const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
- setup_system_capabilities();
- setup_elf_hwcaps(arm64_elf_hwcaps);
+ if (!caps || !caps->desc)
+ continue;
- if (system_supports_32bit_el0()) {
- setup_elf_hwcaps(compat_elf_hwcaps);
- elf_hwcap_fixup();
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids)
+ pr_info("detected: %s on CPU%*pbl\n",
+ caps->desc, cpumask_pr_args(caps->cpus));
+
+ /* Log match-all CPUs capabilities */
+ if (cpucap_match_all_early_cpus(caps) &&
+ cpus_have_cap(caps->capability))
+ pr_info("detected: %s\n", caps->desc);
}
+ /*
+ * TTBR0 PAN doesn't have its own cpucap, so log it manually.
+ */
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+ /*
+ * Report Spectre mitigations status.
+ */
+ spectre_print_disabled_mitigations();
+}
+
+void __init setup_system_features(void)
+{
+ setup_system_capabilities();
+
+ linear_map_maybe_split_to_ptes();
+ kpti_install_ng_mappings();
+
sve_setup();
sme_setup();
- minsigstksz_setup();
/*
* Check for sane CTR_EL0.CWG value.
*/
- cwg = cache_type_cwg();
- if (!cwg)
+ if (!cache_type_cwg())
pr_warn("No Cache Writeback Granule information, assuming %d\n",
ARCH_DMA_MINALIGN);
}
+void __init setup_user_features(void)
+{
+ user_feature_fixup();
+
+ setup_elf_hwcaps(arm64_elf_hwcaps);
+
+ if (system_supports_32bit_el0()) {
+ setup_elf_hwcaps(compat_elf_hwcaps);
+ elf_hwcap_fixup();
+ }
+
+ minsigstksz_setup();
+}
+
static int enable_mismatched_32bit_el0(unsigned int cpu)
{
/*
@@ -3370,7 +3984,14 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
static int lucky_winner = -1;
struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
- bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+ bool cpu_32bit = false;
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK))
+ pr_info("Treating adaptive-ticks CPU %u as 64-bit only\n", cpu);
+ else
+ cpu_32bit = true;
+ }
if (cpu_32bit) {
cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
@@ -3414,7 +4035,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+ cpu_enable_swapper_cnp();
}
/*
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
deleted file mode 100644
index f372295207fb..000000000000
--- a/arch/arm64/kernel/cpuidle.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ARM64 CPU idle arch support
- *
- * Copyright (C) 2014 ARM Ltd.
- * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
- */
-
-#include <linux/acpi.h>
-#include <linux/cpuidle.h>
-#include <linux/cpu_pm.h>
-#include <linux/psci.h>
-
-#ifdef CONFIG_ACPI_PROCESSOR_IDLE
-
-#include <acpi/processor.h>
-
-#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
-
-static int psci_acpi_cpu_init_idle(unsigned int cpu)
-{
- int i, count;
- struct acpi_lpi_state *lpi;
- struct acpi_processor *pr = per_cpu(processors, cpu);
-
- if (unlikely(!pr || !pr->flags.has_lpi))
- return -EINVAL;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- count = pr->power.count - 1;
- if (count <= 0)
- return -ENODEV;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- lpi = &pr->power.lpi_states[i + 1];
- /*
- * Only bits[31:0] represent a PSCI power_state while
- * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
- */
- state = lpi->address;
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-int acpi_processor_ffh_lpi_probe(unsigned int cpu)
-{
- return psci_acpi_cpu_init_idle(cpu);
-}
-
-__cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
-{
- u32 state = lpi->address;
-
- if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
- return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
- else
- return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter,
- lpi->index, state);
-}
-#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 98fda8500535..c44e6d94f5de 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -36,8 +36,6 @@ static struct cpuinfo_arm64 boot_cpu_data;
static inline const char *icache_policy_str(int l1ip)
{
switch (l1ip) {
- case CTR_EL0_L1Ip_VPIPT:
- return "VPIPT";
case CTR_EL0_L1Ip_VIPT:
return "VIPT";
case CTR_EL0_L1Ip_PIPT:
@@ -82,6 +80,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SB] = "sb",
[KERNEL_HWCAP_PACA] = "paca",
[KERNEL_HWCAP_PACG] = "pacg",
+ [KERNEL_HWCAP_GCS] = "gcs",
[KERNEL_HWCAP_DCPODP] = "dcpodp",
[KERNEL_HWCAP_SVE2] = "sve2",
[KERNEL_HWCAP_SVEAES] = "sveaes",
@@ -127,6 +126,43 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_F16F16] = "smef16f16",
[KERNEL_HWCAP_MOPS] = "mops",
[KERNEL_HWCAP_HBC] = "hbc",
+ [KERNEL_HWCAP_SVE_B16B16] = "sveb16b16",
+ [KERNEL_HWCAP_LRCPC3] = "lrcpc3",
+ [KERNEL_HWCAP_LSE128] = "lse128",
+ [KERNEL_HWCAP_FPMR] = "fpmr",
+ [KERNEL_HWCAP_LUT] = "lut",
+ [KERNEL_HWCAP_FAMINMAX] = "faminmax",
+ [KERNEL_HWCAP_F8CVT] = "f8cvt",
+ [KERNEL_HWCAP_F8FMA] = "f8fma",
+ [KERNEL_HWCAP_F8DP4] = "f8dp4",
+ [KERNEL_HWCAP_F8DP2] = "f8dp2",
+ [KERNEL_HWCAP_F8E4M3] = "f8e4m3",
+ [KERNEL_HWCAP_F8E5M2] = "f8e5m2",
+ [KERNEL_HWCAP_SME_LUTV2] = "smelutv2",
+ [KERNEL_HWCAP_SME_F8F16] = "smef8f16",
+ [KERNEL_HWCAP_SME_F8F32] = "smef8f32",
+ [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
+ [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
+ [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
+ [KERNEL_HWCAP_CMPBR] = "cmpbr",
+ [KERNEL_HWCAP_FPRCVT] = "fprcvt",
+ [KERNEL_HWCAP_F8MM8] = "f8mm8",
+ [KERNEL_HWCAP_F8MM4] = "f8mm4",
+ [KERNEL_HWCAP_SVE_F16MM] = "svef16mm",
+ [KERNEL_HWCAP_SVE_ELTPERM] = "sveeltperm",
+ [KERNEL_HWCAP_SVE_AES2] = "sveaes2",
+ [KERNEL_HWCAP_SVE_BFSCALE] = "svebfscale",
+ [KERNEL_HWCAP_SVE2P2] = "sve2p2",
+ [KERNEL_HWCAP_SME2P2] = "sme2p2",
+ [KERNEL_HWCAP_SME_SBITPERM] = "smesbitperm",
+ [KERNEL_HWCAP_SME_AES] = "smeaes",
+ [KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa",
+ [KERNEL_HWCAP_SME_STMOP] = "smestmop",
+ [KERNEL_HWCAP_SME_SMOP4] = "smesmop4",
+ [KERNEL_HWCAP_MTE_FAR] = "mtefar",
+ [KERNEL_HWCAP_MTE_STORE_ONLY] = "mtestoreonly",
+ [KERNEL_HWCAP_LSFE] = "lsfe",
};
#ifdef CONFIG_COMPAT
@@ -176,80 +212,79 @@ static const char *const compat_hwcap2_str[] = {
static int c_show(struct seq_file *m, void *v)
{
- int i, j;
+ int j;
+ int cpu = m->index;
bool compat = personality(current->personality) == PER_LINUX32;
+ struct cpuinfo_arm64 *cpuinfo = v;
+ u32 midr = cpuinfo->reg_midr;
- for_each_online_cpu(i) {
- struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
- u32 midr = cpuinfo->reg_midr;
-
- /*
- * glibc reads /proc/cpuinfo to determine the number of
- * online processors, looking for lines beginning with
- * "processor". Give glibc what it expects.
- */
- seq_printf(m, "processor\t: %d\n", i);
- if (compat)
- seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
- MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
+ /*
+ * glibc reads /proc/cpuinfo to determine the number of
+ * online processors, looking for lines beginning with
+ * "processor". Give glibc what it expects.
+ */
+ seq_printf(m, "processor\t: %d\n", cpu);
+ if (compat)
+ seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n",
+ MIDR_REVISION(midr), COMPAT_ELF_PLATFORM);
- seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
- loops_per_jiffy / (500000UL/HZ),
- loops_per_jiffy / (5000UL/HZ) % 100);
+ seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000UL/HZ),
+ loops_per_jiffy / (5000UL/HZ) % 100);
- /*
- * Dump out the common processor features in a single line.
- * Userspace should read the hwcaps with getauxval(AT_HWCAP)
- * rather than attempting to parse this, but there's a body of
- * software which does already (at least for 32-bit).
- */
- seq_puts(m, "Features\t:");
- if (compat) {
+ /*
+ * Dump out the common processor features in a single line.
+ * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+ * rather than attempting to parse this, but there's a body of
+ * software which does already (at least for 32-bit).
+ */
+ seq_puts(m, "Features\t:");
+ if (compat) {
#ifdef CONFIG_COMPAT
- for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
- if (compat_elf_hwcap & (1 << j)) {
- /*
- * Warn once if any feature should not
- * have been present on arm64 platform.
- */
- if (WARN_ON_ONCE(!compat_hwcap_str[j]))
- continue;
-
- seq_printf(m, " %s", compat_hwcap_str[j]);
- }
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+ if (compat_elf_hwcap & (1 << j)) {
+ /*
+ * Warn once if any feature should not
+ * have been present on arm64 platform.
+ */
+ if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+ continue;
+
+ seq_printf(m, " %s", compat_hwcap_str[j]);
}
+ }
- for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
- if (compat_elf_hwcap2 & (1 << j))
- seq_printf(m, " %s", compat_hwcap2_str[j]);
+ for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
+ if (compat_elf_hwcap2 & (1 << j))
+ seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
- } else {
- for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
- if (cpu_have_feature(j))
- seq_printf(m, " %s", hwcap_str[j]);
- }
- seq_puts(m, "\n");
-
- seq_printf(m, "CPU implementer\t: 0x%02x\n",
- MIDR_IMPLEMENTOR(midr));
- seq_printf(m, "CPU architecture: 8\n");
- seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
- seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
- seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+ } else {
+ for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
+ if (cpu_have_feature(j))
+ seq_printf(m, " %s", hwcap_str[j]);
}
+ seq_puts(m, "\n");
+
+ seq_printf(m, "CPU implementer\t: 0x%02x\n",
+ MIDR_IMPLEMENTOR(midr));
+ seq_puts(m, "CPU architecture: 8\n");
+ seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+ seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+ seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < 1 ? (void *)1 : NULL;
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ return *pos < nr_cpu_ids ? &per_cpu(cpu_data, *pos) : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
- return NULL;
+ return c_start(m, pos);
}
static void c_stop(struct seq_file *m, void *v)
@@ -264,7 +299,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
@@ -295,11 +330,13 @@ static struct kobj_type cpuregs_kobj_type = {
CPUREGS_ATTR_RO(midr_el1, midr);
CPUREGS_ATTR_RO(revidr_el1, revidr);
+CPUREGS_ATTR_RO(aidr_el1, aidr);
CPUREGS_ATTR_RO(smidr_el1, smidr);
static struct attribute *cpuregs_id_attrs[] = {
&cpuregs_attr_midr_el1.attr,
&cpuregs_attr_revidr_el1.attr,
+ &cpuregs_attr_aidr_el1.attr,
NULL
};
@@ -385,9 +422,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
switch (l1ip) {
case CTR_EL0_L1Ip_PIPT:
break;
- case CTR_EL0_L1Ip_VPIPT:
- set_bit(ICACHEF_VPIPT, &__icache_flags);
- break;
case CTR_EL0_L1Ip_VIPT:
default:
/* Assume aliasing */
@@ -439,20 +473,25 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
info->reg_revidr = read_cpuid(REVIDR_EL1);
+ info->reg_aidr = read_cpuid(AIDR_EL1);
info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+ info->reg_id_aa64isar3 = read_cpuid(ID_AA64ISAR3_EL1);
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
+ info->reg_id_aa64mmfr4 = read_cpuid(ID_AA64MMFR4_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+ info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
+ info->reg_id_aa64fpfr0 = read_cpuid(ID_AA64FPFR0_EL1);
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
info->reg_gmid = read_cpuid(GMID_EL1);
@@ -460,6 +499,22 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
__cpuinfo_store_cpu_32bit(&info->aarch32);
+ /*
+ * info->reg_mpamidr deferred to {init,update}_cpu_features because we
+ * don't want to read it (and trigger a trap on buggy firmware) if
+ * using an aa64pfr0_el1 override to unconditionally disable MPAM.
+ */
+
+ if (IS_ENABLED(CONFIG_ARM64_SME) &&
+ id_aa64pfr1_sme(info->reg_id_aa64pfr1)) {
+ /*
+ * We mask out SMPS since even if the hardware
+ * supports priorities the kernel does not at present
+ * and we block access to them.
+ */
+ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
+ }
+
cpuinfo_detect_icache_policy(info);
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 64f2ecbdfe5c..29307642f4c9 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -21,8 +21,12 @@
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
+#include <asm/kgdb.h>
+#include <asm/kprobes.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
+#include <asm/uprobes.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
@@ -34,7 +38,7 @@ u8 debug_monitors_arch(void)
/*
* MDSCR access routines.
*/
-static void mdscr_write(u32 mdscr)
+static void mdscr_write(u64 mdscr)
{
unsigned long flags;
flags = local_daif_save();
@@ -43,7 +47,7 @@ static void mdscr_write(u32 mdscr)
}
NOKPROBE_SYMBOL(mdscr_write);
-static u32 mdscr_read(void)
+static u64 mdscr_read(void)
{
return read_sysreg(mdscr_el1);
}
@@ -79,16 +83,16 @@ static DEFINE_PER_CPU(int, kde_ref_count);
void enable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, enable = 0;
+ u64 mdscr, enable = 0;
WARN_ON(preemptible());
if (this_cpu_inc_return(mde_ref_count) == 1)
- enable = DBG_MDSCR_MDE;
+ enable = MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_inc_return(kde_ref_count) == 1)
- enable |= DBG_MDSCR_KDE;
+ enable |= MDSCR_EL1_KDE;
if (enable && debug_enabled) {
mdscr = mdscr_read();
@@ -100,16 +104,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, disable = 0;
+ u64 mdscr, disable = 0;
WARN_ON(preemptible());
if (this_cpu_dec_return(mde_ref_count) == 0)
- disable = ~DBG_MDSCR_MDE;
+ disable = ~MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_dec_return(kde_ref_count) == 0)
- disable &= ~DBG_MDSCR_KDE;
+ disable &= ~MDSCR_EL1_KDE;
if (disable) {
mdscr = mdscr_read();
@@ -156,190 +160,124 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
-static DEFINE_SPINLOCK(debug_hook_lock);
-static LIST_HEAD(user_step_hook);
-static LIST_HEAD(kernel_step_hook);
-
-static void register_debug_hook(struct list_head *node, struct list_head *list)
+static void send_user_sigtrap(int si_code)
{
- spin_lock(&debug_hook_lock);
- list_add_rcu(node, list);
- spin_unlock(&debug_hook_lock);
-
-}
+ struct pt_regs *regs = current_pt_regs();
-static void unregister_debug_hook(struct list_head *node)
-{
- spin_lock(&debug_hook_lock);
- list_del_rcu(node);
- spin_unlock(&debug_hook_lock);
- synchronize_rcu();
-}
+ if (WARN_ON(!user_mode(regs)))
+ return;
-void register_user_step_hook(struct step_hook *hook)
-{
- register_debug_hook(&hook->node, &user_step_hook);
-}
+ if (!regs_irqs_disabled(regs))
+ local_irq_enable();
-void unregister_user_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
+ "User debug trap");
}
-void register_kernel_step_hook(struct step_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_softstep() from entry-common.c.
+ */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_step_hook);
-}
+ if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_TRACE);
+ /*
+ * ptrace will disable single step unless explicitly
+ * asked to re-enable it. For other clients, it makes
+ * sense to leave it enabled (i.e. rewind the controls
+ * to the active-not-pending state).
+ */
+ user_rewind_single_step(current);
}
-/*
- * Call registered single step handlers
- * There is no Syndrome info to check for determining the handler.
- * So we call all the registered handlers, until the right handler is
- * found which returns zero.
- */
-static int call_step_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs)
{
- struct step_hook *hook;
- struct list_head *list;
- int retval = DBG_HOOK_ERROR;
-
- list = user_mode(regs) ? &user_step_hook : &kernel_step_hook;
+ if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
/*
- * Since single-step exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
+ * Re-enable stepping since we know that we will be
+ * returning to regs.
*/
- list_for_each_entry_rcu(hook, list, node) {
- retval = hook->fn(regs, esr);
- if (retval == DBG_HOOK_HANDLED)
- break;
- }
-
- return retval;
+ set_regs_spsr_ss(regs);
}
-NOKPROBE_SYMBOL(call_step_hook);
+NOKPROBE_SYMBOL(do_el1_softstep);
-static void send_user_sigtrap(int si_code)
+static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr)
{
- struct pt_regs *regs = current_pt_regs();
+ if (esr_brk_comment(esr) == BUG_BRK_IMM)
+ return bug_brk_handler(regs, esr);
- if (WARN_ON(!user_mode(regs)))
- return;
+ if (IS_ENABLED(CONFIG_CFI) && esr_is_cfi_brk(esr))
+ return cfi_brk_handler(regs, esr);
- if (interrupts_enabled(regs))
- local_irq_enable();
+ if (esr_brk_comment(esr) == FAULT_BRK_IMM)
+ return reserved_fault_brk_handler(regs, esr);
- arm64_force_sig_fault(SIGTRAP, si_code, instruction_pointer(regs),
- "User debug trap");
-}
+ if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
+ (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ return kasan_brk_handler(regs, esr);
-static int single_step_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
-{
- bool handler_found = false;
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr))
+ return ubsan_brk_handler(regs, esr);
- /*
- * If we are stepping a pending breakpoint, call the hw_breakpoint
- * handler first.
- */
- if (!reinstall_suspended_bps(regs))
- return 0;
-
- if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
-
- if (!handler_found && user_mode(regs)) {
- send_user_sigtrap(TRAP_TRACE);
-
- /*
- * ptrace will disable single step unless explicitly
- * asked to re-enable it. For other clients, it makes
- * sense to leave it enabled (i.e. rewind the controls
- * to the active-not-pending state).
- */
- user_rewind_single_step(current);
- } else if (!handler_found) {
- pr_warn("Unexpected kernel single-step exception at EL1\n");
- /*
- * Re-enable stepping since we know that we will be
- * returning to regs.
- */
- set_regs_spsr_ss(regs);
+ if (IS_ENABLED(CONFIG_KGDB)) {
+ if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM)
+ return kgdb_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM)
+ return kgdb_compiled_brk_handler(regs, esr);
}
- return 0;
-}
-NOKPROBE_SYMBOL(single_step_handler);
-
-static LIST_HEAD(user_break_hook);
-static LIST_HEAD(kernel_break_hook);
+ if (IS_ENABLED(CONFIG_KPROBES)) {
+ if (esr_brk_comment(esr) == KPROBES_BRK_IMM)
+ return kprobe_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM)
+ return kprobe_ss_brk_handler(regs, esr);
+ }
-void register_user_break_hook(struct break_hook *hook)
-{
- register_debug_hook(&hook->node, &user_break_hook);
-}
+ if (IS_ENABLED(CONFIG_KRETPROBES) &&
+ esr_brk_comment(esr) == KRETPROBES_BRK_IMM)
+ return kretprobe_brk_handler(regs, esr);
-void unregister_user_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ return DBG_HOOK_ERROR;
}
+NOKPROBE_SYMBOL(call_el1_break_hook);
-void register_kernel_break_hook(struct break_hook *hook)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_brk64() from entry-common.c.
+ */
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &kernel_break_hook);
-}
+ if (IS_ENABLED(CONFIG_UPROBES) &&
+ esr_brk_comment(esr) == UPROBES_BRK_IMM &&
+ uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_kernel_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ send_user_sigtrap(TRAP_BRKPT);
}
-static int call_break_hook(struct pt_regs *regs, unsigned long esr)
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs)
{
- struct break_hook *hook;
- struct list_head *list;
- int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL;
-
- list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
-
- /*
- * Since brk exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
- */
- list_for_each_entry_rcu(hook, list, node) {
- unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
-
- if ((comment & ~hook->mask) == hook->imm)
- fn = hook->fn;
- }
+ if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return;
- return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
+ die("Oops - BRK", regs, esr);
}
-NOKPROBE_SYMBOL(call_break_hook);
+NOKPROBE_SYMBOL(do_el1_brk64);
-static int brk_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+#ifdef CONFIG_COMPAT
+void do_bkpt32(unsigned long esr, struct pt_regs *regs)
{
- if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- } else {
- pr_warn("Unexpected kernel BRK exception at EL1\n");
- return -EFAULT;
- }
-
- return 0;
+ arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr);
}
-NOKPROBE_SYMBOL(brk_handler);
+#endif /* CONFIG_COMPAT */
-int aarch32_break_handler(struct pt_regs *regs)
+bool try_handle_aarch32_break(struct pt_regs *regs)
{
u32 arm_instr;
u16 thumb_instr;
@@ -347,7 +285,7 @@ int aarch32_break_handler(struct pt_regs *regs)
void __user *pc = (void __user *)instruction_pointer(regs);
if (!compat_user_mode(regs))
- return -EFAULT;
+ return false;
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
@@ -371,20 +309,12 @@ int aarch32_break_handler(struct pt_regs *regs)
}
if (!bp)
- return -EFAULT;
+ return false;
send_user_sigtrap(TRAP_BRKPT);
- return 0;
-}
-NOKPROBE_SYMBOL(aarch32_break_handler);
-
-void __init debug_traps_init(void)
-{
- hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
- TRAP_TRACE, "single-step handler");
- hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "BRK handler");
+ return true;
}
+NOKPROBE_SYMBOL(try_handle_aarch32_break);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
@@ -418,7 +348,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
{
WARN_ON(!irqs_disabled());
set_regs_spsr_ss(regs);
- mdscr_write(mdscr_read() | DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() | MDSCR_EL1_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_enable_single_step);
@@ -426,7 +356,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
WARN_ON(!irqs_disabled());
- mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() & ~MDSCR_EL1_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_disable_single_step);
@@ -434,7 +364,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
- return mdscr_read() & DBG_MDSCR_SS;
+ return mdscr_read() & MDSCR_EL1_SS;
}
NOKPROBE_SYMBOL(kernel_active_single_step);
@@ -443,6 +373,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
set_regs_spsr_ss(regs);
}
+void kernel_fastforward_single_step(struct pt_regs *regs)
+{
+ clear_regs_spsr_ss(regs);
+}
+
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{
diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S
index 11d7f7de202d..329e8df9215f 100644
--- a/arch/arm64/kernel/efi-header.S
+++ b/arch/arm64/kernel/efi-header.S
@@ -28,7 +28,7 @@
.macro __EFI_PE_HEADER
#ifdef CONFIG_EFI
.set .Lpe_header_offset, . - .L_head
- .long PE_MAGIC
+ .long IMAGE_NT_SIGNATURE
.short IMAGE_FILE_MACHINE_ARM64 // Machine
.short .Lsection_count // NumberOfSections
.long 0 // TimeDateStamp
@@ -40,7 +40,7 @@
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
.Loptional_header:
- .short PE_OPT_MAGIC_PE32PLUS // PE32+ format
+ .short IMAGE_NT_OPTIONAL_HDR64_MAGIC // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __initdata_begin - .Lefi_header_end // SizeOfCode
@@ -66,7 +66,7 @@
.long .Lefi_header_end - .L_head // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
- .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
+ .short IMAGE_DLLCHARACTERISTICS_NX_COMPAT // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 2b478ca356b0..a81cb4aa4738 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,10 +9,14 @@
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/kmemleak.h>
+#include <linux/kthread.h>
#include <linux/screen_info.h>
+#include <linux/vmalloc.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
+#include <asm/vmap_stack.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
@@ -27,13 +31,21 @@ static bool region_is_misaligned(const efi_memory_desc_t *md)
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
-static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+static __init ptdesc_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
- if (type == EFI_MEMORY_MAPPED_IO)
- return PROT_DEVICE_nGnRE;
+ if (type == EFI_MEMORY_MAPPED_IO) {
+ pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE);
+
+ if (arm64_is_protected_mmio(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT))
+ prot = pgprot_encrypted(prot);
+ else
+ prot = pgprot_decrypted(prot);
+ return pgprot_val(prot);
+ }
if (region_is_misaligned(md)) {
static bool __initdata code_is_misaligned;
@@ -71,13 +83,9 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
return pgprot_val(PAGE_KERNEL_EXEC);
}
-/* we will fill this structure from the stub, so don't put it in .bss */
-struct screen_info screen_info __section(".data");
-EXPORT_SYMBOL(screen_info);
-
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- pteval_t prot_val = create_mapping_protection(md);
+ ptdesc_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
@@ -107,16 +115,15 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
struct set_perm_data *spd = data;
const efi_memory_desc_t *md = spd->md;
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = __ptep_get(ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
if (md->attribute & EFI_MEMORY_XP)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
- else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
- system_supports_bti() && spd->has_bti)
+ else if (system_supports_bti_kernel() && spd->has_bti)
pte = set_pte_bit(pte, __pgprot(PTE_GP));
- set_pte(ptep, pte);
+ __set_pte(ptep, pte);
return 0;
}
@@ -159,20 +166,53 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
return s;
}
-static DEFINE_RAW_SPINLOCK(efi_rt_lock);
-
void arch_efi_call_virt_setup(void)
{
- efi_virtmap_load();
+ efi_runtime_assert_lock_held();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ /*
+ * Disable migration to ensure that a preempted EFI runtime
+ * service call will be resumed on the same CPU. This avoids
+ * potential issues with EFI runtime calls that are preempted
+ * while polling for an asynchronous completion of a secure
+ * firmware call, which may not permit the CPU to change.
+ */
+ migrate_disable();
+ kthread_use_mm(&efi_mm);
+ } else {
+ efi_virtmap_load();
+ }
+
+ /*
+ * Enable access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from exception when
+ * invoking the EFI run-time services.
+ */
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
+
__efi_fpsimd_begin();
- raw_spin_lock(&efi_rt_lock);
}
void arch_efi_call_virt_teardown(void)
{
- raw_spin_unlock(&efi_rt_lock);
__efi_fpsimd_end();
- efi_virtmap_unload();
+
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1 until
+ * uaccess_enable(). Do so before efi_virtmap_unload() updates the
+ * saved TTBR0 value, so the userland page tables are not activated
+ * inadvertently over the back of an exception.
+ */
+ uaccess_ttbr0_disable();
+
+ if (preemptible() && (current->flags & PF_KTHREAD)) {
+ kthread_unuse_mm(&efi_mm);
+ migrate_enable();
+ } else {
+ efi_virtmap_unload();
+ }
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
@@ -209,14 +249,14 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
-l: if (!p) {
+ p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
+ if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
}
+ kmemleak_not_leak(p);
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 2e94d20c4ac7..b735f4c2fe5e 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
int ret = 1;
unsigned long addr;
void *tags = NULL;
+ int locked = 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
- struct page *page = get_dump_page(addr);
+ struct page *page = get_dump_page(addr, &locked);
/*
* get_dump_page() returns NULL when encountering an empty
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 0fc94207e69a..3625797e9ee8 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,10 +6,13 @@
*/
#include <linux/context_tracking.h>
+#include <linux/irq-entry-common.h>
#include <linux/kasan.h>
#include <linux/linkage.h>
+#include <linux/livepatch.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
+#include <linux/resume_user_mode.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/thread_info.h>
@@ -31,67 +34,28 @@
* Handle IRQ/context state management when entering from kernel mode.
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_enter(), handling the kernel
- * mode transitions only.
*/
-static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
+static noinstr irqentry_state_t enter_from_kernel_mode(struct pt_regs *regs)
{
- regs->exit_rcu = false;
-
- if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
- lockdep_hardirqs_off(CALLER_ADDR0);
- ct_irq_enter();
- trace_hardirqs_off_finish();
-
- regs->exit_rcu = true;
- return;
- }
-
- lockdep_hardirqs_off(CALLER_ADDR0);
- rcu_irq_enter_check_tick();
- trace_hardirqs_off_finish();
-}
+ irqentry_state_t state;
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
-{
- __enter_from_kernel_mode(regs);
+ state = irqentry_enter(regs);
mte_check_tfsr_entry();
mte_disable_tco_entry(current);
+
+ return state;
}
/*
* Handle IRQ/context state management when exiting to kernel mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
- *
- * This is intended to match the logic in irqentry_exit(), handling the kernel
- * mode transitions only, and with preemption handled elsewhere.
*/
-static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
-{
- lockdep_assert_irqs_disabled();
-
- if (interrupts_enabled(regs)) {
- if (regs->exit_rcu) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- ct_irq_exit();
- lockdep_hardirqs_on(CALLER_ADDR0);
- return;
- }
-
- trace_hardirqs_on();
- } else {
- if (regs->exit_rcu)
- ct_irq_exit();
- }
-}
-
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs,
+ irqentry_state_t state)
{
mte_check_tfsr_exit();
- __exit_to_kernel_mode(regs);
+ irqentry_exit(regs, state);
}
/*
@@ -99,96 +63,30 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
* Before this function is called it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __enter_from_user_mode(void)
+static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
{
- lockdep_hardirqs_off(CALLER_ADDR0);
- CT_WARN_ON(ct_state() != CONTEXT_USER);
- user_exit_irqoff();
- trace_hardirqs_off_finish();
+ enter_from_user_mode(regs);
mte_disable_tco_entry(current);
}
-static __always_inline void enter_from_user_mode(struct pt_regs *regs)
-{
- __enter_from_user_mode();
-}
-
/*
* Handle IRQ/context state management when exiting to user mode.
* After this function returns it is not safe to call regular kernel code,
* instrumentable code, or any code which may trigger an exception.
*/
-static __always_inline void __exit_to_user_mode(void)
-{
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- user_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
- unsigned long flags;
-
+ local_irq_disable();
+ exit_to_user_mode_prepare_legacy(regs);
local_daif_mask();
-
- flags = read_thread_flags();
- if (unlikely(flags & _TIF_WORK_MASK))
- do_notify_resume(regs, flags);
-
- lockdep_sys_exit();
-}
-
-static __always_inline void exit_to_user_mode(struct pt_regs *regs)
-{
- exit_to_user_mode_prepare(regs);
mte_check_tfsr_exit();
- __exit_to_user_mode();
+ exit_to_user_mode();
}
asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
{
- exit_to_user_mode(regs);
-}
-
-/*
- * Handle IRQ/context state management when entering an NMI from user/kernel
- * mode. Before this function is called it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_enter_nmi(struct pt_regs *regs)
-{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
- __nmi_enter();
- lockdep_hardirqs_off(CALLER_ADDR0);
- lockdep_hardirq_enter();
- ct_nmi_enter();
-
- trace_hardirqs_off_finish();
- ftrace_nmi_enter();
-}
-
-/*
- * Handle IRQ/context state management when exiting an NMI from user/kernel
- * mode. After this function returns it is not safe to call regular kernel
- * code, instrumentable code, or any code which may trigger an exception.
- */
-static void noinstr arm64_exit_nmi(struct pt_regs *regs)
-{
- bool restore = regs->lockdep_hardirqs;
-
- ftrace_nmi_exit();
- if (restore) {
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- }
-
- ct_nmi_exit();
- lockdep_hardirq_exit();
- if (restore)
- lockdep_hardirqs_on(CALLER_ADDR0);
- __nmi_exit();
+ arm64_exit_to_user_mode(regs);
}
/*
@@ -196,14 +94,18 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
* kernel mode. Before this function is called it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+static noinstr irqentry_state_t arm64_enter_el1_dbg(struct pt_regs *regs)
{
- regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+ irqentry_state_t state;
+
+ state.lockdep = lockdep_hardirqs_enabled();
lockdep_hardirqs_off(CALLER_ADDR0);
ct_nmi_enter();
trace_hardirqs_off_finish();
+
+ return state;
}
/*
@@ -211,62 +113,19 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
* kernel mode. After this function returns it is not safe to call regular
* kernel code, instrumentable code, or any code which may trigger an exception.
*/
-static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs,
+ irqentry_state_t state)
{
- bool restore = regs->lockdep_hardirqs;
-
- if (restore) {
+ if (state.lockdep) {
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare();
}
ct_nmi_exit();
- if (restore)
+ if (state.lockdep)
lockdep_hardirqs_on(CALLER_ADDR0);
}
-#ifdef CONFIG_PREEMPT_DYNAMIC
-DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
-#define need_irq_preemption() \
- (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
-#else
-#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
-#endif
-
-static void __sched arm64_preempt_schedule_irq(void)
-{
- if (!need_irq_preemption())
- return;
-
- /*
- * Note: thread_info::preempt_count includes both thread_info::count
- * and thread_info::need_resched, and is not equivalent to
- * preempt_count().
- */
- if (READ_ONCE(current_thread_info()->preempt_count) != 0)
- return;
-
- /*
- * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
- * priority masking is used the GIC irqchip driver will clear DAIF.IF
- * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
- * DAIF we must have handled an NMI, so skip preemption.
- */
- if (system_uses_irq_prio_masking() && read_sysreg(daif))
- return;
-
- /*
- * Preempting a task from an IRQ means we leave copies of PSTATE
- * on the stack. cpufeature's enable calls may modify PSTATE, but
- * resuming one of these preempted tasks would undo those changes.
- *
- * Only allow a task to be preempted once cpufeatures have been
- * enabled.
- */
- if (system_capabilities_finalized())
- preempt_schedule_irq();
-}
-
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
@@ -286,7 +145,7 @@ extern void (*handle_arch_fiq)(struct pt_regs *);
static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
unsigned long esr)
{
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
console_verbose();
@@ -310,7 +169,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
- u32 reg, val;
+ u64 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
@@ -320,7 +179,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
@@ -359,20 +218,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
* As per the ABI exit SME streaming mode and clear the SVE state not
* shared with FPSIMD on syscall entry.
*/
-static inline void fp_user_discard(void)
+static inline void fpsimd_syscall_enter(void)
{
- /*
- * If SME is active then exit streaming mode. If ZA is active
- * then flush the SVE registers but leave userspace access to
- * both SVE and SME enabled, otherwise disable SME for the
- * task and fall through to disabling SVE too. This means
- * that after a syscall we never have any streaming mode
- * register state to track, if this changes the KVM code will
- * need updating.
- */
+ /* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
if (system_supports_sme())
sme_smstop_sm();
+ /*
+ * The CPU is not in streaming mode. If non-streaming SVE is not
+ * supported, there is no SVE state that needs to be discarded.
+ */
if (!system_supports_sve())
return;
@@ -382,7 +237,56 @@ static inline void fp_user_discard(void)
sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
sve_flush_live(true, sve_vq_minus_one);
}
+
+ /*
+ * Any live non-FPSIMD SVE state has been zeroed. Allow
+ * fpsimd_save_user_state() to lazily discard SVE state until either
+ * the live state is unbound or fpsimd_syscall_exit() is called.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
+}
+
+static __always_inline void fpsimd_syscall_exit(void)
+{
+ if (!system_supports_sve())
+ return;
+
+ /*
+ * The current task's user FPSIMD/SVE/SME state is now bound to this
+ * CPU. The fpsimd_last_state.to_save value is either:
+ *
+ * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
+ * since fpsimd_syscall_enter().
+ *
+ * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
+ * any point.
+ *
+ * Reset this to FP_STATE_CURRENT to stop lazy discarding.
+ */
+ __this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
+}
+
+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+ preempt_disable();
+
+ /* This code is a bit fragile. Test it. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+ preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
UNHANDLED(el1t, 64, sync)
UNHANDLED(el1t, 64, irq)
@@ -392,60 +296,135 @@ UNHANDLED(el1t, 64, error)
static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_mem_abort(far, esr, regs);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
- enter_from_kernel_mode(regs);
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_sp_pc_abort(far, esr, regs);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_undef(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_bti(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_gcs(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
+ local_daif_inherit(regs);
+ do_el1_mops(regs, esr);
+ local_daif_mask();
+ exit_to_kernel_mode(regs, state);
+}
+
+static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
}
-static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr)
{
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs)) {
+ debug_exception_enter(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint there's nothing more to do:
+ * the single-step is complete.
+ */
+ if (!try_step_suspended_breakpoints(regs))
+ do_el1_softstep(esr, regs);
+ debug_exception_exit(regs);
+ }
+ arm64_exit_el1_dbg(regs, state);
+}
+
+static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr)
+{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
+ irqentry_state_t state;
+
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
+}
+
+static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ irqentry_state_t state;
- arm64_enter_el1_dbg(regs);
- if (!cortex_a76_erratum_1463225_debug_handler(regs))
- do_debug_exception(far, esr, regs);
- arm64_exit_el1_dbg(regs);
+ state = arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_el1_brk64(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs, state);
}
static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_fpac(regs, esr);
local_daif_mask();
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
@@ -471,11 +450,23 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BTI:
el1_bti(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el1_gcs(regs, esr);
+ break;
+ case ESR_ELx_EC_MOPS:
+ el1_mops(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_CUR:
+ el1_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_CUR:
+ el1_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_CUR:
+ el1_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el1_dbg(regs, esr);
+ el1_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el1_fpac(regs, esr);
@@ -488,30 +479,32 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
static __always_inline void __el1_pnmi(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- arm64_enter_nmi(regs);
+ irqentry_state_t state;
+
+ state = irqentry_nmi_enter(regs);
do_interrupt_handler(regs, handler);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static __always_inline void __el1_irq(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_kernel_mode(regs);
+ irqentry_state_t state;
+
+ state = enter_from_kernel_mode(regs);
irq_enter_rcu();
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- arm64_preempt_schedule_irq();
-
- exit_to_kernel_mode(regs);
+ exit_to_kernel_mode(regs, state);
}
static void noinstr el1_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
- if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && regs_irqs_disabled(regs))
__el1_pnmi(regs, handler);
else
__el1_irq(regs, handler);
@@ -530,21 +523,22 @@ asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
}
static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -559,50 +553,50 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_mem_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sve_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sme_acc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_fpsimd_exc(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_sys(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -612,79 +606,132 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
if (!is_ttbr0_addr(instruction_pointer(regs)))
arm64_apply_bp_hardening();
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(far, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_sp_pc_abort(regs->sp, esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_undef(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_bti(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_bti(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_mops(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_gcs(regs, esr);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
bad_el0_sync(regs, 0, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ arm64_exit_to_user_mode(regs);
}
-static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
{
- /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ bool step_done;
+
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ arm64_enter_from_user_mode(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint there's nothing more to do:
+ * the single-step is complete.
+ */
+ step_done = try_step_suspended_breakpoints(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ if (!step_done)
+ do_el0_softstep(esr, regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr)
+{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
- enter_from_user_mode(regs);
- do_debug_exception(far, esr, regs);
+ arm64_enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ do_el0_brk64(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
- fp_user_discard();
+ fpsimd_syscall_enter();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+ fpsimd_syscall_exit();
}
static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_fpac(regs, esr);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -732,11 +779,20 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_MOPS:
el0_mops(regs, esr);
break;
+ case ESR_ELx_EC_GCS:
+ el0_gcs(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el0_dbg(regs, esr);
+ el0_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el0_fpac(regs, esr);
@@ -749,7 +805,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
static void noinstr el0_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
@@ -760,7 +816,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
do_interrupt_handler(regs, handler);
irq_exit_rcu();
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -786,14 +842,15 @@ asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
static void noinstr __el0_error_handler_common(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
+ irqentry_state_t state;
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_ERRCTX);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
do_serror(regs, esr);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
local_daif_restore(DAIF_PROCCTX);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -804,19 +861,27 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
do_el0_cp15(esr, regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
}
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- enter_from_user_mode(regs);
+ arm64_enter_from_user_mode(regs);
cortex_a76_erratum_1463225_svc_handler();
local_daif_restore(DAIF_PROCCTX);
do_el0_svc_compat(regs);
- exit_to_user_mode(regs);
+ arm64_exit_to_user_mode(regs);
+}
+
+static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_bkpt32(esr, regs);
+ arm64_exit_to_user_mode(regs);
}
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
@@ -853,10 +918,16 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
el0_cp15(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BKPT32:
- el0_dbg(regs, esr);
+ el0_bkpt32(regs, esr);
break;
default:
el0_inv(regs, esr);
@@ -884,21 +955,20 @@ UNHANDLED(el0t, 32, fiq)
UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
-#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
unsigned long far = read_sysreg(far_el1);
- arm64_enter_nmi(regs);
+ irqentry_nmi_enter(regs);
panic_bad_stack(regs, esr, far);
}
-#endif /* CONFIG_VMAP_STACK */
#ifdef CONFIG_ARM_SDE_INTERFACE
asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
+ irqentry_state_t state;
unsigned long ret;
/*
@@ -923,9 +993,9 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
else if (cpu_has_pan())
set_pstate_pan(0);
- arm64_enter_nmi(regs);
+ state = irqentry_nmi_enter(regs);
ret = do_sdei_event(regs, arg);
- arm64_exit_nmi(regs);
+ irqentry_nmi_exit(regs, state);
return ret;
}
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index f0c16640ef21..025140caafe7 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -94,7 +94,7 @@ SYM_CODE_START(ftrace_caller)
stp x29, x30, [sp, #FREGS_SIZE]
add x29, sp, #FREGS_SIZE
- /* Prepare arguments for the the tracer func */
+ /* Prepare arguments for the tracer func */
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
mov x1, x9 // parent_ip (callsite's LR)
mov x3, sp // regs
@@ -329,24 +329,28 @@ SYM_FUNC_END(ftrace_stub_graph)
* @fp is checked against the value passed by ftrace_graph_caller().
*/
SYM_CODE_START(return_to_handler)
- /* save return value regs */
- sub sp, sp, #FGRET_REGS_SIZE
- stp x0, x1, [sp, #FGRET_REGS_X0]
- stp x2, x3, [sp, #FGRET_REGS_X2]
- stp x4, x5, [sp, #FGRET_REGS_X4]
- stp x6, x7, [sp, #FGRET_REGS_X6]
- str x29, [sp, #FGRET_REGS_FP] // parent's fp
+ /* Make room for ftrace_regs */
+ sub sp, sp, #FREGS_SIZE
+
+ /* Save return value regs */
+ stp x0, x1, [sp, #FREGS_X0]
+ stp x2, x3, [sp, #FREGS_X2]
+ stp x4, x5, [sp, #FREGS_X4]
+ stp x6, x7, [sp, #FREGS_X6]
+
+ /* Save the callsite's FP */
+ str x29, [sp, #FREGS_FP]
mov x0, sp
- bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs);
+ bl ftrace_return_to_handler // addr = ftrace_return_to_hander(fregs);
mov x30, x0 // restore the original return address
- /* restore return value regs */
- ldp x0, x1, [sp, #FGRET_REGS_X0]
- ldp x2, x3, [sp, #FGRET_REGS_X2]
- ldp x4, x5, [sp, #FGRET_REGS_X4]
- ldp x6, x7, [sp, #FGRET_REGS_X6]
- add sp, sp, #FGRET_REGS_SIZE
+ /* Restore return value regs */
+ ldp x0, x1, [sp, #FREGS_X0]
+ ldp x2, x3, [sp, #FREGS_X2]
+ ldp x4, x5, [sp, #FREGS_X4]
+ ldp x6, x7, [sp, #FREGS_X6]
+ add sp, sp, #FREGS_SIZE
ret
SYM_CODE_END(return_to_handler)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6ad61de03d0a..f8018b5c1f9a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -25,6 +25,7 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/scs.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -54,7 +55,6 @@
.endif
sub sp, sp, #PT_REGS_SIZE
-#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
@@ -96,7 +96,6 @@
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
-#endif
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
@@ -284,15 +283,16 @@ alternative_else_nop_endif
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, create a final frame record.
- * For exceptions from EL1, create a synthetic frame record so the
- * interrupted code shows up in the backtrace.
+ * Create a metadata frame record. The unwinder will use this to
+ * identify and unwind exception boundaries.
*/
- .if \el == 0
stp xzr, xzr, [sp, #S_STACKFRAME]
+ .if \el == 0
+ mov x0, #FRAME_META_TYPE_FINAL
.else
- stp x29, x22, [sp, #S_STACKFRAME]
+ mov x0, #FRAME_META_TYPE_PT_REGS
.endif
+ str x0, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
@@ -315,7 +315,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
alternative_else_nop_endif
mrs_s x20, SYS_ICC_PMR_EL1
- str x20, [sp, #S_PMR_SAVE]
+ str w20, [sp, #S_PMR]
mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
msr_s SYS_ICC_PMR_EL1, x20
@@ -342,7 +342,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING
b .Lskip_pmr_restore\@
alternative_else_nop_endif
- ldr x20, [sp, #S_PMR_SAVE]
+ ldr w20, [sp, #S_PMR]
msr_s SYS_ICC_PMR_EL1, x20
/* Ensure priority change is seen by redistributor */
@@ -428,12 +428,9 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
.if \el == 0
-alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
- ldr lr, [sp, #S_LR]
- add sp, sp, #PT_REGS_SIZE // restore sp
- eret
-alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
msr far_el1, x29
ldr_this_cpu x30, this_cpu_vector, x29
@@ -442,16 +439,26 @@ alternative_else_nop_endif
ldr lr, [sp, #S_LR] // restore x30
add sp, sp, #PT_REGS_SIZE // restore sp
br x29
+
+.L_skip_tramp_exit_\@:
#endif
- .else
+ .endif
+
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
+ .if \el == 0
+ /* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
+ .else
/* Ensure any device/NC reads complete */
alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+ .endif
eret
- .endif
sb
.endm
@@ -531,7 +538,6 @@ SYM_CODE_START(vectors)
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
-#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
@@ -559,7 +565,6 @@ SYM_CODE_START_LOCAL(__bad_stack)
bl handle_bad_stack
ASM_BUG()
SYM_CODE_END(__bad_stack)
-#endif /* CONFIG_VMAP_STACK */
.macro entry_handler el:req, ht:req, regsize:req, label:req
@@ -605,7 +610,7 @@ SYM_CODE_END(ret_to_kernel)
SYM_CODE_START_LOCAL(ret_to_user)
ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step
enable_step_tsk x19, x2
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
bl stackleak_erase_on_task_stack
#endif
kernel_exit 0
@@ -816,6 +821,7 @@ SYM_CODE_END(__bp_harden_el1_vectors)
*
*/
SYM_FUNC_START(cpu_switch_to)
+ save_and_disable_daif x11
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -839,6 +845,7 @@ SYM_FUNC_START(cpu_switch_to)
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
scs_load_current
+ restore_irq x11
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -865,6 +872,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
scs_save x16
@@ -879,8 +887,10 @@ SYM_FUNC_START(call_on_irq_stack)
/* Move to the new stack and call the function there */
add sp, x16, #IRQ_STACK_SIZE
+ restore_irq x9
blr x1
+ save_and_disable_daif x9
/*
* Restore the SP from the FP, and restore the FP and LR from the frame
* record.
@@ -888,6 +898,7 @@ SYM_FUNC_START(call_on_irq_stack)
mov sp, x29
ldp x29, x30, [sp], #16
scs_load_current
+ restore_irq x9
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
@@ -994,7 +1005,6 @@ SYM_CODE_START(__sdei_asm_handler)
1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
2: str x19, [x5]
-#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
@@ -1007,7 +1017,6 @@ SYM_CODE_START(__sdei_asm_handler)
2: mov x6, #SDEI_STACK_SIZE
add x5, x5, x6
mov sp, x5
-#endif
#ifdef CONFIG_SHADOW_CALL_STACK
/* Use a separate shadow call stack for normal and critical events */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 91e44ac7150f..c154f72634e0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -85,13 +85,13 @@
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -119,7 +119,7 @@
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
#ifdef CONFIG_ARM64_SVE
@@ -180,12 +180,12 @@ static inline void set_sve_default_vl(int val)
set_default_vl(ARM64_VEC_SVE, val);
}
-static void __percpu *efi_sve_state;
+static u8 *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
-extern void __percpu *efi_sve_state;
+extern u8 *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
#endif
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
static void fpsimd_bind_task_to_cpu(void);
-static void __get_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
- WARN_ON(busy);
-}
-
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
* On RT kernels local_bh_disable() is not sufficient because it only
* serializes soft interrupt related sections via a local lock, but stays
* preemptible. Disabling preemption is the right choice here as bottom
@@ -238,18 +225,21 @@ static void __get_cpu_fpsimd_context(void)
*/
static void get_cpu_fpsimd_context(void)
{
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_disable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ /*
+ * The softirq subsystem lacks a true unmask/mask API, and
+ * re-enabling softirq processing using local_bh_enable() will
+ * not only unmask softirqs, it will also result in immediate
+ * delivery of any pending softirqs.
+ * This is undesirable when running with IRQs disabled, but in
+ * that case, there is no need to mask softirqs in the first
+ * place, so only bother doing so when IRQs are enabled.
+ */
+ if (!irqs_disabled())
+ local_bh_disable();
+ } else {
preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
+ }
}
/*
@@ -261,16 +251,12 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
- if (!IS_ENABLED(CONFIG_PREEMPT_RT))
- local_bh_enable();
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (!irqs_disabled())
+ local_bh_enable();
+ } else {
preempt_enable();
-}
-
-static bool have_cpu_fpsimd_context(void)
-{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
+ }
}
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
@@ -383,19 +369,18 @@ static void task_fpsimd_load(void)
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
- if (test_and_clear_thread_flag(TIF_SVE))
- sve_user_disable();
+ clear_thread_flag(TIF_SVE);
break;
case FP_STATE_SVE:
- if (!thread_sm_enabled(&current->thread) &&
- !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
- sve_user_enable();
+ if (!thread_sm_enabled(&current->thread))
+ WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
if (test_thread_flag(TIF_SVE))
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
@@ -406,10 +391,10 @@ static void task_fpsimd_load(void)
default:
/*
* This indicates either a bug in
- * fpsimd_save() or memory corruption, we
+ * fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
- * memory allocated for FPSMID registers so
+ * memory allocated for FPSIMD registers so
* try that and hope for the best.
*/
WARN_ON_ONCE(1);
@@ -436,6 +421,9 @@ static void task_fpsimd_load(void)
restore_ffr = system_supports_fa64();
}
+ if (system_supports_fpmr())
+ write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR);
+
if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
@@ -457,7 +445,7 @@ static void task_fpsimd_load(void)
* than via current, if we are saving KVM state then it will have
* ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -467,18 +455,24 @@ static void fpsimd_save(void)
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
+ if (system_supports_fpmr())
+ *(last->fpmr) = read_sysreg_s(SYS_FPMR);
+
/*
- * If a task is in a syscall the ABI allows us to only
- * preserve the state shared with FPSIMD so don't bother
- * saving the full SVE state in that case.
+ * Save SVE state if it is live.
+ *
+ * The syscall ABI discards live SVE state at syscall entry. When
+ * entering a syscall, fpsimd_syscall_enter() sets to_save to
+ * FP_STATE_FPSIMD to allow the SVE state to be lazily discarded until
+ * either new SVE state is loaded+bound or fpsimd_syscall_exit() is
+ * called prior to a return to userspace.
*/
- if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
- !in_syscall(current_pt_regs())) ||
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
@@ -555,7 +549,7 @@ static unsigned int find_supported_vector_length(enum vec_type type,
#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
-static int vec_proc_do_default_vl(struct ctl_table *table, int write,
+static int vec_proc_do_default_vl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct vl_info *info = table->extra1;
@@ -582,14 +576,13 @@ static int vec_proc_do_default_vl(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table sve_default_vl_table[] = {
+static const struct ctl_table sve_default_vl_table[] = {
{
.procname = "sve_default_vector_length",
.mode = 0644,
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SVE],
},
- { }
};
static int __init sve_sysctl_init(void)
@@ -606,14 +599,13 @@ static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
#if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL)
-static struct ctl_table sme_default_vl_table[] = {
+static const struct ctl_table sme_default_vl_table[] = {
{
.procname = "sme_default_vector_length",
.mode = 0644,
.proc_handler = vec_proc_do_default_vl,
.extra1 = &vl_info[ARM64_VEC_SME],
},
- { }
};
static int __init sme_sysctl_init(void)
@@ -673,7 +665,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
* task->thread.uw.fpsimd_state must be up to date before calling this
* function.
*/
-static void fpsimd_to_sve(struct task_struct *task)
+static inline void fpsimd_to_sve(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
@@ -697,7 +689,7 @@ static void fpsimd_to_sve(struct task_struct *task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
*/
-static void sve_to_fpsimd(struct task_struct *task)
+static inline void sve_to_fpsimd(struct task_struct *task)
{
unsigned int vq, vl;
void const *sst = task->thread.sve_state;
@@ -716,38 +708,39 @@ static void sve_to_fpsimd(struct task_struct *task)
}
}
-#ifdef CONFIG_ARM64_SVE
-/*
- * Call __sve_free() directly only if you know task can't be scheduled
- * or preempted.
- */
-static void __sve_free(struct task_struct *task)
+static inline void __fpsimd_zero_vregs(struct user_fpsimd_state *fpsimd)
{
- kfree(task->thread.sve_state);
- task->thread.sve_state = NULL;
+ memset(&fpsimd->vregs, 0, sizeof(fpsimd->vregs));
}
-static void sve_free(struct task_struct *task)
+/*
+ * Simulate the effects of an SMSTOP SM instruction.
+ */
+void task_smstop_sm(struct task_struct *task)
{
- WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
+ if (!thread_sm_enabled(&task->thread))
+ return;
+
+ __fpsimd_zero_vregs(&task->thread.uw.fpsimd_state);
+ task->thread.uw.fpsimd_state.fpsr = 0x0800009f;
+ if (system_supports_fpmr())
+ task->thread.uw.fpmr = 0;
- __sve_free(task);
+ task->thread.svcr &= ~SVCR_SM_MASK;
+ task->thread.fp_type = FP_STATE_FPSIMD;
}
-/*
- * Return how many bytes of memory are required to store the full SVE
- * state for task, given task's currently configured vector length.
- */
-size_t sve_state_size(struct task_struct const *task)
+void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__always_unused p)
{
- unsigned int vl = 0;
-
- if (system_supports_sve())
- vl = task_get_sve_vl(task);
- if (system_supports_sme())
- vl = max(vl, task_get_sme_vl(task));
+ write_sysreg_s(read_sysreg_s(SYS_SCTLR_EL1) | SCTLR_EL1_EnFPM_MASK,
+ SYS_SCTLR_EL1);
+}
- return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+#ifdef CONFIG_ARM64_SVE
+static void sve_free(struct task_struct *task)
+{
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = NULL;
}
/*
@@ -774,69 +767,34 @@ void sve_alloc(struct task_struct *task, bool flush)
kzalloc(sve_state_size(task), GFP_KERNEL);
}
-
/*
- * Force the FPSIMD state shared with SVE to be updated in the SVE state
- * even if the SVE state is the current active state.
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to the
+ * task's currently effective FPSIMD/SVE state.
*
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void fpsimd_force_sync_to_sve(struct task_struct *task)
-{
- fpsimd_to_sve(task);
-}
-
-/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the user task, irrespective of when SVE is in use or not.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void fpsimd_sync_to_sve(struct task_struct *task)
-{
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
- fpsimd_to_sve(task);
-}
-
-/*
- * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
- * the user task, irrespective of whether SVE is in use or not.
- *
- * This should only be called by ptrace. task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- */
-void sve_sync_to_fpsimd(struct task_struct *task)
+void fpsimd_sync_from_effective_state(struct task_struct *task)
{
if (task->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(task);
}
/*
- * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.uw.fpsimd_state.
+ * Ensure that the task's currently effective FPSIMD/SVE state is up to date
+ * with respect to task->thread.uw.fpsimd_state, zeroing any effective
+ * non-FPSIMD (S)SVE state.
*
- * This should only be called by ptrace to merge new FPSIMD register
- * values into a task for which SVE is currently active.
- * task must be non-runnable.
- * task->thread.sve_state must point to at least sve_state_size(task)
- * bytes of allocated kernel memory.
- * task->thread.uw.fpsimd_state must already have been initialised with
- * the new FPSIMD register values to be merged in.
+ * The task's FPSIMD/SVE/SME state must not be subject to concurrent
+ * manipulation.
*/
-void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
+void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE) &&
- !thread_sm_enabled(&task->thread))
+ if (task->thread.fp_type != FP_STATE_SVE)
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -845,10 +803,73 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
__fpsimd_to_sve(sst, fst, vq);
}
+static int change_live_vector_length(struct task_struct *task,
+ enum vec_type type,
+ unsigned long vl)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ void *sve_state = NULL, *sme_state = NULL;
+
+ if (type == ARM64_VEC_SME)
+ sme_vl = vl;
+ else
+ sve_vl = vl;
+
+ /*
+ * Allocate the new sve_state and sme_state before freeing the old
+ * copies so that allocation failure can be handled without needing to
+ * mutate the task's state in any way.
+ *
+ * Changes to the SVE vector length must not discard live ZA state or
+ * clear PSTATE.ZA, as userspace code which is unaware of the AAPCS64
+ * ZA lazy saving scheme may attempt to change the SVE vector length
+ * while unsaved/dormant ZA state exists.
+ */
+ sve_state = kzalloc(__sve_state_size(sve_vl, sme_vl), GFP_KERNEL);
+ if (!sve_state)
+ goto out_mem;
+
+ if (type == ARM64_VEC_SME) {
+ sme_state = kzalloc(__sme_state_size(sme_vl), GFP_KERNEL);
+ if (!sme_state)
+ goto out_mem;
+ }
+
+ if (task == current)
+ fpsimd_save_and_flush_current_state();
+ else
+ fpsimd_flush_task_state(task);
+
+ /*
+ * Always preserve PSTATE.SM and the effective FPSIMD state, zeroing
+ * other SVE state.
+ */
+ fpsimd_sync_from_effective_state(task);
+ task_set_vl(task, type, vl);
+ kfree(task->thread.sve_state);
+ task->thread.sve_state = sve_state;
+ fpsimd_sync_to_effective_state_zeropad(task);
+
+ if (type == ARM64_VEC_SME) {
+ task->thread.svcr &= ~SVCR_ZA_MASK;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = sme_state;
+ }
+
+ return 0;
+
+out_mem:
+ kfree(sve_state);
+ kfree(sme_state);
+ return -ENOMEM;
+}
+
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
- bool free_sme = false;
+ bool onexec = flags & PR_SVE_SET_VL_ONEXEC;
+ bool inherit = flags & PR_SVE_VL_INHERIT;
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
@@ -868,73 +889,17 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
vl = find_supported_vector_length(type, vl);
- if (flags & (PR_SVE_VL_INHERIT |
- PR_SVE_SET_VL_ONEXEC))
+ if (!onexec && vl != task_get_vl(task, type)) {
+ if (change_live_vector_length(task, type, vl))
+ return -ENOMEM;
+ }
+
+ if (onexec || inherit)
task_set_vl_onexec(task, type, vl);
else
/* Reset VL to system default on next exec: */
task_set_vl_onexec(task, type, 0);
- /* Only actually set the VL if not deferred: */
- if (flags & PR_SVE_SET_VL_ONEXEC)
- goto out;
-
- if (vl == task_get_vl(task, type))
- goto out;
-
- /*
- * To ensure the FPSIMD bits of the SVE vector registers are preserved,
- * write any live register state back to task_struct, and convert to a
- * regular FPSIMD thread.
- */
- if (task == current) {
- get_cpu_fpsimd_context();
-
- fpsimd_save();
- }
-
- fpsimd_flush_task_state(task);
- if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread)) {
- sve_to_fpsimd(task);
- task->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- if (system_supports_sme()) {
- if (type == ARM64_VEC_SME ||
- !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
- /*
- * We are changing the SME VL or weren't using
- * SME anyway, discard the state and force a
- * reallocation.
- */
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_tsk_thread_flag(task, TIF_SME);
- free_sme = true;
- }
- }
-
- if (task == current)
- put_cpu_fpsimd_context();
-
- task_set_vl(task, type, vl);
-
- /*
- * Free the changed states if they are not in use, SME will be
- * reallocated to the correct size on next use and we just
- * allocate SVE now in case it is needed for use in streaming
- * mode.
- */
- if (system_supports_sve()) {
- sve_free(task);
- sve_alloc(task, true);
- }
-
- if (free_sme)
- sme_free(task);
-
-out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
flags & PR_SVE_VL_INHERIT);
@@ -1149,53 +1114,31 @@ static void __init sve_efi_setup(void)
if (!sve_vl_valid(max_vl))
goto fail;
- efi_sve_state = __alloc_percpu(
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
+ efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
+ GFP_KERNEL);
if (!efi_sve_state)
goto fail;
return;
fail:
- panic("Cannot allocate percpu memory for EFI SVE save/restore");
+ panic("Cannot allocate memory for EFI SVE save/restore");
}
-/*
- * Enable SVE for EL1.
- * Intended for use by the cpufeatures code during CPU boot.
- */
-void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
isb();
-}
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-u64 read_zcr_features(void)
-{
- /*
- * Set the maximum possible VL, and write zeroes to all other
- * bits to see if they stick.
- */
- sve_kernel_enable(NULL);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
- /* Return LEN value that would be written to get the maximum VL */
- return sve_vq_from_vl(sve_get_vl()) - 1;
+ write_sysreg_s(0, SYS_ZCR_EL1);
}
void __init sve_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SVE];
- u64 zcr;
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
unsigned long b;
+ int max_bit;
if (!system_supports_sve())
return;
@@ -1208,17 +1151,8 @@ void __init sve_setup(void)
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);
- zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
- info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
-
- /*
- * Sanity-check that the max VL we determined through CPU features
- * corresponds properly to sve_vq_map. If not, do our best:
- */
- if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
- info->max_vl)))
- info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
- info->max_vl);
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
/*
* For the default VL, pick the maximum supported value <= 64.
@@ -1261,7 +1195,7 @@ void __init sve_setup(void)
*/
void fpsimd_release_task(struct task_struct *dead_task)
{
- __sve_free(dead_task);
+ sve_free(dead_task);
sme_free(dead_task);
}
@@ -1280,8 +1214,10 @@ void fpsimd_release_task(struct task_struct *dead_task)
*/
void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state && flush) {
- memset(task->thread.sme_state, 0, sme_state_size(task));
+ if (task->thread.sme_state) {
+ if (flush)
+ memset(task->thread.sme_state, 0,
+ sme_state_size(task));
return;
}
@@ -1296,7 +1232,7 @@ static void sme_free(struct task_struct *task)
task->thread.sme_state = NULL;
}
-void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sme(const struct arm64_cpu_capabilities *__always_unused p)
{
/* Set priority for all PEs to architecturally defined minimum */
write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
@@ -1306,85 +1242,57 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
isb();
+ /* Ensure all bits in SMCR are set to known values */
+ write_sysreg_s(0, SYS_SMCR_EL1);
+
/* Allow EL0 to access TPIDR2 */
write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
isb();
}
-/*
- * This must be called after sme_kernel_enable(), we rely on the
- * feature table being sorted to ensure this.
- */
-void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_sme2(const struct arm64_cpu_capabilities *__always_unused p)
{
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME2 <= ARM64_SME);
+
/* Allow use of ZT0 */
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
SYS_SMCR_EL1);
}
-/*
- * This must be called after sme_kernel_enable(), we rely on the
- * feature table being sorted to ensure this.
- */
-void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+void cpu_enable_fa64(const struct arm64_cpu_capabilities *__always_unused p)
{
+ /* This must be enabled after SME */
+ BUILD_BUG_ON(ARM64_SME_FA64 <= ARM64_SME);
+
/* Allow use of FA64 */
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
SYS_SMCR_EL1);
}
-/*
- * Read the pseudo-SMCR used by cpufeatures to identify the supported
- * vector length.
- *
- * Use only if SME is present.
- * This function clobbers the SME vector length.
- */
-u64 read_smcr_features(void)
-{
- sme_kernel_enable(NULL);
-
- /*
- * Set the maximum possible VL.
- */
- write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
- SYS_SMCR_EL1);
-
- /* Return LEN value that would be written to get the maximum VL */
- return sve_vq_from_vl(sme_get_vl()) - 1;
-}
-
void __init sme_setup(void)
{
struct vl_info *info = &vl_info[ARM64_VEC_SME];
- u64 smcr;
- int min_bit;
+ int min_bit, max_bit;
if (!system_supports_sme())
return;
+ min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
+
/*
* SME doesn't require any particular vector length be
* supported but it does require at least one. We should have
* disabled the feature entirely while bringing up CPUs but
- * let's double check here.
+ * let's double check here. The bitmap is SVE_VQ_MAP sized for
+ * sharing with SVE.
*/
- WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));
+ WARN_ON(min_bit >= SVE_VQ_MAX);
- min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));
- smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
- info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);
-
- /*
- * Sanity-check that the max VL we determined through CPU features
- * corresponds properly to sme_vq_map. If not, do our best:
- */
- if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
- info->max_vl)))
- info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
- info->max_vl);
+ max_bit = find_first_bit(info->vq_map, SVE_VQ_MAX);
+ info->max_vl = sve_vl_from_vq(__bit_to_vq(max_bit));
WARN_ON(info->min_vl > info->max_vl);
@@ -1404,6 +1312,22 @@ void __init sme_setup(void)
get_sme_default_vl());
}
+void sme_suspend_exit(void)
+{
+ u64 smcr = 0;
+
+ if (!system_supports_sme())
+ return;
+
+ if (system_supports_fa64())
+ smcr |= SMCR_ELx_FA64;
+ if (system_supports_sme2())
+ smcr |= SMCR_ELx_EZT0;
+
+ write_sysreg_s(smcr, SYS_SMCR_EL1);
+ write_sysreg_s(0, SYS_SMPRI_EL1);
+}
+
#endif /* CONFIG_ARM64_SME */
static void sve_init_regs(void)
@@ -1427,6 +1351,7 @@ static void sve_init_regs(void)
} else {
fpsimd_to_sve(current);
current->thread.fp_type = FP_STATE_SVE;
+ fpsimd_flush_task_state(current);
}
}
@@ -1495,7 +1420,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
* If this not a trap due to SME being disabled then something
* is being used in the wrong mode, report as SIGILL.
*/
- if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
+ if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) {
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
return;
}
@@ -1519,6 +1444,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sme_set_vq(vq_minus_one);
fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_flush_task_state(current);
}
put_cpu_fpsimd_context();
@@ -1529,8 +1456,17 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
*/
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ /* Even if we chose not to use FPSIMD, the hardware could still trap: */
+ if (!system_supports_fpsimd()) {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
+ return;
+ }
+
+ /*
+ * When FPSIMD is enabled, we should never take a trap unless something
+ * has gone very wrong.
+ */
+ BUG();
}
/*
@@ -1558,6 +1494,57 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
+/*
+ * Invalidate any task's FPSIMD state that is present on this cpu.
+ * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
+ * before calling this function.
+ */
+static void fpsimd_flush_cpu_state(void)
+{
+ WARN_ON(!system_supports_fpsimd());
+ __this_cpu_write(fpsimd_last_state.st, NULL);
+
+ /*
+ * Leaving streaming mode enabled will cause issues for any kernel
+ * NEON and leaving streaming mode or ZA enabled may increase power
+ * consumption.
+ */
+ if (system_supports_sme())
+ sme_smstop();
+
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1565,24 +1552,31 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
-
- /*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
- */
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_flush_cpu_state();
+ fpsimd_load_kernel_state(next);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- __put_cpu_fpsimd_context();
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
}
static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1655,6 +1649,9 @@ void fpsimd_flush_thread(void)
current->thread.svcr = 0;
}
+ if (system_supports_fpmr())
+ current->thread.uw.fpmr = 0;
+
current->thread.fp_type = FP_STATE_FPSIMD;
put_cpu_fpsimd_context();
@@ -1672,44 +1669,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
- put_cpu_fpsimd_context();
-}
-
-/*
- * Like fpsimd_preserve_current_state(), but ensure that
- * current->thread.uw.fpsimd_state is updated so that it can be copied to
- * the signal frame.
- */
-void fpsimd_signal_preserve_current_state(void)
-{
- fpsimd_preserve_current_state();
- if (test_thread_flag(TIF_SVE))
- sve_to_fpsimd(current);
-}
-
-/*
- * Called by KVM when entering the guest.
- */
-void fpsimd_kvm_prepare(void)
-{
- if (!system_supports_sve())
- return;
-
- /*
- * KVM does not save host SVE state since we can only enter
- * the guest from a syscall so the ABI means that only the
- * non-saved SVE state needs to be saved. If we have left
- * SVE enabled for performance reasons then update the task
- * state to be FPSIMD only.
- */
- get_cpu_fpsimd_context();
-
- if (test_and_clear_thread_flag(TIF_SVE)) {
- sve_to_fpsimd(current);
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1729,6 +1689,7 @@ static void fpsimd_bind_task_to_cpu(void)
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
+ last->fpmr = &current->thread.uw.fpmr;
last->fp_type = &current->thread.fp_type;
last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
@@ -1771,13 +1732,23 @@ void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
void fpsimd_restore_current_state(void)
{
/*
- * For the tasks that were created before we detected the absence of
- * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
- * e.g, init. This could be then inherited by the children processes.
- * If we later detect that the system doesn't support FP/SIMD,
- * we must clear the flag for all the tasks to indicate that the
- * FPSTATE is clean (as we can't have one) to avoid looping for ever in
- * do_notify_resume().
+ * TIF_FOREIGN_FPSTATE is set on the init task and copied by
+ * arch_dup_task_struct() regardless of whether FP/SIMD is detected.
+ * Thus user threads can have this set even when FP/SIMD hasn't been
+ * detected.
+ *
+ * When FP/SIMD is detected, begin_new_exec() will set
+ * TIF_FOREIGN_FPSTATE via flush_thread() -> fpsimd_flush_thread(),
+ * and fpsimd_thread_switch() will set TIF_FOREIGN_FPSTATE when
+ * switching tasks. We detect FP/SIMD before we exec the first user
+ * process, ensuring this has TIF_FOREIGN_FPSTATE set and
+ * do_notify_resume() will call fpsimd_restore_current_state() to
+ * install the user FP/SIMD context.
+ *
+ * When FP/SIMD is not detected, nothing else will clear or set
+ * TIF_FOREIGN_FPSTATE prior to the first return to userspace, and
+ * we must clear TIF_FOREIGN_FPSTATE to avoid do_notify_resume()
+ * looping forever calling fpsimd_restore_current_state().
*/
if (!system_supports_fpsimd()) {
clear_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -1794,30 +1765,14 @@ void fpsimd_restore_current_state(void)
put_cpu_fpsimd_context();
}
-/*
- * Load an updated userland FPSIMD state for 'current' from memory and set the
- * flag that indicates that the FPSIMD register contents are the most recent
- * FPSIMD state of 'current'. This is used by the signal code to restore the
- * register state when returning from a signal handler in FPSIMD only cases,
- * any SVE context will be discarded.
- */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
- get_cpu_fpsimd_context();
-
current->thread.uw.fpsimd_state = *state;
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
fpsimd_to_sve(current);
-
- task_fpsimd_load();
- fpsimd_bind_task_to_cpu();
-
- clear_thread_flag(TIF_FOREIGN_FPSTATE);
-
- put_cpu_fpsimd_context();
}
/*
@@ -1834,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1847,25 +1803,15 @@ void fpsimd_flush_task_state(struct task_struct *t)
barrier();
}
-/*
- * Invalidate any task's FPSIMD state that is present on this cpu.
- * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
- * before calling this function.
- */
-static void fpsimd_flush_cpu_state(void)
+void fpsimd_save_and_flush_current_state(void)
{
- WARN_ON(!system_supports_fpsimd());
- __this_cpu_write(fpsimd_last_state.st, NULL);
-
- /*
- * Leaving streaming mode enabled will cause issues for any kernel
- * NEON and leaving streaming mode or ZA enabled may increase power
- * consumption.
- */
- if (system_supports_sme())
- sme_smstop();
+ if (!system_supports_fpsimd())
+ return;
- set_thread_flag(TIF_FOREIGN_FPSTATE);
+ get_cpu_fpsimd_context();
+ fpsimd_save_user_state();
+ fpsimd_flush_task_state(current);
+ put_cpu_fpsimd_context();
}
/*
@@ -1874,13 +1820,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1901,21 +1849,63 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
+ WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_state(state);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);
@@ -1927,22 +1917,39 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
-static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
-static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
-static DEFINE_PER_CPU(bool, efi_sve_state_used);
-static DEFINE_PER_CPU(bool, efi_sm_state);
+static struct user_fpsimd_state efi_fpsimd_state;
+static bool efi_fpsimd_state_used;
+static bool efi_sve_state_used;
+static bool efi_sm_state;
/*
* EFI runtime services support functions
@@ -1966,27 +1973,25 @@ void __efi_fpsimd_begin(void)
if (!system_supports_fpsimd())
return;
- WARN_ON(preemptible());
-
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
+ WARN_ON(preemptible());
+
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
* preserving:
*/
- if (system_supports_sve() && likely(efi_sve_state)) {
- char *sve_state = this_cpu_ptr(efi_sve_state);
+ if (system_supports_sve() && efi_sve_state != NULL) {
bool ffr = true;
u64 svcr;
- __this_cpu_write(efi_sve_state_used, true);
+ efi_sve_state_used = true;
if (system_supports_sme()) {
svcr = read_sysreg_s(SYS_SVCR);
- __this_cpu_write(efi_sm_state,
- svcr & SVCR_SM_MASK);
+ efi_sm_state = svcr & SVCR_SM_MASK;
/*
* Unless we have FA64 FFR does not
@@ -1996,19 +2001,18 @@ void __efi_fpsimd_begin(void)
ffr = !(svcr & SVCR_SM_MASK);
}
- sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
+ sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
+ &efi_fpsimd_state.fpsr, ffr);
if (system_supports_sme())
sysreg_clear_set_s(SYS_SVCR,
SVCR_SM_MASK, 0);
} else {
- fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
+ fpsimd_save_state(&efi_fpsimd_state);
}
- __this_cpu_write(efi_fpsimd_state_used, true);
+ efi_fpsimd_state_used = true;
}
}
@@ -2020,12 +2024,10 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
- kernel_neon_end();
+ if (!efi_fpsimd_state_used) {
+ kernel_neon_end(&efi_fpsimd_state);
} else {
- if (system_supports_sve() &&
- likely(__this_cpu_read(efi_sve_state_used))) {
- char const *sve_state = this_cpu_ptr(efi_sve_state);
+ if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
/*
@@ -2034,7 +2036,7 @@ void __efi_fpsimd_end(void)
* streaming mode.
*/
if (system_supports_sme()) {
- if (__this_cpu_read(efi_sm_state)) {
+ if (efi_sm_state) {
sysreg_clear_set_s(SYS_SVCR,
0,
SVCR_SM_MASK);
@@ -2048,14 +2050,15 @@ void __efi_fpsimd_end(void)
}
}
- sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()),
- &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
- ffr);
+ sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
+ &efi_fpsimd_state.fpsr, ffr);
- __this_cpu_write(efi_sve_state_used, false);
+ efi_sve_state_used = false;
} else {
- fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
+ fpsimd_load_state(&efi_fpsimd_state);
}
+
+ efi_fpsimd_state_used = false;
}
}
@@ -2110,6 +2113,13 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
+void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__always_unused p)
+{
+ unsigned long enable = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN;
+ write_sysreg(read_sysreg(CPACR_EL1) | enable, CPACR_EL1);
+ isb();
+}
+
/*
* FP/SIMD support code initialisation.
*/
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..5a1554a44162 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,7 +15,7 @@
#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
struct fregs_offset {
@@ -23,10 +23,10 @@ struct fregs_offset {
int offset;
};
-#define FREGS_OFFSET(n, field) \
-{ \
- .name = n, \
- .offset = offsetof(struct ftrace_regs, field), \
+#define FREGS_OFFSET(n, field) \
+{ \
+ .name = n, \
+ .offset = offsetof(struct __arch_ftrace_regs, field), \
}
static const struct fregs_offset fregs_offsets[] = {
@@ -143,6 +143,69 @@ unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/* Convert fentry_ip to the symbol address without kallsyms */
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ u32 insn;
+
+ /*
+ * When using patchable-function-entry without pre-function NOPS, ftrace
+ * entry is the address of the first NOP after the function entry point.
+ *
+ * The compiler has either generated:
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func-04: BTI C
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * The fentry_ip is the address of `BL <caller>` which is at `func + 4`
+ * bytes in either case.
+ */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /*
+ * When using patchable-function-entry with pre-function NOPs, BTI is
+ * a bit different.
+ *
+ * func+00: func: NOP // To be patched to MOV X9, LR
+ * func+04: NOP // To be patched to BL <caller>
+ *
+ * Or:
+ *
+ * func+00: func: BTI C
+ * func+04: NOP // To be patched to MOV X9, LR
+ * func+08: NOP // To be patched to BL <caller>
+ *
+ * The fentry_ip is the address of `BL <caller>` which is at either
+ * `func + 4` or `func + 8` depends on whether there is a BTI.
+ */
+
+ /* If there is no BTI, the func address should be one instruction before. */
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return fentry_ip - AARCH64_INSN_SIZE;
+
+ /* We want to be extra safe in case entry ip is on the page edge,
+ * but otherwise we need to avoid get_kernel_nofault()'s overhead.
+ */
+ if ((fentry_ip & ~PAGE_MASK) < AARCH64_INSN_SIZE * 2) {
+ if (get_kernel_nofault(insn, (u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2)))
+ return 0;
+ } else {
+ insn = *(u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2);
+ }
+
+ if (aarch64_insn_is_bti(le32_to_cpu((__le32)insn)))
+ return fentry_ip - AARCH64_INSN_SIZE * 2;
+
+ return fentry_ip - AARCH64_INSN_SIZE;
+}
+
/*
* Replace a single instruction, which may be a branch or NOP.
* If @validate == true, a replaced instruction is checked against 'old'.
@@ -195,10 +258,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}
-static struct plt_entry *get_ftrace_plt(struct module *mod)
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
{
#ifdef CONFIG_MODULES
- struct plt_entry *plt = mod->arch.ftrace_trampolines;
+ struct plt_entry *plt = NULL;
+
+ if (within_module_mem_type(addr, mod, MOD_INIT_TEXT))
+ plt = mod->arch.init_ftrace_trampolines;
+ else if (within_module_mem_type(addr, mod, MOD_TEXT))
+ plt = mod->arch.ftrace_trampolines;
+ else
+ return NULL;
return &plt[FTRACE_PLT_IDX];
#else
@@ -257,20 +327,19 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
* dealing with an out-of-range condition, we can assume it
* is due to a module being loaded far away from the kernel.
*
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * NOTE: __module_text_address() must be called within a RCU read
+ * section, but we can rely on ftrace_lock to ensure that 'mod'
* retains its validity throughout the remainder of this code.
*/
if (!mod) {
- preempt_disable();
+ guard(rcu)();
mod = __module_text_address(pc);
- preempt_enable();
}
if (WARN_ON(!mod))
return false;
- plt = get_ftrace_plt(mod);
+ plt = get_ftrace_plt(mod, pc);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
return false;
@@ -423,7 +492,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return ret;
/*
- * When using mcount, callsites in modules may have been initalized to
+ * When using mcount, callsites in modules may have been initialized to
* call an arbitrary module PLT (which redirects to the _mcount stub)
* rather than the ftrace PLT we'll use at runtime (which redirects to
* the ftrace trampoline). We can ignore the old PLT when initializing
@@ -481,7 +550,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long frame_pointer = arch_ftrace_regs(fregs)->fp;
+ unsigned long *parent = &arch_ftrace_regs(fregs)->lr;
+ unsigned long old;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+
+ if (!function_graph_enter_regs(old, ip, frame_pointer,
+ (void *)frame_pointer, fregs)) {
+ *parent = return_hooker;
+ }
}
#else
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7b236994f0e1..ca04b338cb0d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -32,6 +32,7 @@
#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
+#include <asm/stacktrace/frame.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
@@ -80,28 +81,42 @@
* x19 primary_entry() .. start_kernel() whether we entered with the MMU on
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
- * x22 create_idmap() .. start_kernel() ID map VA of the DT blob
- * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x24 __primary_switch() linear map KASLR seed
- * x25 primary_entry() .. start_kernel() supported VA size
- * x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
bl record_mmu_state
bl preserve_boot_args
- bl create_idmap
+
+ adrp x1, early_init_stack
+ mov sp, x1
+ mov x29, xzr
+ adrp x0, __pi_init_idmap_pg_dir
+ mov x1, xzr
+ bl __pi_create_init_idmap
+
+ /*
+ * If the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate those tables again to
+ * remove any speculatively loaded cache lines.
+ */
+ cbnz x19, 0f
+ dmb sy
+ mov x1, x0 // end of used region
+ adrp x0, __pi_init_idmap_pg_dir
+ adr_l x2, dcache_inval_poc
+ blr x2
+ b 1f
/*
* If we entered with the MMU and caches on, clean the ID mapped part
* of the primary boot code to the PoC so we can safely execute it with
* the MMU off.
*/
- cbz x19, 0f
- adrp x0, __idmap_text_start
+0: adrp x0, __idmap_text_start
adr_l x1, __idmap_text_end
adr_l x2, dcache_clean_poc
blr x2
-0: mov x0, x19
+
+1: mov x0, x19
bl init_kernel_el // w0=cpu_boot_mode
mov x20, x0
@@ -111,14 +126,6 @@ SYM_CODE_START(primary_entry)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
-#if VA_BITS > 48
- mrs_s x0, SYS_ID_AA64MMFR2_EL1
- tst x0, ID_AA64MMFR2_EL1_VARange_MASK
- mov x0, #VA_BITS
- mov x25, #VA_BITS_MIN
- csel x25, x25, x0, eq
- mov x0, x25
-#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@@ -177,267 +184,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
ret
SYM_CODE_END(preserve_boot_args)
-SYM_FUNC_START_LOCAL(clear_page_tables)
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x2, x1, x0
- mov x1, xzr
- b __pi_memset // tail call
-SYM_FUNC_END(clear_page_tables)
-
-/*
- * Macro to populate page table entries, these entries can be pointers to the next level
- * or last level entries pointing to physical memory.
- *
- * tbl: page table address
- * rtbl: pointer to page table or physical memory
- * index: start index to write
- * eindex: end index to write - [index, eindex] written to
- * flags: flags for pagetable entry to or in
- * inc: increment to rtbl between each entry
- * tmp1: temporary variable
- *
- * Preserves: tbl, eindex, flags, inc
- * Corrupts: index, tmp1
- * Returns: rtbl
- */
- .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
-.Lpe\@: phys_to_pte \tmp1, \rtbl
- orr \tmp1, \tmp1, \flags // tmp1 = table entry
- str \tmp1, [\tbl, \index, lsl #3]
- add \rtbl, \rtbl, \inc // rtbl = pa next level
- add \index, \index, #1
- cmp \index, \eindex
- b.ls .Lpe\@
- .endm
-
-/*
- * Compute indices of table entries from virtual address range. If multiple entries
- * were needed in the previous page table level then the next page table level is assumed
- * to be composed of multiple pages. (This effectively scales the end index).
- *
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend]
- * shift: shift used to transform virtual address into index
- * order: #imm 2log(number of entries in page table)
- * istart: index in table corresponding to vstart
- * iend: index in table corresponding to vend
- * count: On entry: how many extra entries were required in previous level, scales
- * our end index.
- * On exit: returns how many extra entries required for next page table level
- *
- * Preserves: vstart, vend
- * Returns: istart, iend, count
- */
- .macro compute_indices, vstart, vend, shift, order, istart, iend, count
- ubfx \istart, \vstart, \shift, \order
- ubfx \iend, \vend, \shift, \order
- add \iend, \iend, \count, lsl \order
- sub \count, \iend, \istart
- .endm
-
-/*
- * Map memory for specified virtual address range. Each level of page table needed supports
- * multiple entries. If a level requires n entries the next page table level is assumed to be
- * formed from n pages.
- *
- * tbl: location of page table
- * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- * vstart: virtual address of start of range
- * vend: virtual address of end of range - we map [vstart, vend - 1]
- * flags: flags to use to map last level entries
- * phys: physical address corresponding to vstart - physical memory is contiguous
- * order: #imm 2log(number of entries in PGD table)
- *
- * If extra_shift is set, an extra level will be populated if the end address does
- * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
- *
- * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
- * Preserves: vstart, flags
- * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
- */
- .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
- sub \vend, \vend, #1
- add \rtbl, \tbl, #PAGE_SIZE
- mov \count, #0
-
- .ifnb \extra_shift
- tst \vend, #~((1 << (\extra_shift)) - 1)
- b.eq .L_\@
- compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
- .endif
-.L_\@:
- compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-
-#if SWAPPER_PGTABLE_LEVELS > 3
- compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
-#if SWAPPER_PGTABLE_LEVELS > 2
- compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- mov \sv, \rtbl
- populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
- mov \tbl, \sv
-#endif
-
- compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
- bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
- populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
- .endm
-
-/*
- * Remap a subregion created with the map_memory macro with modified attributes
- * or output address. The entire remapped region must have been covered in the
- * invocation of map_memory.
- *
- * x0: last level table address (returned in first argument to map_memory)
- * x1: start VA of the existing mapping
- * x2: start VA of the region to update
- * x3: end VA of the region to update (exclusive)
- * x4: start PA associated with the region to update
- * x5: attributes to set on the updated region
- * x6: order of the last level mappings
- */
-SYM_FUNC_START_LOCAL(remap_region)
- sub x3, x3, #1 // make end inclusive
-
- // Get the index offset for the start of the last level table
- lsr x1, x1, x6
- bfi x1, xzr, #0, #PAGE_SHIFT - 3
-
- // Derive the start and end indexes into the last level table
- // associated with the provided region
- lsr x2, x2, x6
- lsr x3, x3, x6
- sub x2, x2, x1
- sub x3, x3, x1
-
- mov x1, #1
- lsl x6, x1, x6 // block size at this level
-
- populate_entries x0, x4, x2, x3, x5, x6, x7
- ret
-SYM_FUNC_END(remap_region)
-
-SYM_FUNC_START_LOCAL(create_idmap)
- mov x28, lr
- /*
- * The ID map carries a 1:1 mapping of the physical address range
- * covered by the loaded image, which could be anywhere in DRAM. This
- * means that the required size of the VA (== PA) space is decided at
- * boot time, and could be more than the configured size of the VA
- * space for ordinary kernel and user space mappings.
- *
- * There are three cases to consider here:
- * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
- * the placement of the image. In this case, we configure one extra
- * level of translation on the fly for the ID map only. (This case
- * also covers 42-bit VA/52-bit PA on 64k pages).
- *
- * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
- * only happen when using 64k pages, in which case we need to extend
- * the root level table rather than add a level. Note that we can
- * treat this case as 'always extended' as long as we take care not
- * to program an unsupported T0SZ value into the TCR register.
- *
- * - Combinations that would require two additional levels of
- * translation are not supported, e.g., VA_BITS==36 on 16k pages, or
- * VA_BITS==39/4k pages with 5-level paging, where the input address
- * requires more than 47 or 48 bits, respectively.
- */
-#if (VA_BITS < 48)
-#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
-#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-
- /*
- * If VA_BITS < 48, we have to configure an additional table level.
- * First, we have to verify our assumption that the current value of
- * VA_BITS was chosen such that all translation levels are fully
- * utilised, and that lowering T0SZ will always result in an additional
- * translation level to be configured.
- */
-#if VA_BITS != EXTRA_SHIFT
-#error "Mismatch between VA_BITS and page size/number of translation levels"
-#endif
-#else
-#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
-#define EXTRA_SHIFT
- /*
- * If VA_BITS == 48, we don't have to configure an additional
- * translation level, but the top-level table has more entries.
- */
-#endif
- adrp x0, init_idmap_pg_dir
- adrp x3, _text
- adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- mov_q x7, SWAPPER_RX_MMUFLAGS
-
- map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
-
- /* Remap the kernel page tables r/w in the ID map */
- adrp x1, _text
- adrp x2, init_pg_dir
- adrp x3, init_pg_end
- bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
- /* Remap the FDT after the kernel image */
- adrp x1, _text
- adrp x22, _end + SWAPPER_BLOCK_SIZE
- bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
- bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
- add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
- mov_q x5, SWAPPER_RW_MMUFLAGS
- mov x6, #SWAPPER_BLOCK_SHIFT
- bl remap_region
-
- /*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate those tables again to
- * remove any speculatively loaded cache lines.
- */
- cbnz x19, 0f // skip cache invalidation if MMU is on
- dmb sy
-
- adrp x0, init_idmap_pg_dir
- adrp x1, init_idmap_pg_end
- bl dcache_inval_poc
-0: ret x28
-SYM_FUNC_END(create_idmap)
-
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
-#ifdef CONFIG_RELOCATABLE
- add x5, x5, x23 // add KASLR displacement
-#endif
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
- mov_q x7, SWAPPER_RW_MMUFLAGS
-
- map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
- dsb ishst // sync with page table walker
- ret
-SYM_FUNC_END(create_kernel_mapping)
-
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@@ -454,6 +200,8 @@ SYM_FUNC_END(create_kernel_mapping)
sub sp, sp, #PT_REGS_SIZE
stp xzr, xzr, [sp, #S_STACKFRAME]
+ mov \tmp1, #FRAME_META_TYPE_FINAL
+ str \tmp1, [sp, #S_STACKFRAME_TYPE]
add x29, sp, #S_STACKFRAME
scs_load_current
@@ -482,41 +230,16 @@ SYM_FUNC_START_LOCAL(__primary_switched)
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- ldr_l x4, kimage_vaddr // Save the offset between
+ adrp x4, _text // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
mov x0, x20
bl set_cpu_boot_mode_flag
- // Clear BSS
- adr_l x0, __bss_start
- mov x1, xzr
- adr_l x2, __bss_stop
- sub x2, x2, x0
- bl __pi_memset
- dsb ishst // Make zero page visible to PTW
-
-#if VA_BITS > 48
- adr_l x8, vabits_actual // Set this early so KASAN early init
- str x25, [x8] // ... observes the correct value
- dc civac, x8 // Make visible to booting secondaries
-#endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adrp x5, memstart_offset_seed // Save KASLR linear map seed
- strh w24, [x5, :lo12:memstart_offset_seed]
-#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
- mov x0, x21 // pass FDT address in x0
- bl early_fdt_map // Try mapping the FDT early
- mov x0, x20 // pass the full boot status
- bl init_feature_override // Parse cpu feature overrides
-#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
- bl scs_patch_vmlinux
-#endif
mov x0, x20
bl finalise_el2 // Prefer VHE if possible
ldp x29, x30, [sp], #16
@@ -569,11 +292,14 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
-0:
- mov_q x0, HCR_HOST_NVHE_FLAGS
- msr hcr_el2, x0
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
isb
+0:
+ init_el2_hcr HCR_HOST_NVHE_FLAGS
init_el2_state
/* Hypervisor stub */
@@ -583,27 +309,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
- * making it impossible to start in nVHE mode. Is that
- * compliant with the architecture? Absolutely not!
- */
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
- cbz x0, 1f
+ cbz x0, 2f
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
- b 2f
+ b 3f
-1:
- pre_disable_mmu_workaround
+2:
msr sctlr_el1, x1
mov x2, xzr
-2:
- __init_el2_nvhe_prepare_eret
+3:
+ mov x0, #INIT_PSTATE_EL1
+ msr spsr_el2, x0
mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
@@ -643,10 +363,13 @@ SYM_FUNC_START_LOCAL(secondary_startup)
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
+
+#ifdef CONFIG_ARM64_VA_BITS_52
+alternative_if ARM64_HAS_VA52
bl __cpu_secondary_check52bitva
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
+alternative_else_nop_endif
#endif
+
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
@@ -749,15 +472,18 @@ SYM_FUNC_START(__enable_mmu)
ret
SYM_FUNC_END(__enable_mmu)
+#ifdef CONFIG_ARM64_VA_BITS_52
SYM_FUNC_START(__cpu_secondary_check52bitva)
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
- cmp x0, #52
- b.ne 2f
-
+#ifndef CONFIG_ARM64_LPA2
mrs_s x0, SYS_ID_AA64MMFR2_EL1
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
+#else
+ mrs x0, id_aa64mmfr0_el1
+ sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
+ b.ge 2f
+#endif
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
@@ -765,9 +491,9 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
wfi
b 1b
-#endif
2: ret
SYM_FUNC_END(__cpu_secondary_check52bitva)
+#endif
SYM_FUNC_START_LOCAL(__no_granule_support)
/* Indicate that this CPU can't boot and is stuck in the kernel */
@@ -779,123 +505,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x9, __rela_start
- adr_l x10, __rela_end
- mov_q x11, KIMAGE_VADDR // default virtual offset
- add x11, x11, x23 // actual virtual offset
-
-0: cmp x9, x10
- b.hs 1f
- ldp x12, x13, [x9], #24
- ldr x14, [x9, #-8]
- cmp w13, #R_AARCH64_RELATIVE
- b.ne 0b
- add x14, x14, x23 // relocate
- str x14, [x12, x23]
- b 0b
-
-1:
-#ifdef CONFIG_RELR
- /*
- * Apply RELR relocations.
- *
- * RELR is a compressed format for storing relative relocations. The
- * encoded sequence of entries looks like:
- * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
- *
- * i.e. start with an address, followed by any number of bitmaps. The
- * address entry encodes 1 relocation. The subsequent bitmap entries
- * encode up to 63 relocations each, at subsequent offsets following
- * the last address entry.
- *
- * The bitmap entries must have 1 in the least significant bit. The
- * assumption here is that an address cannot have 1 in lsb. Odd
- * addresses are not supported. Any odd addresses are stored in the RELA
- * section, which is handled above.
- *
- * Excluding the least significant bit in the bitmap, each non-zero
- * bit in the bitmap represents a relocation to be applied to
- * a corresponding machine word that follows the base address
- * word. The second least significant bit represents the machine
- * word immediately following the initial address, and each bit
- * that follows represents the next word, in linear order. As such,
- * a single bitmap can encode up to 63 relocations in a 64-bit object.
- *
- * In this implementation we store the address of the next RELR table
- * entry in x9, the address being relocated by the current address or
- * bitmap entry in x13 and the address being relocated by the current
- * bit in x14.
- */
- adr_l x9, __relr_start
- adr_l x10, __relr_end
-
-2: cmp x9, x10
- b.hs 7f
- ldr x11, [x9], #8
- tbnz x11, #0, 3f // branch to handle bitmaps
- add x13, x11, x23
- ldr x12, [x13] // relocate address entry
- add x12, x12, x23
- str x12, [x13], #8 // adjust to start of bitmap
- b 2b
-
-3: mov x14, x13
-4: lsr x11, x11, #1
- cbz x11, 6f
- tbz x11, #0, 5f // skip bit if not set
- ldr x12, [x14] // relocate bit
- add x12, x12, x23
- str x12, [x14]
-
-5: add x14, x14, #8 // move to next bit's address
- b 4b
-
-6: /*
- * Move to the next bitmap's address. 8 is the word size, and 63 is the
- * number of significant bits in a bitmap entry.
- */
- add x13, x13, #(8 * 63)
- b 2b
-
-7:
-#endif
- ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
- adrp x2, init_idmap_pg_dir
+ adrp x2, __pi_init_idmap_pg_dir
bl __enable_mmu
-#ifdef CONFIG_RELOCATABLE
- adrp x23, KERNEL_START
- and x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x0, x22
- adrp x1, init_pg_end
+
+ adrp x1, early_init_stack
mov sp, x1
mov x29, xzr
- bl __pi_kaslr_early_init
- and x24, x0, #SZ_2M - 1 // capture memstart offset seed
- bic x0, x0, #SZ_2M - 1
- orr x23, x23, x0 // record kernel offset
-#endif
-#endif
- bl clear_page_tables
- bl create_kernel_mapping
+ mov x0, x20 // pass the full boot status
+ mov x1, x21 // pass the FDT
+ bl __pi_early_map_kernel // Map and relocate the kernel
- adrp x1, init_pg_dir
- load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
- bl __relocate_kernel
-#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..18749e9a6c2d 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -266,9 +266,15 @@ static int swsusp_mte_save_tags(void)
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
struct page *page = pfn_to_online_page(pfn);
+ struct folio *folio;
if (!page)
continue;
+ folio = page_folio(page);
+
+ if (folio_test_hugetlb(folio) &&
+ !folio_test_hugetlb_mte_tagged(folio))
+ continue;
if (!page_mte_tagged(page))
continue;
@@ -407,7 +413,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 35225632d70a..ab76b36dce82 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -21,6 +21,8 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
+#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@@ -617,8 +619,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+void do_breakpoint(unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
u32 ctrl_reg;
@@ -654,14 +655,14 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
perf_bp_event(bp, regs);
/* Do we need to handle the stepping? */
- if (uses_default_overflow_handler(bp))
+ if (is_default_overflow_handler(bp))
step = 1;
unlock:
rcu_read_unlock();
}
if (!step)
- return 0;
+ return;
if (user_mode(regs)) {
debug_info->bps_disabled = 1;
@@ -669,7 +670,7 @@ unlock:
/* If we're already stepping a watchpoint, just return. */
if (debug_info->wps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -680,7 +681,7 @@ unlock:
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -689,10 +690,8 @@ unlock:
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(breakpoint_handler);
+NOKPROBE_SYMBOL(do_breakpoint);
/*
* Arm64 hardware does not always report a watchpoint hit address that matches
@@ -733,7 +732,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
static int watchpoint_report(struct perf_event *wp, unsigned long addr,
struct pt_regs *regs)
{
- int step = uses_default_overflow_handler(wp);
+ int step = is_default_overflow_handler(wp);
struct arch_hw_breakpoint *info = counter_arch_bp(wp);
info->trigger = addr;
@@ -751,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
return step;
}
-static int watchpoint_handler(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
+void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
u64 min_dist = -1, dist;
@@ -779,7 +777,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
* Check that the access type matches.
* 0 => load, otherwise => store
*/
- access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+ access = (esr & ESR_ELx_WNR) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
continue;
@@ -807,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
rcu_read_unlock();
if (!step)
- return 0;
+ return;
/*
* We always disable EL0 watchpoints because the kernel can
@@ -820,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
/* If we're already stepping a breakpoint, just return. */
if (debug_info->bps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -831,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -840,44 +838,41 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(watchpoint_handler);
+NOKPROBE_SYMBOL(do_watchpoint);
/*
* Handle single-step exception.
*/
-int reinstall_suspended_bps(struct pt_regs *regs)
+bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
struct debug_info *debug_info = &current->thread.debug;
- int handled_exception = 0, *kernel_step;
-
- kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ int *kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ bool handled_exception = false;
/*
- * Called from single-step exception handler.
- * Return 0 if execution can resume, 1 if a SIGTRAP should be
- * reported.
+ * Called from single-step exception entry.
+ * Return true if we stepped a breakpoint and can resume execution,
+ * false if we need to handle a single-step.
*/
if (user_mode(regs)) {
if (debug_info->bps_disabled) {
debug_info->bps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (debug_info->wps_disabled) {
debug_info->wps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (handled_exception) {
if (debug_info->suspended_step) {
debug_info->suspended_step = 0;
/* Allow exception handling to fall-through. */
- handled_exception = 0;
+ handled_exception = false;
} else {
user_disable_single_step(current);
}
@@ -891,17 +886,17 @@ int reinstall_suspended_bps(struct pt_regs *regs)
if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
kernel_disable_single_step();
- handled_exception = 1;
+ handled_exception = true;
} else {
- handled_exception = 0;
+ handled_exception = false;
}
*kernel_step = ARM_KERNEL_STEP_NONE;
}
- return !handled_exception;
+ return handled_exception;
}
-NOKPROBE_SYMBOL(reinstall_suspended_bps);
+NOKPROBE_SYMBOL(try_step_suspended_breakpoints);
/*
* Context-switcher for restoring suspended breakpoints.
@@ -986,12 +981,6 @@ static int __init arch_hw_breakpoint_init(void)
pr_info("found %d breakpoint and %d watchpoint registers.\n",
core_num_brps, core_num_wrps);
- /* Register debug fault handlers. */
- hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-breakpoint handler");
- hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-watchpoint handler");
-
/*
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 65f76064c86b..085bc9972f6b 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
+ cmp x0, #HVC_GET_ICH_VTR_EL2
+ b.ne 2f
+ mrs_s x1, SYS_ICH_VTR_EL2
+ b 9f
+
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2
@@ -97,7 +102,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
2:
// Engage the VHE magic!
mov_q x0, HCR_HOST_VHE_FLAGS
- msr hcr_el2, x0
+ msr_hcr_el2 x0
isb
// Use the EL1 allocated stack, per-cpu offset
@@ -114,8 +119,8 @@ SYM_CODE_START_LOCAL(__finalise_el2)
// Use EL2 translations for SPE & TRBE and disable access from EL1
mrs x0, mdcr_el2
- bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
- bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ bic x0, x0, #MDCR_EL2_E2PB_MASK
+ bic x0, x0, #MDCR_EL2_E2TB_MASK
msr mdcr_el2, x0
// Transfer the MM state from EL1 to EL2
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
index c1125753fe9b..05cfb347ec26 100644
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -20,7 +20,7 @@
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
-void noinstr cpu_do_idle(void)
+void __cpuidle cpu_do_idle(void)
{
struct arm_cpuidle_irq_context context;
@@ -35,7 +35,7 @@ void noinstr cpu_do_idle(void)
/*
* This is our default idle handler.
*/
-void noinstr arch_cpu_idle(void)
+void __cpuidle arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
deleted file mode 100644
index aee12c75b738..000000000000
--- a/arch/arm64/kernel/idreg-override.c
+++ /dev/null
@@ -1,338 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Early cpufeature override framework
- *
- * Copyright (C) 2020 Google LLC
- * Author: Marc Zyngier <maz@kernel.org>
- */
-
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/libfdt.h>
-
-#include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/setup.h>
-
-#define FTR_DESC_NAME_LEN 20
-#define FTR_DESC_FIELD_LEN 10
-#define FTR_ALIAS_NAME_LEN 30
-#define FTR_ALIAS_OPTION_LEN 116
-
-static u64 __boot_status __initdata;
-
-struct ftr_set_desc {
- char name[FTR_DESC_NAME_LEN];
- struct arm64_ftr_override *override;
- struct {
- char name[FTR_DESC_FIELD_LEN];
- u8 shift;
- u8 width;
- bool (*filter)(u64 val);
- } fields[];
-};
-
-#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
-
-static bool __init mmfr1_vh_filter(u64 val)
-{
- /*
- * If we ever reach this point while running VHE, we're
- * guaranteed to be on one of these funky, VHE-stuck CPUs. If
- * the user was trying to force nVHE on us, proceed with
- * attitude adjustment.
- */
- return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
- val == 0);
-}
-
-static const struct ftr_set_desc mmfr1 __initconst = {
- .name = "id_aa64mmfr1",
- .override = &id_aa64mmfr1_override,
- .fields = {
- FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
- {}
- },
-};
-
-static bool __init pfr0_sve_filter(u64 val)
-{
- /*
- * Disabling SVE also means disabling all the features that
- * are associated with it. The easiest way to do it is just to
- * override id_aa64zfr0_el1 to be 0.
- */
- if (!val) {
- id_aa64zfr0_override.val = 0;
- id_aa64zfr0_override.mask = GENMASK(63, 0);
- }
-
- return true;
-}
-
-static const struct ftr_set_desc pfr0 __initconst = {
- .name = "id_aa64pfr0",
- .override = &id_aa64pfr0_override,
- .fields = {
- FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
- {}
- },
-};
-
-static bool __init pfr1_sme_filter(u64 val)
-{
- /*
- * Similarly to SVE, disabling SME also means disabling all
- * the features that are associated with it. Just set
- * id_aa64smfr0_el1 to 0 and don't look back.
- */
- if (!val) {
- id_aa64smfr0_override.val = 0;
- id_aa64smfr0_override.mask = GENMASK(63, 0);
- }
-
- return true;
-}
-
-static const struct ftr_set_desc pfr1 __initconst = {
- .name = "id_aa64pfr1",
- .override = &id_aa64pfr1_override,
- .fields = {
- FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
- FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
- FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
- {}
- },
-};
-
-static const struct ftr_set_desc isar1 __initconst = {
- .name = "id_aa64isar1",
- .override = &id_aa64isar1_override,
- .fields = {
- FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
- FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
- FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
- FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
- {}
- },
-};
-
-static const struct ftr_set_desc isar2 __initconst = {
- .name = "id_aa64isar2",
- .override = &id_aa64isar2_override,
- .fields = {
- FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
- FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
- FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
- {}
- },
-};
-
-static const struct ftr_set_desc smfr0 __initconst = {
- .name = "id_aa64smfr0",
- .override = &id_aa64smfr0_override,
- .fields = {
- FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
- /* FA64 is a one bit field... :-/ */
- { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
- {}
- },
-};
-
-static bool __init hvhe_filter(u64 val)
-{
- u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
-
- return (val == 1 &&
- lower_32_bits(__boot_status) == BOOT_CPU_MODE_EL2 &&
- cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_EL1_VH_SHIFT));
-}
-
-static const struct ftr_set_desc sw_features __initconst = {
- .name = "arm64_sw",
- .override = &arm64_sw_feature_override,
- .fields = {
- FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
- FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
- {}
- },
-};
-
-static const struct ftr_set_desc * const regs[] __initconst = {
- &mmfr1,
- &pfr0,
- &pfr1,
- &isar1,
- &isar2,
- &smfr0,
- &sw_features,
-};
-
-static const struct {
- char alias[FTR_ALIAS_NAME_LEN];
- char feature[FTR_ALIAS_OPTION_LEN];
-} aliases[] __initconst = {
- { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
- { "arm64.nosve", "id_aa64pfr0.sve=0" },
- { "arm64.nosme", "id_aa64pfr1.sme=0" },
- { "arm64.nobti", "id_aa64pfr1.bt=0" },
- { "arm64.nopauth",
- "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
- "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
- "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
- { "arm64.nomops", "id_aa64isar2.mops=0" },
- { "arm64.nomte", "id_aa64pfr1.mte=0" },
- { "nokaslr", "arm64_sw.nokaslr=1" },
-};
-
-static int __init parse_nokaslr(char *unused)
-{
- /* nokaslr param handling is done by early cpufeature code */
- return 0;
-}
-early_param("nokaslr", parse_nokaslr);
-
-static int __init find_field(const char *cmdline,
- const struct ftr_set_desc *reg, int f, u64 *v)
-{
- char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
- int len;
-
- len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
- reg->name, reg->fields[f].name);
-
- if (!parameqn(cmdline, opt, len))
- return -1;
-
- return kstrtou64(cmdline + len, 0, v);
-}
-
-static void __init match_options(const char *cmdline)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- int f;
-
- if (!regs[i]->override)
- continue;
-
- for (f = 0; strlen(regs[i]->fields[f].name); f++) {
- u64 shift = regs[i]->fields[f].shift;
- u64 width = regs[i]->fields[f].width ?: 4;
- u64 mask = GENMASK_ULL(shift + width - 1, shift);
- u64 v;
-
- if (find_field(cmdline, regs[i], f, &v))
- continue;
-
- /*
- * If an override gets filtered out, advertise
- * it by setting the value to the all-ones while
- * clearing the mask... Yes, this is fragile.
- */
- if (regs[i]->fields[f].filter &&
- !regs[i]->fields[f].filter(v)) {
- regs[i]->override->val |= mask;
- regs[i]->override->mask &= ~mask;
- continue;
- }
-
- regs[i]->override->val &= ~mask;
- regs[i]->override->val |= (v << shift) & mask;
- regs[i]->override->mask |= mask;
-
- return;
- }
- }
-}
-
-static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
-{
- do {
- char buf[256];
- size_t len;
- int i;
-
- cmdline = skip_spaces(cmdline);
-
- for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
- if (!len)
- return;
-
- len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
- if (len == -E2BIG)
- len = ARRAY_SIZE(buf) - 1;
-
- if (strcmp(buf, "--") == 0)
- return;
-
- cmdline += len;
-
- match_options(buf);
-
- for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
- if (parameq(buf, aliases[i].alias))
- __parse_cmdline(aliases[i].feature, false);
- } while (1);
-}
-
-static __init const u8 *get_bootargs_cmdline(void)
-{
- const u8 *prop;
- void *fdt;
- int node;
-
- fdt = get_early_fdt_ptr();
- if (!fdt)
- return NULL;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- return NULL;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- return NULL;
-
- return strlen(prop) ? prop : NULL;
-}
-
-static __init void parse_cmdline(void)
-{
- const u8 *prop = get_bootargs_cmdline();
-
- if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
- __parse_cmdline(CONFIG_CMDLINE, true);
-
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
- __parse_cmdline(prop, true);
-}
-
-/* Keep checkers quiet */
-void init_feature_override(u64 boot_status);
-
-asmlinkage void __init init_feature_override(u64 boot_status)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override) {
- regs[i]->override->val = 0;
- regs[i]->override->mask = 0;
- }
- }
-
- __boot_status = boot_status;
-
- parse_cmdline();
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override)
- dcache_clean_inval_poc((unsigned long)regs[i]->override,
- (unsigned long)regs[i]->override +
- sizeof(*regs[i]->override));
- }
-}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 35f3c7959513..85bc629270bd 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,6 +10,16 @@
#error This file should only be included in vmlinux.lds.S
#endif
+#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 210000
+#define ASSERT(...)
+#endif
+
+#define PI_EXPORT_SYM(sym) \
+ __PI_EXPORT_SYM(sym, __pi_ ## sym, Cannot export BSS symbol sym to startup code)
+#define __PI_EXPORT_SYM(sym, pisym, msg)\
+ PROVIDE(pisym = sym); \
+ ASSERT((sym - KIMAGE_VADDR) < (__bss_start - KIMAGE_VADDR), #msg)
+
PROVIDE(__efistub_primary_entry = primary_entry);
/*
@@ -27,13 +37,40 @@ PROVIDE(__efistub__text = _text);
PROVIDE(__efistub__end = _end);
PROVIDE(__efistub___inittext_end = __inittext_end);
PROVIDE(__efistub__edata = _edata);
+#if defined(CONFIG_EFI_EARLYCON) || defined(CONFIG_SYSFB)
PROVIDE(__efistub_screen_info = screen_info);
+#endif
PROVIDE(__efistub__ctype = _ctype);
PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
+PI_EXPORT_SYM(id_aa64isar1_override);
+PI_EXPORT_SYM(id_aa64isar2_override);
+PI_EXPORT_SYM(id_aa64mmfr0_override);
+PI_EXPORT_SYM(id_aa64mmfr1_override);
+PI_EXPORT_SYM(id_aa64mmfr2_override);
+PI_EXPORT_SYM(id_aa64pfr0_override);
+PI_EXPORT_SYM(id_aa64pfr1_override);
+PI_EXPORT_SYM(id_aa64smfr0_override);
+PI_EXPORT_SYM(id_aa64zfr0_override);
+PI_EXPORT_SYM(arm64_sw_feature_override);
+PI_EXPORT_SYM(arm64_use_ng_mappings);
+PI_EXPORT_SYM(_ctype);
+
+PI_EXPORT_SYM(swapper_pg_dir);
+
+PI_EXPORT_SYM(_text);
+PI_EXPORT_SYM(_stext);
+PI_EXPORT_SYM(_etext);
+PI_EXPORT_SYM(__start_rodata);
+PI_EXPORT_SYM(__inittext_begin);
+PI_EXPORT_SYM(__inittext_end);
+PI_EXPORT_SYM(__initdata_begin);
+PI_EXPORT_SYM(__initdata_end);
+PI_EXPORT_SYM(_data);
+
#ifdef CONFIG_KVM
/*
@@ -54,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
KVM_NVHE_ALIAS(alt_cb_patch_nops);
+KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);
@@ -68,20 +106,16 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-/* Static key checked in GIC_PRIO_IRQOFF. */
-KVM_NVHE_ALIAS(gic_nonsecure_priorities);
-#endif
+/* Static key indicating whether GICv3 has GICv2 compatibility */
+KVM_NVHE_ALIAS(vgic_v3_has_v2_compat);
+
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
-/* PMU available static key */
-#ifdef CONFIG_HW_PERF_EVENTS
-KVM_NVHE_ALIAS(kvm_arm_pmu_available);
-#endif
-
/* Position-independent library routines */
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
@@ -100,6 +134,8 @@ KVM_NVHE_ALIAS(__hyp_text_start);
KVM_NVHE_ALIAS(__hyp_text_end);
KVM_NVHE_ALIAS(__hyp_bss_start);
KVM_NVHE_ALIAS(__hyp_bss_end);
+KVM_NVHE_ALIAS(__hyp_data_start);
+KVM_NVHE_ALIAS(__hyp_data_end);
KVM_NVHE_ALIAS(__hyp_rodata_start);
KVM_NVHE_ALIAS(__hyp_rodata_end);
@@ -112,4 +148,17 @@ KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
_kernel_codesize = ABSOLUTE(__inittext_end - _text);
#endif
+/*
+ * LLD will occasionally error out with a '__init_end does not converge' error
+ * if INIT_IDMAP_DIR_SIZE is defined in terms of _end, as this results in a
+ * circular dependency. Counter this by dimensioning the initial IDMAP page
+ * tables based on kimage_limit, which is defined such that its value should
+ * not change as a result of the initdata segment being pushed over a 64k
+ * segment boundary due to changes in INIT_IDMAP_DIR_SIZE, provided that its
+ * value doesn't change by more than 2M between linker passes.
+ */
+kimage_limit = ALIGN(ABSOLUTE(_end + SZ_64K), SZ_2M);
+
+#undef ASSERT
+
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index aa7a4ec6a3ae..fe86ada23c7d 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -10,88 +10,43 @@
#include <linux/io.h>
/*
- * Copy data from IO memory space to "real" memory space.
+ * This generates a memcpy that works on a from/to address which is aligned to
+ * bits. Count is in terms of the number of bits sized quantities to copy. It
+ * optimizes to use the STR groupings when possible so that it is WC friendly.
*/
-void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
+#define memcpy_toio_aligned(to, from, count, bits) \
+ ({ \
+ volatile u##bits __iomem *_to = to; \
+ const u##bits *_from = from; \
+ size_t _count = count; \
+ const u##bits *_end_from = _from + ALIGN_DOWN(_count, 8); \
+ \
+ for (; _from < _end_from; _from += 8, _to += 8) \
+ __const_memcpy_toio_aligned##bits(_to, _from, 8); \
+ if ((_count % 8) >= 4) { \
+ __const_memcpy_toio_aligned##bits(_to, _from, 4); \
+ _from += 4; \
+ _to += 4; \
+ } \
+ if ((_count % 4) >= 2) { \
+ __const_memcpy_toio_aligned##bits(_to, _from, 2); \
+ _from += 2; \
+ _to += 2; \
+ } \
+ if (_count % 2) \
+ __const_memcpy_toio_aligned##bits(_to, _from, 1); \
+ })
+
+void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count)
{
- while (count && !IS_ALIGNED((unsigned long)from, 8)) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- *(u64 *)to = __raw_readq(from);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- *(u8 *)to = __raw_readb(from);
- from++;
- to++;
- count--;
- }
+ memcpy_toio_aligned(to, from, count, 64);
+ dgh();
}
-EXPORT_SYMBOL(__memcpy_fromio);
+EXPORT_SYMBOL(__iowrite64_copy_full);
-/*
- * Copy data from "real" memory space to IO memory space.
- */
-void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
+void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count)
{
- while (count && !IS_ALIGNED((unsigned long)to, 8)) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(*(u64 *)from, to);
- from += 8;
- to += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(*(u8 *)from, to);
- from++;
- to++;
- count--;
- }
-}
-EXPORT_SYMBOL(__memcpy_toio);
-
-/*
- * "memset" on IO memory space.
- */
-void __memset_io(volatile void __iomem *dst, int c, size_t count)
-{
- u64 qc = (u8)c;
-
- qc |= qc << 8;
- qc |= qc << 16;
- qc |= qc << 32;
-
- while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
-
- while (count >= 8) {
- __raw_writeq(qc, dst);
- dst += 8;
- count -= 8;
- }
-
- while (count) {
- __raw_writeb(c, dst);
- dst++;
- count--;
- }
+ memcpy_toio_aligned(to, from, count, 32);
+ dgh();
}
-EXPORT_SYMBOL(__memset_io);
+EXPORT_SYMBOL(__iowrite32_copy_full);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 6ad5c6ef5329..15dedb385b9e 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/exception.h>
+#include <asm/numa.h>
#include <asm/softirq_stack.h>
#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>
@@ -47,34 +48,21 @@ static void init_irq_scs(void)
for_each_possible_cpu(cpu)
per_cpu(irq_shadow_call_stack_ptr, cpu) =
- scs_alloc(cpu_to_node(cpu));
+ scs_alloc(early_cpu_to_node(cpu));
}
-#ifdef CONFIG_VMAP_STACK
-static void init_irq_stacks(void)
+static void __init init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
- p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
-#else
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-static void init_irq_stacks(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
-}
-#endif
-
-#ifndef CONFIG_PREEMPT_RT
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static void ____do_softirq(struct pt_regs *regs)
{
__do_softirq();
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index faf88ec9c48e..b345425193d2 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -7,11 +7,12 @@
*/
#include <linux/kernel.h>
#include <linux/jump_label.h>
+#include <linux/smp.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)jump_entry_code(entry);
u32 insn;
@@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
}
aarch64_insn_patch_text_nosync(addr, insn);
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ kick_all_cpus_sync();
}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 94a269cd1f07..c9503ed45a6c 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -10,15 +10,11 @@
#include <asm/cpufeature.h>
#include <asm/memory.h>
-u16 __initdata memstart_offset_seed;
-
bool __ro_after_init __kaslr_is_enabled = false;
void __init kaslr_init(void)
{
- if (cpuid_feature_extract_unsigned_field(arm64_sw_feature_override.val &
- arm64_sw_feature_override.mask,
- ARM64_SW_FEATURE_OVERRIDE_NOKASLR)) {
+ if (kaslr_disabled_cmdline()) {
pr_info("KASLR disabled on command line\n");
return;
}
@@ -36,3 +32,10 @@ void __init kaslr_init(void)
pr_info("KASLR enabled\n");
__kaslr_is_enabled = true;
}
+
+static int __init parse_nokaslr(char *unused)
+{
+ /* nokaslr param handling is done by early cpufeature code */
+ return 0;
+}
+early_param("nokaslr", parse_nokaslr);
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 636be6715155..532d72ea42ee 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -122,9 +122,9 @@ static void *image_load(struct kimage *image,
kernel_segment->memsz -= text_offset;
image->start = kernel_segment->mem;
- pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kernel_segment->mem, kbuf.bufsz,
- kernel_segment->memsz);
+ kexec_dprintk("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_segment->mem, kbuf.bufsz,
+ kernel_segment->memsz);
return NULL;
}
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 4e1f983df3d1..968324a79a89 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,7 +17,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -234,23 +234,23 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_brk_fn)
+NOKPROBE_SYMBOL(kgdb_brk_handler)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
+NOKPROBE_SYMBOL(kgdb_compiled_brk_handler);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
@@ -258,21 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_step_brk_fn);
-
-static struct break_hook kgdb_brkpt_hook = {
- .fn = kgdb_brk_fn,
- .imm = KGDB_DYN_DBG_BRK_IMM,
-};
-
-static struct break_hook kgdb_compiled_brkpt_hook = {
- .fn = kgdb_compiled_brk_fn,
- .imm = KGDB_COMPILED_DBG_BRK_IMM,
-};
-
-static struct step_hook kgdb_step_hook = {
- .fn = kgdb_step_brk_fn
-};
+NOKPROBE_SYMBOL(kgdb_single_step_handler);
static int __kgdb_notify(struct die_args *args, unsigned long cmd)
{
@@ -311,15 +297,7 @@ static struct notifier_block kgdb_notifier = {
*/
int kgdb_arch_init(void)
{
- int ret = register_die_notifier(&kgdb_notifier);
-
- if (ret != 0)
- return ret;
-
- register_kernel_break_hook(&kgdb_brkpt_hook);
- register_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- register_kernel_step_hook(&kgdb_step_hook);
- return 0;
+ return register_die_notifier(&kgdb_notifier);
}
/*
@@ -329,9 +307,6 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_kernel_break_hook(&kgdb_brkpt_hook);
- unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- unregister_kernel_step_hook(&kgdb_step_hook);
unregister_die_notifier(&kgdb_notifier);
}
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 078910db77a4..239c16e3d02f 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -32,26 +32,12 @@
static void _kexec_image_info(const char *func, int line,
const struct kimage *kimage)
{
- unsigned long i;
-
- pr_debug("%s:%d:\n", func, line);
- pr_debug(" kexec kimage info:\n");
- pr_debug(" type: %d\n", kimage->type);
- pr_debug(" start: %lx\n", kimage->start);
- pr_debug(" head: %lx\n", kimage->head);
- pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
- pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem);
- pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
- pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
-
- for (i = 0; i < kimage->nr_segments; i++) {
- pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
- i,
- kimage->segment[i].mem,
- kimage->segment[i].mem + kimage->segment[i].memsz,
- kimage->segment[i].memsz,
- kimage->segment[i].memsz / PAGE_SIZE);
- }
+ kexec_dprintk("%s:%d:\n", func, line);
+ kexec_dprintk(" kexec kimage info:\n");
+ kexec_dprintk(" type: %d\n", kimage->type);
+ kexec_dprintk(" head: %lx\n", kimage->head);
+ kexec_dprintk(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
+ kexec_dprintk(" el2_vectors: %pa\n", &kimage->arch.el2_vectors);
}
void machine_kexec_cleanup(struct kimage *kimage)
@@ -221,37 +207,6 @@ void machine_kexec(struct kimage *kimage)
BUG(); /* Should never get here. */
}
-static void machine_kexec_mask_interrupts(void)
-{
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
- int ret;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- /*
- * First try to remove the active state. If this
- * fails, try to EOI the interrupt.
- */
- ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
-
- if (ret && irqd_irq_inprogress(&desc->irq_data) &&
- chip->irq_eoi)
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
/**
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
*/
@@ -269,7 +224,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
pr_info("Starting crashdump kernel...\n");
}
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
/*
* To preserve the crash dump kernel image, the relevant memory segments
* should be mapped again around the hibernation.
@@ -296,7 +251,7 @@ void crash_post_resume(void)
* marked as Reserved as memory was allocated via memblock_reserve().
*
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
- * from the hibernation iamge. crash_is_nosave() does thich check for crash
+ * from the hibernation image. crash_is_nosave() does thich check for crash
* dump kernel and will reduce the total size of hibernation image.
*/
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index a11a6e14ba89..410060ebd86d 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -39,6 +39,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
return kexec_image_post_load_cleanup_default(image);
}
+#ifdef CONFIG_CRASH_DUMP
static int prepare_elf_headers(void **addr, unsigned long *sz)
{
struct crash_mem *cmem;
@@ -80,6 +81,7 @@ out:
kfree(cmem);
return ret;
}
+#endif
/*
* Tries to add the initrd and DTB to the image. If it is not possible to find
@@ -92,9 +94,9 @@ int load_other_segments(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf;
- void *headers, *dtb = NULL;
- unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+ struct kexec_buf kbuf = {};
+ void *dtb = NULL;
+ unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
int ret = 0;
@@ -102,7 +104,10 @@ int load_other_segments(struct kimage *image,
/* not allocate anything below the kernel */
kbuf.buf_min = kernel_load_addr + kernel_size;
+#ifdef CONFIG_CRASH_DUMP
/* load elf core header */
+ void *headers;
+ unsigned long headers_sz;
if (image->type == KEXEC_TYPE_CRASH) {
ret = prepare_elf_headers(&headers, &headers_sz);
if (ret) {
@@ -127,9 +132,10 @@ int load_other_segments(struct kimage *image,
image->elf_load_addr = kbuf.mem;
image->elf_headers_sz = headers_sz;
- pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
+ kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
}
+#endif
/* load initrd */
if (initrd) {
@@ -148,8 +154,8 @@ int load_other_segments(struct kimage *image,
goto out_err;
initrd_load_addr = kbuf.mem;
- pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- initrd_load_addr, kbuf.bufsz, kbuf.memsz);
+ kexec_dprintk("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, kbuf.bufsz, kbuf.memsz);
}
/* load dtb */
@@ -179,8 +185,8 @@ int load_other_segments(struct kimage *image,
image->arch.dtb = dtb;
image->arch.dtb_mem = kbuf.mem;
- pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
- kbuf.mem, kbuf.bufsz, kbuf.memsz);
+ kexec_dprintk("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kbuf.mem, kbuf.bufsz, kbuf.memsz);
return 0;
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index bd69a4e7cd60..7afd370da9f4 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- break;
-
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
@@ -203,8 +200,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
case R_AARCH64_ADR_PREL_PG_HI21:
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
- !cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_843419))
break;
/*
@@ -239,13 +235,13 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
}
}
- if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
- cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ if (cpus_have_final_cap(ARM64_WORKAROUND_843419)) {
/*
* Add some slack so we can skip PLT slots that may trigger
* the erratum due to the placement of the ADRP instruction.
*/
ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+ }
return ret;
}
@@ -269,9 +265,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
{
int i = 0, j = numrels - 1;
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return 0;
-
while (i < j) {
if (branch_rela_needs_plt(syms, &rela[i], dstidx))
i++;
@@ -290,7 +283,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
- Elf_Shdr *pltsec, *tramp = NULL;
+ Elf_Shdr *pltsec, *tramp = NULL, *init_tramp = NULL;
int i;
/*
@@ -305,6 +298,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
else if (!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
tramp = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name,
+ ".init.text.ftrace_trampoline"))
+ init_tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
@@ -370,5 +366,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
+ if (init_tramp) {
+ init_tramp->sh_type = SHT_NOBITS;
+ init_tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ init_tramp->sh_addralign = __alignof__(struct plt_entry);
+ init_tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
+ }
+
return 0;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index dd851297596e..24adb581af0e 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -12,143 +12,18 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
-#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/random.h>
#include <linux/scs.h>
-#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
-
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
-
-/*
- * Choose a random page-aligned base address for a window of 'size' bytes which
- * entirely contains the interval [start, end - 1].
- */
-static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
-{
- u64 max_pgoff, pgoff;
-
- if ((end - start) >= size)
- return 0;
-
- max_pgoff = (size - (end - start)) / PAGE_SIZE;
- pgoff = get_random_u32_inclusive(0, max_pgoff);
-
- return start - pgoff * PAGE_SIZE;
-}
-
-/*
- * Modules may directly reference data and text anywhere within the kernel
- * image and other modules. References using PREL32 relocations have a +/-2G
- * range, and so we need to ensure that the entire kernel image and all modules
- * fall within a 2G window such that these are always within range.
- *
- * Modules may directly branch to functions and code within the kernel text,
- * and to functions and code within other modules. These branches will use
- * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
- * that the entire kernel text and all module text falls within a 128M window
- * such that these are always within range. With PLTs, we can expand this to a
- * 2G window.
- *
- * We chose the 128M region to surround the entire kernel image (rather than
- * just the text) as using the same bounds for the 128M and 2G regions ensures
- * by construction that we never select a 128M region that is not a subset of
- * the 2G region. For very large and unusual kernel configurations this means
- * we may fall back to PLTs where they could have been avoided, but this keeps
- * the logic significantly simpler.
- */
-static int __init module_init_limits(void)
-{
- u64 kernel_end = (u64)_end;
- u64 kernel_start = (u64)_text;
- u64 kernel_size = kernel_end - kernel_start;
-
- /*
- * The default modules region is placed immediately below the kernel
- * image, and is large enough to use the full 2G relocation range.
- */
- BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
- BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
-
- if (!kaslr_enabled()) {
- if (kernel_size < SZ_128M)
- module_direct_base = kernel_end - SZ_128M;
- if (kernel_size < SZ_2G)
- module_plt_base = kernel_end - SZ_2G;
- } else {
- u64 min = kernel_start;
- u64 max = kernel_end;
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
- } else {
- module_direct_base = random_bounding_box(SZ_128M, min, max);
- if (module_direct_base) {
- min = module_direct_base;
- max = module_direct_base + SZ_128M;
- }
- }
-
- module_plt_base = random_bounding_box(SZ_2G, min, max);
- }
-
- pr_info("%llu pages in range for non-PLT usage",
- module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
- pr_info("%llu pages in range for PLT usage",
- module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
-
- return 0;
-}
-subsys_initcall(module_init_limits);
-
-void *module_alloc(unsigned long size)
-{
- void *p = NULL;
-
- /*
- * Where possible, prefer to allocate within direct branch range of the
- * kernel such that no PLTs are necessary.
- */
- if (module_direct_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_direct_base,
- module_direct_base + SZ_128M,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p && module_plt_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_plt_base,
- module_plt_base + SZ_2G,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p) {
- pr_warn_ratelimited("%s: unable to allocate memory\n",
- __func__);
- }
-
- if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
- vfree(p);
- return NULL;
- }
-
- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(p);
-}
+#include <asm/text-patching.h>
enum aarch64_reloc_op {
RELOC_OP_NONE,
@@ -174,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
return 0;
}
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+#define WRITE_PLACE(place, val, mod) do { \
+ __typeof__(val) __val = (val); \
+ \
+ if (mod->state == MODULE_STATE_UNFORMED) \
+ *(place) = __val; \
+ else \
+ aarch64_insn_copy(place, &(__val), sizeof(*place)); \
+} while (0)
+
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+ struct module *me)
{
s64 sval = do_reloc(op, place, val);
@@ -192,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
switch (len) {
case 16:
- *(s16 *)place = sval;
+ WRITE_PLACE((s16 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U16_MAX)
@@ -208,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 32:
- *(s32 *)place = sval;
+ WRITE_PLACE((s32 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U32_MAX)
@@ -224,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 64:
- *(s64 *)place = sval;
+ WRITE_PLACE((s64 *)place, sval, me);
break;
default:
pr_err("Invalid length (%d) for data relocation\n", len);
@@ -239,7 +124,8 @@ enum aarch64_insn_movw_imm_type {
};
static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, enum aarch64_insn_movw_imm_type imm_type)
+ int lsb, enum aarch64_insn_movw_imm_type imm_type,
+ struct module *me)
{
u64 imm;
s64 sval;
@@ -271,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
if (imm > U16_MAX)
return -ERANGE;
@@ -280,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, int len, enum aarch64_insn_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type,
+ struct module *me)
{
u64 imm, imm_mask;
s64 sval;
@@ -296,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
/*
* Extract the upper value bits (including the sign bit) and
@@ -315,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
- __le32 *place, u64 val)
+ __le32 *place, u64 val, struct module *me)
{
u32 insn;
if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
/* patch ADRP to ADR if it is in range */
if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
- AARCH64_INSN_IMM_ADR)) {
+ AARCH64_INSN_IMM_ADR, me)) {
insn = le32_to_cpu(*place);
insn &= ~BIT(31);
} else {
@@ -337,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
AARCH64_INSN_BRANCH_NOLINK);
}
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
return 0;
}
@@ -381,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Data relocations. */
case R_AARCH64_ABS64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me);
break;
case R_AARCH64_ABS32:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me);
break;
case R_AARCH64_ABS16:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me);
break;
case R_AARCH64_PREL64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me);
break;
case R_AARCH64_PREL32:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me);
break;
case R_AARCH64_PREL16:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me);
break;
/* MOVW instruction relocations. */
@@ -406,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
- ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+ ovf = reloc_insn_adrp(me, sechdrs, loc, val, me);
if (ovf && ovf != -ERANGE)
return ovf;
break;
@@ -495,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- AARCH64_INSN_IMM_14);
+ AARCH64_INSN_IMM_14, me);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- AARCH64_INSN_IMM_26);
+ AARCH64_INSN_IMM_26, me);
if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
- 26, AARCH64_INSN_IMM_26);
+ 26, AARCH64_INSN_IMM_26, me);
}
break;
@@ -579,6 +466,17 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
__init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
mod->arch.ftrace_trampolines = plts;
+
+ s = find_section(hdr, sechdrs, ".init.text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ mod->arch.init_ftrace_trampolines = plts;
+
#endif
return 0;
}
@@ -588,14 +486,33 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s;
+ int ret;
+
s = find_section(hdr, sechdrs, ".altinstructions");
- if (s)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+ if (ret < 0) {
+ pr_err("module %s: error occurred when applying alternatives\n", me->name);
+ return ret;
+ }
+ }
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
- if (s)
- scs_patch((void *)s->sh_addr, s->sh_size);
+ if (s) {
+ /*
+ * Because we can reject modules that are malformed
+ * so SCS patching fails, skip dry run and try to patch
+ * it in place. If patching fails, the module would not
+ * be loaded anyway.
+ */
+ ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true);
+ if (ret) {
+ pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
+ me->name, ret);
+ return -ENOEXEC;
+ }
+ }
}
return module_init_ftrace_plt(hdr, sechdrs, me);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 4edecaac8f91..32148bf09c1d 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -35,10 +35,27 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
-void mte_sync_tags(pte_t pte)
+void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
- long i, nr_pages = compound_nr(page);
+ struct folio *folio = page_folio(page);
+ unsigned long i;
+
+ if (folio_test_hugetlb(folio)) {
+ unsigned long nr = folio_nr_pages(folio);
+
+ /* Hugetlb MTE flags are set for head page only */
+ if (folio_try_hugetlb_mte_tagging(folio)) {
+ for (i = 0; i < nr; i++, page++)
+ mte_clear_page_tags(page_address(page));
+ folio_set_hugetlb_mte_tagged(folio);
+ }
+
+ /* ensure the tags are visible before the PTE is set */
+ smp_wmb();
+
+ return;
+ }
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
@@ -67,7 +84,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
/*
* If the page content is identical but at least one of the pages is
* tagged, return non-zero to avoid KSM merging. If only one of the
- * pages is tagged, set_pte_at() may zero or change the tags of the
+ * pages is tagged, __set_ptes() may zero or change the tags of the
* other page via mte_sync_tags().
*/
if (page_mte_tagged(page1) || page_mte_tagged(page2))
@@ -140,6 +157,24 @@ void mte_enable_kernel_asymm(void)
mte_enable_kernel_sync();
}
}
+
+int mte_enable_kernel_store_only(void)
+{
+ /*
+ * If the CPU does not support MTE store only,
+ * the kernel checks all operations.
+ */
+ if (!cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ return -EINVAL;
+
+ sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCSO_MASK,
+ SYS_FIELD_PREP(SCTLR_EL1, TCSO, 1));
+ isb();
+
+ pr_info_once("MTE: enabled store only mode at EL1\n");
+
+ return 0;
+}
#endif
#ifdef CONFIG_KASAN_HW_TAGS
@@ -183,7 +218,7 @@ static void mte_update_sctlr_user(struct task_struct *task)
* program requested values go with what was requested.
*/
resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
- sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ sctlr &= ~(SCTLR_EL1_TCF0_MASK | SCTLR_EL1_TCSO0_MASK);
/*
* Pick an actual setting. The order in which we check for
* set bits and map into register values determines our
@@ -195,6 +230,10 @@ static void mte_update_sctlr_user(struct task_struct *task)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
+
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ sctlr |= SYS_FIELD_PREP(SCTLR_EL1, TCSO0, 1);
+
task->thread.sctlr_user = sctlr;
}
@@ -354,6 +393,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
(arg & PR_MTE_TCF_SYNC))
mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+ if (arg & PR_MTE_STORE_ONLY)
+ mte_ctrl |= MTE_CTRL_STORE_ONLY;
+
task->thread.mte_ctrl = mte_ctrl;
if (task == current) {
preempt_disable();
@@ -381,6 +423,8 @@ long get_mte_ctrl(struct task_struct *task)
ret |= PR_MTE_TCF_ASYNC;
if (mte_ctrl & MTE_CTRL_TCF_SYNC)
ret |= PR_MTE_TCF_SYNC;
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ ret |= PR_MTE_STORE_ONLY;
return ret;
}
@@ -410,9 +454,10 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
void *maddr;
struct page *page = get_user_page_vma_remote(mm, addr,
gup_flags, &vma);
+ struct folio *folio;
- if (IS_ERR_OR_NULL(page)) {
- err = page == NULL ? -EIO : PTR_ERR(page);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
break;
}
@@ -428,7 +473,13 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
put_page(page);
break;
}
- WARN_ON_ONCE(!page_mte_tagged(page));
+
+ folio = page_folio(page);
+ if (folio_test_hugetlb(folio))
+ WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
+ !is_huge_zero_folio(folio));
+ else
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
@@ -582,12 +633,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl);
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
const char __user *end = uaddr + size;
- int err = 0;
char val;
- __raw_get_user(val, uaddr, err);
- if (err)
- return size;
+ __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
while (uaddr < end) {
@@ -595,12 +643,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size)
* A read is sufficient for mte, the caller should have probed
* for the pte write permission if required.
*/
- __raw_get_user(val, uaddr, err);
- if (err)
- return end - uaddr;
+ __raw_get_user(val, uaddr, efault);
uaddr += MTE_GRANULE_SIZE;
}
(void)val;
return 0;
+
+efault:
+ return end - uaddr;
}
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index b4835f6d594b..1041bc67a3ee 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -10,7 +10,7 @@
#include <asm/fixmap.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/sections.h>
static DEFINE_RAW_SPINLOCK(patch_lock);
@@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr)
static void __kprobes *patch_map(void *addr, int fixmap)
{
- unsigned long uintaddr = (uintptr_t) addr;
- bool image = is_image_text(uintaddr);
- struct page *page;
+ phys_addr_t phys;
- if (image)
- page = phys_to_page(__pa_symbol(addr));
- else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
- page = vmalloc_to_page(addr);
- else
- return addr;
+ if (is_image_text((unsigned long)addr)) {
+ phys = __pa_symbol(addr);
+ } else {
+ struct page *page = vmalloc_to_page(addr);
+ BUG_ON(!page);
+ phys = page_to_phys(page) + offset_in_page(addr);
+ }
- BUG_ON(!page);
- return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
- (uintaddr & ~PAGE_MASK));
+ return (void *)set_fixmap_offset(fixmap, phys);
}
static void __kprobes patch_unmap(int fixmap)
@@ -105,6 +102,81 @@ noinstr int aarch64_insn_write_literal_u64(void *addr, u64 val)
return ret;
}
+typedef void text_poke_f(void *dst, void *src, size_t patched, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, void *src, size_t len)
+{
+ unsigned long flags;
+ size_t patched = 0;
+ size_t size;
+ void *waddr;
+ void *ptr;
+
+ raw_spin_lock_irqsave(&patch_lock, flags);
+
+ while (patched < len) {
+ ptr = addr + patched;
+ size = min_t(size_t, PAGE_SIZE - offset_in_page(ptr),
+ len - patched);
+
+ waddr = patch_map(ptr, FIX_TEXT_POKE0);
+ func(waddr, src, patched, size);
+ patch_unmap(FIX_TEXT_POKE0);
+
+ patched += size;
+ }
+ raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+ flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len);
+
+ return addr;
+}
+
+static void text_poke_memcpy(void *dst, void *src, size_t patched, size_t len)
+{
+ copy_to_kernel_nofault(dst, src + patched, len);
+}
+
+static void text_poke_memset(void *dst, void *src, size_t patched, size_t len)
+{
+ u32 c = *(u32 *)src;
+
+ memset32(dst, c, len / 4);
+}
+
+/**
+ * aarch64_insn_copy - Copy instructions into (an unused part of) RX memory
+ * @dst: address to modify
+ * @src: source of the copy
+ * @len: length to copy
+ *
+ * Useful for JITs to dump new code blocks into unused regions of RX memory.
+ */
+noinstr void *aarch64_insn_copy(void *dst, void *src, size_t len)
+{
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memcpy, dst, src, len);
+}
+
+/**
+ * aarch64_insn_set - memset for RX memory regions.
+ * @dst: address to modify
+ * @insn: value to set
+ * @len: length of memory region.
+ *
+ * Useful for JITs to fill regions of RX memory with illegal instructions.
+ */
+noinstr void *aarch64_insn_set(void *dst, u32 insn, size_t len)
+{
+ if ((uintptr_t)dst & 0x3)
+ return NULL;
+
+ return __text_poke(text_poke_memset, dst, &insn, len);
+}
+
int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
u32 *tp = addr;
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index f872c57e9909..fd9a7bed83ce 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -6,28 +6,7 @@
* Copyright (C) 2014 ARM Ltd.
*/
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
-#include <linux/slab.h>
-
-#ifdef CONFIG_ACPI
-/*
- * Try to assign the IRQ number when probing a new device
- */
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
- if (!acpi_disabled)
- acpi_pci_irq_enable(dev);
-
- return 0;
-}
-#endif
/*
* raw_pci_read/write - Platform-specific PCI config space access.
@@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-
-#ifdef CONFIG_ACPI
-
-struct acpi_pci_generic_root_info {
- struct acpi_pci_root_info common;
- struct pci_config_window *cfg; /* config space mapping */
-};
-
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
-{
- struct pci_config_window *cfg = bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
-
- return root->segment;
-}
-
-int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
-{
- struct pci_config_window *cfg;
- struct acpi_device *adev;
- struct device *bus_dev;
-
- if (acpi_disabled)
- return 0;
-
- cfg = bridge->bus->sysdata;
-
- /*
- * On Hyper-V there is no corresponding ACPI device for a root bridge,
- * therefore ->parent is set as NULL by the driver. And set 'adev' as
- * NULL in this case because there is no proper ACPI device.
- */
- if (!cfg->parent)
- adev = NULL;
- else
- adev = to_acpi_device(cfg->parent);
-
- bus_dev = &bridge->bus->dev;
-
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-
- return 0;
-}
-
-static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
-{
- struct resource_entry *entry, *tmp;
- int status;
-
- status = acpi_pci_probe_root_resources(ci);
- resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
- if (!(entry->res->flags & IORESOURCE_WINDOW))
- resource_list_destroy_entry(entry);
- }
- return status;
-}
-
-/*
- * Lookup the bus range for the domain in MCFG, and set up config space
- * mapping.
- */
-static struct pci_config_window *
-pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
-{
- struct device *dev = &root->device->dev;
- struct resource *bus_res = &root->secondary;
- u16 seg = root->segment;
- const struct pci_ecam_ops *ecam_ops;
- struct resource cfgres;
- struct acpi_device *adev;
- struct pci_config_window *cfg;
- int ret;
-
- ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
- if (ret) {
- dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
- return NULL;
- }
-
- adev = acpi_resource_consumer(&cfgres);
- if (adev)
- dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
- dev_name(&adev->dev));
- else
- dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
- &cfgres);
-
- cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
- if (IS_ERR(cfg)) {
- dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
- PTR_ERR(cfg));
- return NULL;
- }
-
- return cfg;
-}
-
-/* release_info: free resources allocated by init_info */
-static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
-{
- struct acpi_pci_generic_root_info *ri;
-
- ri = container_of(ci, struct acpi_pci_generic_root_info, common);
- pci_ecam_free(ri->cfg);
- kfree(ci->ops);
- kfree(ri);
-}
-
-/* Interface called from ACPI code to setup PCI host controller */
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- struct acpi_pci_generic_root_info *ri;
- struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
- struct pci_host_bridge *host;
-
- ri = kzalloc(sizeof(*ri), GFP_KERNEL);
- if (!ri)
- return NULL;
-
- root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops) {
- kfree(ri);
- return NULL;
- }
-
- ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg) {
- kfree(ri);
- kfree(root_ops);
- return NULL;
- }
-
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
- if (!bus)
- return NULL;
-
- /* If we must preserve the resource configuration, claim now */
- host = pci_find_host_bridge(bus);
- if (host->preserve_config)
- pci_bus_claim_resources(bus);
-
- /*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
- */
- pci_assign_unassigned_root_bus_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
-
- return bus;
-}
-
-void pcibios_add_bus(struct pci_bus *bus)
-{
- acpi_pci_add_bus(bus);
-}
-
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- acpi_pci_remove_bus(bus);
-}
-
-#endif
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 6d157f32187b..9b7f26b128b5 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -10,94 +10,12 @@
#include <asm/pointer_auth.h>
-struct frame_tail {
- struct frame_tail __user *fp;
- unsigned long lr;
-} __attribute__((packed));
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static struct frame_tail __user *
-user_backtrace(struct frame_tail __user *tail,
- struct perf_callchain_entry_ctx *entry)
-{
- struct frame_tail buftail;
- unsigned long err;
- unsigned long lr;
-
- /* Also check accessibility of one struct frame_tail beyond */
- if (!access_ok(tail, sizeof(buftail)))
- return NULL;
-
- pagefault_disable();
- err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
- pagefault_enable();
-
- if (err)
- return NULL;
-
- lr = ptrauth_strip_user_insn_pac(buftail.lr);
-
- perf_callchain_store(entry, lr);
-
- /*
- * Frame pointers should strictly progress back up the stack
- * (towards higher addresses).
- */
- if (tail >= buftail.fp)
- return NULL;
-
- return buftail.fp;
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct compat_frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct compat_frame_tail {
- compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
- u32 sp;
- u32 lr;
-} __attribute__((packed));
-
-static struct compat_frame_tail __user *
-compat_user_backtrace(struct compat_frame_tail __user *tail,
- struct perf_callchain_entry_ctx *entry)
+static bool callchain_trace(void *data, unsigned long pc)
{
- struct compat_frame_tail buftail;
- unsigned long err;
-
- /* Also check accessibility of one struct frame_tail beyond */
- if (!access_ok(tail, sizeof(buftail)))
- return NULL;
-
- pagefault_disable();
- err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
- pagefault_enable();
-
- if (err)
- return NULL;
-
- perf_callchain_store(entry, buftail.lr);
-
- /*
- * Frame pointers should strictly progress back up the stack
- * (towards higher addresses).
- */
- if (tail + 1 >= (struct compat_frame_tail __user *)
- compat_ptr(buftail.fp))
- return NULL;
+ struct perf_callchain_entry_ctx *entry = data;
- return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+ return perf_callchain_store(entry, pc) == 0;
}
-#endif /* CONFIG_COMPAT */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
@@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
return;
}
- perf_callchain_store(entry, regs->pc);
-
- if (!compat_user_mode(regs)) {
- /* AARCH64 mode */
- struct frame_tail __user *tail;
-
- tail = (struct frame_tail __user *)regs->regs[29];
-
- while (entry->nr < entry->max_stack &&
- tail && !((unsigned long)tail & 0x7))
- tail = user_backtrace(tail, entry);
- } else {
-#ifdef CONFIG_COMPAT
- /* AARCH32 compat mode */
- struct compat_frame_tail __user *tail;
-
- tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
-
- while ((entry->nr < entry->max_stack) &&
- tail && !((unsigned long)tail & 0x3))
- tail = compat_user_backtrace(tail, entry);
-#endif
- }
-}
-
-static bool callchain_trace(void *data, unsigned long pc)
-{
- struct perf_callchain_entry_ctx *entry = data;
- return perf_callchain_store(entry, pc) == 0;
+ arch_stack_walk_user(callchain_trace, entry, regs);
}
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
@@ -148,31 +38,3 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
arch_stack_walk(callchain_trace, entry, current, regs);
}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
- if (perf_guest_state())
- return perf_guest_get_ip();
-
- return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
- unsigned int guest_state = perf_guest_state();
- int misc = 0;
-
- if (guest_state) {
- if (guest_state & PERF_GUEST_USER)
- misc |= PERF_RECORD_MISC_GUEST_USER;
- else
- misc |= PERF_RECORD_MISC_GUEST_KERNEL;
- } else {
- if (user_mode(regs))
- misc |= PERF_RECORD_MISC_USER;
- else
- misc |= PERF_RECORD_MISC_KERNEL;
- }
-
- return misc;
-}
diff --git a/arch/arm64/kernel/pi/.gitignore b/arch/arm64/kernel/pi/.gitignore
new file mode 100644
index 000000000000..efb29b663e85
--- /dev/null
+++ b/arch/arm64/kernel/pi/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+relacheck
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 4c0ea3cd4ea4..be92d73c25b2 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -2,7 +2,8 @@
# Copyright 2022 Google LLC
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
- -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_KSTACK_ERASE) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
-include $(srctree)/include/linux/hidden.h \
@@ -10,25 +11,34 @@ KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-fno-asynchronous-unwind-tables -fno-unwind-tables \
$(call cc-option,-fno-addrsig)
+# this code may run with the MMU off so disable unaligned accesses
+CFLAGS_map_range.o += -mstrict-align
+
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
+hostprogs := relacheck
+
+quiet_cmd_piobjcopy = $(quiet_cmd_objcopy)
+ cmd_piobjcopy = $(cmd_objcopy) && $(obj)/relacheck $(@) $(<)
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
- --remove-section=.note.gnu.property \
- --prefix-alloc-sections=.init
-$(obj)/%.pi.o: $(obj)/%.o FORCE
- $(call if_changed,objcopy)
+ --remove-section=.note.gnu.property
+$(obj)/%.pi.o: $(obj)/%.o $(obj)/relacheck FORCE
+ $(call if_changed,piobjcopy)
+
+# ensure that all the lib- code ends up as __init code and data
+$(obj)/lib-%.pi.o: OBJCOPYFLAGS += --prefix-alloc-sections=.init
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+obj-y := idreg-override.pi.o \
+ map_kernel.pi.o map_range.pi.o \
+ lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
+obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
+targets := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c
new file mode 100644
index 000000000000..bc57b290e5e7
--- /dev/null
+++ b/arch/arm64/kernel/pi/idreg-override.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Early cpufeature override framework
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/setup.h>
+
+#include "pi.h"
+
+#define FTR_DESC_NAME_LEN 20
+#define FTR_DESC_FIELD_LEN 10
+#define FTR_ALIAS_NAME_LEN 30
+#define FTR_ALIAS_OPTION_LEN 116
+
+static u64 __boot_status __initdata;
+
+typedef bool filter_t(u64 val);
+
+struct ftr_set_desc {
+ char name[FTR_DESC_NAME_LEN];
+ PREL64(struct arm64_ftr_override, override);
+ struct {
+ char name[FTR_DESC_FIELD_LEN];
+ u8 shift;
+ u8 width;
+ PREL64(filter_t, filter);
+ } fields[];
+};
+
+#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
+
+static const struct ftr_set_desc mmfr0 __prel64_initconst = {
+ .name = "id_aa64mmfr0",
+ .override = &id_aa64mmfr0_override,
+ .fields = {
+ FIELD("ecv", ID_AA64MMFR0_EL1_ECV_SHIFT, NULL),
+ {}
+ },
+};
+
+static bool __init mmfr1_vh_filter(u64 val)
+{
+ /*
+ * If we ever reach this point while running VHE, we're
+ * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+ * the user was trying to force nVHE on us, proceed with
+ * attitude adjustment.
+ */
+ return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
+ val == 0);
+}
+
+static const struct ftr_set_desc mmfr1 __prel64_initconst = {
+ .name = "id_aa64mmfr1",
+ .override = &id_aa64mmfr1_override,
+ .fields = {
+ FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
+ {}
+ },
+};
+
+
+static bool __init mmfr2_varange_filter(u64 val)
+{
+ int __maybe_unused feat;
+
+ if (val)
+ return false;
+
+#ifdef CONFIG_ARM64_LPA2
+ feat = cpuid_feature_extract_signed_field(read_sysreg(id_aa64mmfr0_el1),
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ if (feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2) {
+ id_aa64mmfr0_override.val |=
+ (ID_AA64MMFR0_EL1_TGRAN_LPA2 - 1) << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_TGRAN_SHIFT;
+
+ /*
+ * Override PARange to 48 bits - the override will just be
+ * ignored if the actual PARange is smaller, but this is
+ * unlikely to be the case for LPA2 capable silicon.
+ */
+ id_aa64mmfr0_override.val |=
+ ID_AA64MMFR0_EL1_PARANGE_48 << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+ id_aa64mmfr0_override.mask |= 0xfU << ID_AA64MMFR0_EL1_PARANGE_SHIFT;
+ }
+#endif
+ return true;
+}
+
+static const struct ftr_set_desc mmfr2 __prel64_initconst = {
+ .name = "id_aa64mmfr2",
+ .override = &id_aa64mmfr2_override,
+ .fields = {
+ FIELD("varange", ID_AA64MMFR2_EL1_VARange_SHIFT, mmfr2_varange_filter),
+ {}
+ },
+};
+
+static bool __init pfr0_sve_filter(u64 val)
+{
+ /*
+ * Disabling SVE also means disabling all the features that
+ * are associated with it. The easiest way to do it is just to
+ * override id_aa64zfr0_el1 to be 0.
+ */
+ if (!val) {
+ id_aa64zfr0_override.val = 0;
+ id_aa64zfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
+static const struct ftr_set_desc pfr0 __prel64_initconst = {
+ .name = "id_aa64pfr0",
+ .override = &id_aa64pfr0_override,
+ .fields = {
+ FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
+ FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL),
+ FIELD("mpam", ID_AA64PFR0_EL1_MPAM_SHIFT, NULL),
+ {}
+ },
+};
+
+static bool __init pfr1_sme_filter(u64 val)
+{
+ /*
+ * Similarly to SVE, disabling SME also means disabling all
+ * the features that are associated with it. Just set
+ * id_aa64smfr0_el1 to 0 and don't look back.
+ */
+ if (!val) {
+ id_aa64smfr0_override.val = 0;
+ id_aa64smfr0_override.mask = GENMASK(63, 0);
+ }
+
+ return true;
+}
+
+static const struct ftr_set_desc pfr1 __prel64_initconst = {
+ .name = "id_aa64pfr1",
+ .override = &id_aa64pfr1_override,
+ .fields = {
+ FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+ FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL),
+ FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
+ FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("mpam_frac", ID_AA64PFR1_EL1_MPAM_frac_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar1 __prel64_initconst = {
+ .name = "id_aa64isar1",
+ .override = &id_aa64isar1_override,
+ .fields = {
+ FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
+ FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
+ FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
+ FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc isar2 __prel64_initconst = {
+ .name = "id_aa64isar2",
+ .override = &id_aa64isar2_override,
+ .fields = {
+ FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
+ FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+ FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
+ {}
+ },
+};
+
+static const struct ftr_set_desc smfr0 __prel64_initconst = {
+ .name = "id_aa64smfr0",
+ .override = &id_aa64smfr0_override,
+ .fields = {
+ FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
+ /* FA64 is a one bit field... :-/ */
+ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
+ {}
+ },
+};
+
+static bool __init hvhe_filter(u64 val)
+{
+ u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+
+ return (val == 1 &&
+ lower_32_bits(__boot_status) == BOOT_CPU_MODE_EL2 &&
+ cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_VH_SHIFT));
+}
+
+static const struct ftr_set_desc sw_features __prel64_initconst = {
+ .name = "arm64_sw",
+ .override = &arm64_sw_feature_override,
+ .fields = {
+ FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
+ FIELD("hvhe", ARM64_SW_FEATURE_OVERRIDE_HVHE, hvhe_filter),
+ FIELD("rodataoff", ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF, NULL),
+ {}
+ },
+};
+
+static const
+PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr0 },
+ { &mmfr1 },
+ { &mmfr2 },
+ { &pfr0 },
+ { &pfr1 },
+ { &isar1 },
+ { &isar2 },
+ { &smfr0 },
+ { &sw_features },
+};
+
+static const struct {
+ char alias[FTR_ALIAS_NAME_LEN];
+ char feature[FTR_ALIAS_OPTION_LEN];
+} aliases[] __initconst = {
+ { "kvm_arm.mode=nvhe", "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "arm64_sw.hvhe=1" },
+ { "arm64.nosve", "id_aa64pfr0.sve=0" },
+ { "arm64.nosme", "id_aa64pfr1.sme=0" },
+ { "arm64.nobti", "id_aa64pfr1.bt=0" },
+ { "arm64.nogcs", "id_aa64pfr1.gcs=0" },
+ { "arm64.nopauth",
+ "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+ "id_aa64isar1.api=0 id_aa64isar1.apa=0 "
+ "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
+ { "arm64.nomops", "id_aa64isar2.mops=0" },
+ { "arm64.nomte", "id_aa64pfr1.mte=0" },
+ { "nokaslr", "arm64_sw.nokaslr=1" },
+ { "rodata=off", "arm64_sw.rodataoff=1" },
+ { "arm64.nolva", "id_aa64mmfr2.varange=0" },
+ { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" },
+ { "arm64.nompam", "id_aa64pfr0.mpam=0 id_aa64pfr1.mpam_frac=0" },
+};
+
+static int __init parse_hexdigit(const char *p, u64 *v)
+{
+ // skip "0x" if it comes next
+ if (p[0] == '0' && tolower(p[1]) == 'x')
+ p += 2;
+
+ // check whether the RHS is a single hex digit
+ if (!isxdigit(p[0]) || (p[1] && !isspace(p[1])))
+ return -EINVAL;
+
+ *v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10);
+ return 0;
+}
+
+static int __init find_field(const char *cmdline, char *opt, int len,
+ const struct ftr_set_desc *reg, int f, u64 *v)
+{
+ int flen = strlen(reg->fields[f].name);
+
+ // append '<fieldname>=' to obtain '<name>.<fieldname>='
+ memcpy(opt + len, reg->fields[f].name, flen);
+ len += flen;
+ opt[len++] = '=';
+
+ if (memcmp(cmdline, opt, len))
+ return -1;
+
+ return parse_hexdigit(cmdline + len, v);
+}
+
+static void __init match_options(const char *cmdline)
+{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg);
+ struct arm64_ftr_override *override;
+ int len = strlen(reg->name);
+ int f;
+
+ override = prel64_pointer(reg->override);
+
+ // set opt[] to '<name>.'
+ memcpy(opt, reg->name, len);
+ opt[len++] = '.';
+
+ for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
+ u64 shift = reg->fields[f].shift;
+ u64 width = reg->fields[f].width ?: 4;
+ u64 mask = GENMASK_ULL(shift + width - 1, shift);
+ bool (*filter)(u64 val);
+ u64 v;
+
+ if (find_field(cmdline, opt, len, reg, f, &v))
+ continue;
+
+ /*
+ * If an override gets filtered out, advertise
+ * it by setting the value to the all-ones while
+ * clearing the mask... Yes, this is fragile.
+ */
+ filter = prel64_pointer(reg->fields[f].filter);
+ if (filter && !filter(v)) {
+ override->val |= mask;
+ override->mask &= ~mask;
+ continue;
+ }
+
+ override->val &= ~mask;
+ override->val |= (v << shift) & mask;
+ override->mask |= mask;
+
+ return;
+ }
+ }
+}
+
+static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
+{
+ do {
+ char buf[256];
+ size_t len;
+ int i;
+
+ cmdline = skip_spaces(cmdline);
+
+ /* terminate on "--" appearing on the command line by itself */
+ if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2]))
+ return;
+
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) {
+ if (len >= sizeof(buf) - 1)
+ break;
+ if (cmdline[len] == '-')
+ buf[len] = '_';
+ else
+ buf[len] = cmdline[len];
+ }
+ if (!len)
+ return;
+
+ buf[len] = 0;
+
+ cmdline += len;
+
+ match_options(buf);
+
+ for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
+ if (!memcmp(buf, aliases[i].alias, len + 1))
+ __parse_cmdline(aliases[i].feature, false);
+ } while (1);
+}
+
+static __init const u8 *get_bootargs_cmdline(const void *fdt, int node)
+{
+ static char const bootargs[] __initconst = "bootargs";
+ const u8 *prop;
+
+ if (node < 0)
+ return NULL;
+
+ prop = fdt_getprop(fdt, node, bootargs, NULL);
+ if (!prop)
+ return NULL;
+
+ return strlen(prop) ? prop : NULL;
+}
+
+static __init void parse_cmdline(const void *fdt, int chosen)
+{
+ static char const cmdline[] __initconst = CONFIG_CMDLINE;
+ const u8 *prop = get_bootargs_cmdline(fdt, chosen);
+
+ if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
+ __parse_cmdline(cmdline, true);
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
+ __parse_cmdline(prop, true);
+}
+
+void __init init_feature_override(u64 boot_status, const void *fdt,
+ int chosen)
+{
+ struct arm64_ftr_override *override;
+ const struct ftr_set_desc *reg;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+
+ override->val = 0;
+ override->mask = 0;
+ }
+
+ __boot_status = boot_status;
+
+ parse_cmdline(fdt, chosen);
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+ dcache_clean_inval_poc((unsigned long)override,
+ (unsigned long)(override + 1));
+ }
+}
+
+char * __init skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
index 17bff6e399e4..e0e018046a46 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -14,69 +14,21 @@
#include <asm/archrandom.h>
#include <asm/memory.h>
+#include <asm/pgtable.h>
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
- size_t l1, l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- l1 = strlen(s1);
- while (l1 >= l2) {
- l1--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = __strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
-
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
-
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
+#include "pi.h"
-static u64 get_kaslr_seed(void *fdt)
+static u64 __init get_kaslr_seed(void *fdt, int node)
{
- int node, len;
+ static char const seed_str[] __initconst = "kaslr-seed";
fdt64_t *prop;
u64 ret;
+ int len;
- node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ prop = fdt_getprop_w(fdt, node, seed_str, &len);
if (!prop || len != sizeof(u64))
return 0;
@@ -85,14 +37,14 @@ static u64 get_kaslr_seed(void *fdt)
return ret;
}
-asmlinkage u64 kaslr_early_init(void *fdt)
+u64 __init kaslr_early_init(void *fdt, int chosen)
{
- u64 seed;
+ u64 seed, range;
- if (is_kaslr_disabled_cmdline(fdt))
+ if (kaslr_disabled_cmdline())
return 0;
- seed = get_kaslr_seed(fdt);
+ seed = get_kaslr_seed(fdt, chosen);
if (!seed) {
if (!__early_cpu_has_rndr() ||
!__arm64_rndr((unsigned long *)&seed))
@@ -102,9 +54,9 @@ asmlinkage u64 kaslr_early_init(void *fdt)
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
+ * 'middle' half of the VMALLOC area, and stay clear of the lower and
+ * upper quarters to avoid colliding with other allocations.
*/
- return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+ range = (VMALLOC_END - KIMAGE_VADDR) / 2;
+ return range / 2 + (((__uint128_t)range * seed) >> 64);
}
diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
new file mode 100644
index 000000000000..a852264958c3
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "pi.h"
+
+extern const u8 __eh_frame_start[], __eh_frame_end[];
+
+extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
+
+static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
+ void *start, void *end, pgprot_t prot,
+ bool may_use_cont, int root_level)
+{
+ map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
+}
+
+static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
+ void *end, int root_level)
+{
+ map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
+ false, root_level);
+}
+
+static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
+{
+ bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
+ bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
+ phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+ pgprot_t prot;
+
+ /*
+ * External debuggers may need to write directly to the text mapping to
+ * install SW breakpoints. Allow this (only) when explicitly requested
+ * with rodata=off.
+ */
+ if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
+ text_prot = PAGE_KERNEL_EXEC;
+
+ /*
+ * We only enable the shadow call stack dynamically if we are running
+ * on a system that does not implement PAC or BTI. PAC and SCS provide
+ * roughly the same level of protection, and BTI relies on the PACIASP
+ * instructions serving as landing pads, preventing us from patching
+ * those instructions into something else.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
+ enable_scs = false;
+
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
+ enable_scs = false;
+
+ /*
+ * If we have a CPU that supports BTI and a kernel built for
+ * BTI then mark the kernel executable text as guarded pages
+ * now so we don't have to rewrite the page tables later.
+ */
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+ }
+
+ /* Map all code read-write on the first pass if needed */
+ twopass |= enable_scs;
+ prot = twopass ? data_prot : text_prot;
+
+ /*
+ * [_stext, _text) isn't executed after boot and contains some
+ * non-executable, unpredictable data, so map it non-executable.
+ */
+ map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
+ false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+ !twopass, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
+ __inittext_begin, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
+ __inittext_end, prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
+ __initdata_end, data_prot, false, root_level);
+ map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
+ true, root_level);
+ dsb(ishst);
+
+ idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);
+
+ if (twopass) {
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate_kernel(kaslr_offset);
+
+ if (enable_scs) {
+ scs_patch(__eh_frame_start + va_offset,
+ __eh_frame_end - __eh_frame_start, false);
+ asm("ic ialluis");
+
+ dynamic_scs_is_enabled = true;
+ }
+
+ /*
+ * Unmap the text region before remapping it, to avoid
+ * potential TLB conflicts when creating the contiguous
+ * descriptors.
+ */
+ unmap_segment(init_pg_dir, va_offset, _stext, _etext,
+ root_level);
+ dsb(ishst);
+ isb();
+ __tlbi(vmalle1);
+ isb();
+
+ /*
+ * Remap these segments with different permissions
+ * No new page table allocations should be needed
+ */
+ map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
+ text_prot, true, root_level);
+ map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
+ __inittext_end, text_prot, false, root_level);
+ }
+
+ /* Copy the root page table to its final location */
+ memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
+ dsb(ishst);
+ idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
+}
+
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
+{
+ u64 sctlr = read_sysreg(sctlr_el1);
+ u64 tcr = read_sysreg(tcr_el1) | TCR_EL1_DS;
+ u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+
+ tcr &= ~TCR_EL1_IPS_MASK;
+ tcr |= parange << TCR_EL1_IPS_SHIFT;
+
+ asm(" msr sctlr_el1, %0 ;"
+ " isb ;"
+ " msr ttbr0_el1, %1 ;"
+ " msr tcr_el1, %2 ;"
+ " isb ;"
+ " tlbi vmalle1 ;"
+ " dsb nsh ;"
+ " isb ;"
+ " msr sctlr_el1, %3 ;"
+ " isb ;"
+ :: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
+}
+
+static void __init remap_idmap_for_lpa2(void)
+{
+ /* clear the bits that change meaning once LPA2 is turned on */
+ ptdesc_t mask = PTE_SHARED;
+
+ /*
+ * We have to clear bits [9:8] in all block or page descriptors in the
+ * initial ID map, as otherwise they will be (mis)interpreted as
+ * physical address bits once we flick the LPA2 switch (TCR.DS). Since
+ * we cannot manipulate live descriptors in that way without creating
+ * potential TLB conflicts, let's create another temporary ID map in a
+ * LPA2 compatible fashion, and update the initial ID map while running
+ * from that.
+ */
+ create_init_idmap(init_pg_dir, mask);
+ dsb(ishst);
+ set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
+
+ /*
+ * Recreate the initial ID map with the same granularity as before.
+ * Don't bother with the FDT, we no longer need it after this.
+ */
+ memset(init_idmap_pg_dir, 0,
+ (char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);
+
+ create_init_idmap(init_idmap_pg_dir, mask);
+ dsb(ishst);
+
+ /* switch back to the updated initial ID map */
+ set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);
+
+ /* wipe the temporary ID map from memory */
+ memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
+}
+
+static void *__init map_fdt(phys_addr_t fdt)
+{
+ static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
+ phys_addr_t efdt = fdt + MAX_FDT_SIZE;
+ phys_addr_t ptep = (phys_addr_t)ptes; /* We're idmapped when called */
+
+ /*
+ * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
+ * the kernel image.
+ */
+ map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
+ fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
+ (pte_t *)init_idmap_pg_dir, false, 0);
+ dsb(ishst);
+
+ return (void *)fdt;
+}
+
+/*
+ * PI version of the Cavium Eratum 27456 detection, which makes it
+ * impossible to use non-global mappings.
+ */
+static bool __init ng_mappings_allowed(void)
+{
+ static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
+ /* Cavium ThunderX, T81 pass 1.0 */
+ MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+ {},
+ };
+
+ for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
+ if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
+ r->rv_min, r->rv_max))
+ return false;
+ }
+
+ return true;
+}
+
+asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
+{
+ static char const chosen_str[] __initconst = "/chosen";
+ u64 va_base, pa_base = (u64)&_text;
+ u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
+ int root_level = 4 - CONFIG_PGTABLE_LEVELS;
+ int va_bits = VA_BITS;
+ int chosen;
+ void *fdt_mapped = map_fdt(fdt);
+
+ /* Clear BSS and the initial page tables */
+ memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);
+
+ /* Parse the command line for CPU feature overrides */
+ chosen = fdt_path_offset(fdt_mapped, chosen_str);
+ init_feature_override(boot_status, fdt_mapped, chosen);
+
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
+ va_bits = VA_BITS_MIN;
+ } else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
+ va_bits = VA_BITS_MIN;
+ root_level++;
+ }
+
+ if (va_bits > VA_BITS_MIN)
+ sysreg_clear_set(tcr_el1, TCR_EL1_T1SZ_MASK, TCR_T1SZ(va_bits));
+
+ /*
+ * The virtual KASLR displacement modulo 2MiB is decided by the
+ * physical placement of the image, as otherwise, we might not be able
+ * to create the early kernel mapping using 2 MiB block descriptors. So
+ * take the low bits of the KASLR offset from the physical address, and
+ * fill in the high bits from the seed.
+ */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);
+
+ if (kaslr_seed && kaslr_requires_kpti())
+ arm64_use_ng_mappings = ng_mappings_allowed();
+
+ kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
+ remap_idmap_for_lpa2();
+
+ va_base = KIMAGE_VADDR + kaslr_offset;
+ map_kernel(kaslr_offset, va_base - pa_base, root_level);
+}
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
new file mode 100644
index 000000000000..de52cd85c691
--- /dev/null
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+#include <linux/sizes.h>
+
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+#include "pi.h"
+
+/**
+ * map_range - Map a contiguous range of physical pages into virtual memory
+ *
+ * @pte: Address of physical pointer to array of pages to
+ * allocate page tables from
+ * @start: Virtual address of the start of the range
+ * @end: Virtual address of the end of the range (exclusive)
+ * @pa: Physical address of the start of the range
+ * @prot: Access permissions of the range
+ * @level: Translation level for the mapping
+ * @tbl: The level @level page table to create the mappings in
+ * @may_use_cont: Whether the use of the contiguous attribute is allowed
+ * @va_offset: Offset between a physical page and its current mapping
+ * in the VA space
+ */
+void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset)
+{
+ u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+ ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * PTDESC_TABLE_SHIFT;
+ u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+ start &= PAGE_MASK;
+ pa &= PAGE_MASK;
+
+ /* Advance tbl to the entry that covers start */
+ tbl += (start >> (lshift + PAGE_SHIFT)) % PTRS_PER_PTE;
+
+ /*
+ * Set the right block/page bits for this level unless we are
+ * clearing the mapping
+ */
+ if (protval)
+ protval |= (level == 2) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+ while (start < end) {
+ u64 next = min((start | lmask) + 1, PAGE_ALIGN(end));
+
+ if (level < 2 || (level == 2 && (start | next | pa) & lmask)) {
+ /*
+ * This chunk needs a finer grained mapping. Create a
+ * table mapping if necessary and recurse.
+ */
+ if (pte_none(*tbl)) {
+ *tbl = __pte(__phys_to_pte_val(*pte) |
+ PMD_TYPE_TABLE | PMD_TABLE_UXN);
+ *pte += PTRS_PER_PTE * sizeof(pte_t);
+ }
+ map_range(pte, start, next, pa, prot, level + 1,
+ (pte_t *)(__pte_to_phys(*tbl) + va_offset),
+ may_use_cont, va_offset);
+ } else {
+ /*
+ * Start a contiguous range if start and pa are
+ * suitably aligned
+ */
+ if (((start | pa) & cmask) == 0 && may_use_cont)
+ protval |= PTE_CONT;
+
+ /*
+ * Clear the contiguous attribute if the remaining
+ * range does not cover a contiguous block
+ */
+ if ((end & ~cmask) <= start)
+ protval &= ~PTE_CONT;
+
+ /* Put down a block or page mapping */
+ *tbl = __pte(__phys_to_pte_val(pa) | protval);
+ }
+ pa += next - start;
+ start = next;
+ tbl++;
+ }
+}
+
+asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
+{
+ phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+
+ pgprot_val(text_prot) &= ~clrmask;
+ pgprot_val(data_prot) &= ~clrmask;
+
+ /* MMU is off; pointer casts to phys_addr_t are safe */
+ map_range(&ptep, (u64)_stext, (u64)__initdata_begin,
+ (phys_addr_t)_stext, text_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)__initdata_begin, (u64)_end,
+ (phys_addr_t)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
+
+ return ptep;
+}
diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c
index a1fe4b4ff591..bbe7d30ed12b 100644
--- a/arch/arm64/kernel/patch-scs.c
+++ b/arch/arm64/kernel/pi/patch-scs.c
@@ -4,16 +4,17 @@
* Author: Ard Biesheuvel <ardb@google.com>
*/
-#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/linkage.h>
-#include <linux/printk.h>
#include <linux/types.h>
-#include <asm/cacheflush.h>
#include <asm/scs.h>
+#include "pi.h"
+
+bool dynamic_scs_is_enabled;
+
//
// This minimal DWARF CFI parser is partially based on the code in
// arch/arc/kernel/unwind.c, and on the document below:
@@ -49,7 +50,9 @@
#define DW_CFA_GNU_negative_offset_extended 0x2f
#define DW_CFA_hi_user 0x3f
-extern const u8 __eh_frame_start[], __eh_frame_end[];
+#define DW_EH_PE_sdata4 0x0b
+#define DW_EH_PE_sdata8 0x0c
+#define DW_EH_PE_pcrel 0x10
enum {
PACIASP = 0xd503233f,
@@ -81,7 +84,11 @@ static void __always_inline scs_patch_loc(u64 loc)
*/
return;
}
- dcache_clean_pou(loc, loc + sizeof(u32));
+ if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_CLEAN_CACHE))
+ asm("dc civac, %0" :: "r"(loc));
+ else
+ asm(ALTERNATIVE("dc cvau, %0", "nop", ARM64_HAS_CACHE_IDC)
+ :: "r"(loc));
}
/*
@@ -117,7 +124,12 @@ struct eh_frame {
union {
struct { // CIE
u8 version;
- u8 augmentation_string[];
+ u8 augmentation_string[3];
+ u8 code_alignment_factor;
+ u8 data_alignment_factor;
+ u8 return_address_register;
+ u8 augmentation_data_size;
+ u8 fde_pointer_format;
};
struct { // FDE
@@ -125,29 +137,38 @@ struct eh_frame {
s32 range;
u8 opcodes[];
};
+
+ struct { // FDE
+ s64 initial_loc64;
+ s64 range64;
+ u8 opcodes64[];
+ };
};
};
-static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
- bool fde_has_augmentation_data,
- int code_alignment_factor,
- bool dry_run)
+static int scs_handle_fde_frame(const struct eh_frame *frame,
+ int code_alignment_factor,
+ bool use_sdata8,
+ bool dry_run)
{
int size = frame->size - offsetof(struct eh_frame, opcodes) + 4;
u64 loc = (u64)offset_to_ptr(&frame->initial_loc);
const u8 *opcode = frame->opcodes;
+ int l;
- if (fde_has_augmentation_data) {
- int l;
+ if (use_sdata8) {
+ loc = (u64)&frame->initial_loc64 + frame->initial_loc64;
+ opcode = frame->opcodes64;
+ size -= 8;
+ }
- // assume single byte uleb128_t
- if (WARN_ON(*opcode & BIT(7)))
- return -ENOEXEC;
+ // assume single byte uleb128_t for augmentation data size
+ if (*opcode & BIT(7))
+ return EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE;
- l = *opcode++;
- opcode += l;
- size -= l + 1;
- }
+ l = *opcode++;
+ opcode += l;
+ size -= l + 1;
/*
* Starting from 'loc', apply the CFA opcodes that advance the location
@@ -198,21 +219,20 @@ static int noinstr scs_handle_fde_frame(const struct eh_frame *frame,
break;
default:
- pr_err("unhandled opcode: %02x in FDE frame %lx\n", opcode[-1], (uintptr_t)frame);
- return -ENOEXEC;
+ return EDYNSCS_INVALID_CFA_OPCODE;
}
}
return 0;
}
-int noinstr scs_patch(const u8 eh_frame[], int size)
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run)
{
+ int code_alignment_factor = 1;
+ bool fde_use_sdata8 = false;
const u8 *p = eh_frame;
while (size > 4) {
const struct eh_frame *frame = (const void *)p;
- bool fde_has_augmentation_data = true;
- int code_alignment_factor = 1;
int ret;
if (frame->size == 0 ||
@@ -221,28 +241,49 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
break;
if (frame->cie_id_or_pointer == 0) {
- const u8 *p = frame->augmentation_string;
-
- /* a 'z' in the augmentation string must come first */
- fde_has_augmentation_data = *p == 'z';
+ /*
+ * Require presence of augmentation data (z) with a
+ * specifier for the size of the FDE initial_loc and
+ * range fields (R), and nothing else.
+ */
+ if (strcmp(frame->augmentation_string, "zR"))
+ return EDYNSCS_INVALID_CIE_HEADER;
/*
* The code alignment factor is a uleb128 encoded field
* but given that the only sensible values are 1 or 4,
- * there is no point in decoding the whole thing.
+ * there is no point in decoding the whole thing. Also
+ * sanity check the size of the data alignment factor
+ * field, and the values of the return address register
+ * and augmentation data size fields.
*/
- p += strlen(p) + 1;
- if (!WARN_ON(*p & BIT(7)))
- code_alignment_factor = *p;
+ if ((frame->code_alignment_factor & BIT(7)) ||
+ (frame->data_alignment_factor & BIT(7)) ||
+ frame->return_address_register != 30 ||
+ frame->augmentation_data_size != 1)
+ return EDYNSCS_INVALID_CIE_HEADER;
+
+ code_alignment_factor = frame->code_alignment_factor;
+
+ switch (frame->fde_pointer_format) {
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata4:
+ fde_use_sdata8 = false;
+ break;
+ case DW_EH_PE_pcrel | DW_EH_PE_sdata8:
+ fde_use_sdata8 = true;
+ break;
+ default:
+ return EDYNSCS_INVALID_CIE_SDATA_SIZE;
+ }
} else {
- ret = scs_handle_fde_frame(frame,
- fde_has_augmentation_data,
- code_alignment_factor,
- true);
+ ret = scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, !skip_dry_run);
if (ret)
return ret;
- scs_handle_fde_frame(frame, fde_has_augmentation_data,
- code_alignment_factor, false);
+
+ if (!skip_dry_run)
+ scs_handle_fde_frame(frame, code_alignment_factor,
+ fde_use_sdata8, false);
}
p += sizeof(frame->size) + frame->size;
@@ -250,13 +291,3 @@ int noinstr scs_patch(const u8 eh_frame[], int size)
}
return 0;
}
-
-asmlinkage void __init scs_patch_vmlinux(void)
-{
- if (!should_patch_pac_into_scs())
- return;
-
- WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start));
- icache_inval_all_pou();
- isb();
-}
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
new file mode 100644
index 000000000000..aec3172d4003
--- /dev/null
+++ b/arch/arm64/kernel/pi/pi.h
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+#include <linux/types.h>
+
+#define __prel64_initconst __section(".init.rodata.prel64")
+
+#define PREL64(type, name) union { type *name; prel64_t name ## _prel; }
+
+#define prel64_pointer(__d) (typeof(__d))prel64_to_pointer(&__d##_prel)
+
+typedef volatile signed long prel64_t;
+
+static inline void *prel64_to_pointer(const prel64_t *offset)
+{
+ if (!*offset)
+ return NULL;
+ return (void *)offset + *offset;
+}
+
+extern bool dynamic_scs_is_enabled;
+
+extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
+extern pgd_t init_pg_dir[], init_pg_end[];
+
+void init_feature_override(u64 boot_status, const void *fdt, int chosen);
+u64 kaslr_early_init(void *fdt, int chosen);
+void relocate_kernel(u64 offset);
+int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
+
+void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset);
+
+asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
+
+asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
diff --git a/arch/arm64/kernel/pi/relacheck.c b/arch/arm64/kernel/pi/relacheck.c
new file mode 100644
index 000000000000..b0cd4d0d275b
--- /dev/null
+++ b/arch/arm64/kernel/pi/relacheck.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 - Google LLC
+ * Author: Ard Biesheuvel <ardb@google.com>
+ */
+
+#include <elf.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HOST_ORDER ELFDATA2LSB
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define HOST_ORDER ELFDATA2MSB
+#endif
+
+static Elf64_Ehdr *ehdr;
+static Elf64_Shdr *shdr;
+static const char *strtab;
+static bool swap;
+
+static uint64_t swab_elfxword(uint64_t val)
+{
+ return swap ? __builtin_bswap64(val) : val;
+}
+
+static uint32_t swab_elfword(uint32_t val)
+{
+ return swap ? __builtin_bswap32(val) : val;
+}
+
+static uint16_t swab_elfhword(uint16_t val)
+{
+ return swap ? __builtin_bswap16(val) : val;
+}
+
+int main(int argc, char *argv[])
+{
+ struct stat stat;
+ int fd, ret;
+
+ if (argc < 3) {
+ fprintf(stderr, "file arguments missing\n");
+ exit(EXIT_FAILURE);
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "failed to open %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = fstat(fd, &stat);
+ if (ret < 0) {
+ fprintf(stderr, "failed to stat() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ ehdr = mmap(0, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ehdr == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap() %s\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ swap = ehdr->e_ident[EI_DATA] != HOST_ORDER;
+ shdr = (void *)ehdr + swab_elfxword(ehdr->e_shoff);
+ strtab = (void *)ehdr +
+ swab_elfxword(shdr[swab_elfhword(ehdr->e_shstrndx)].sh_offset);
+
+ for (int i = 0; i < swab_elfhword(ehdr->e_shnum); i++) {
+ unsigned long info, flags;
+ bool prel64 = false;
+ Elf64_Rela *rela;
+ int numrela;
+
+ if (swab_elfword(shdr[i].sh_type) != SHT_RELA)
+ continue;
+
+ /* only consider RELA sections operating on data */
+ info = swab_elfword(shdr[i].sh_info);
+ flags = swab_elfxword(shdr[info].sh_flags);
+ if ((flags & (SHF_ALLOC | SHF_EXECINSTR)) != SHF_ALLOC)
+ continue;
+
+ /*
+ * We generally don't permit ABS64 relocations in the code that
+ * runs before relocation processing occurs. If statically
+ * initialized absolute symbol references are unavoidable, they
+ * may be emitted into a *.rodata.prel64 section and they will
+ * be converted to place-relative 64-bit references. This
+ * requires special handling in the referring code.
+ */
+ if (strstr(strtab + swab_elfword(shdr[info].sh_name),
+ ".rodata.prel64")) {
+ prel64 = true;
+ }
+
+ rela = (void *)ehdr + swab_elfxword(shdr[i].sh_offset);
+ numrela = swab_elfxword(shdr[i].sh_size) / sizeof(*rela);
+
+ for (int j = 0; j < numrela; j++) {
+ uint64_t info = swab_elfxword(rela[j].r_info);
+
+ if (ELF64_R_TYPE(info) != R_AARCH64_ABS64)
+ continue;
+
+ if (prel64) {
+ /* convert ABS64 into PREL64 */
+ info ^= R_AARCH64_ABS64 ^ R_AARCH64_PREL64;
+ rela[j].r_info = swab_elfxword(info);
+ } else {
+ fprintf(stderr,
+ "Unexpected absolute relocations detected in %s\n",
+ argv[2]);
+ close(fd);
+ unlink(argv[1]);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ close(fd);
+ return 0;
+}
diff --git a/arch/arm64/kernel/pi/relocate.c b/arch/arm64/kernel/pi/relocate.c
new file mode 100644
index 000000000000..2407d2696398
--- /dev/null
+++ b/arch/arm64/kernel/pi/relocate.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2023 Google LLC
+// Authors: Ard Biesheuvel <ardb@google.com>
+// Peter Collingbourne <pcc@google.com>
+
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pi.h"
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+void __init relocate_kernel(u64 offset)
+{
+ u64 *place = NULL;
+
+ for (const Elf64_Rela *rela = rela_start; rela < rela_end; rela++) {
+ if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+ continue;
+ *(u64 *)(rela->r_offset + offset) = rela->r_addend + offset;
+ }
+
+ if (!IS_ENABLED(CONFIG_RELR) || !offset)
+ return;
+
+ /*
+ * Apply RELR relocations.
+ *
+ * RELR is a compressed format for storing relative relocations. The
+ * encoded sequence of entries looks like:
+ * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+ *
+ * i.e. start with an address, followed by any number of bitmaps. The
+ * address entry encodes 1 relocation. The subsequent bitmap entries
+ * encode up to 63 relocations each, at subsequent offsets following
+ * the last address entry.
+ *
+ * The bitmap entries must have 1 in the least significant bit. The
+ * assumption here is that an address cannot have 1 in lsb. Odd
+ * addresses are not supported. Any odd addresses are stored in the
+ * RELA section, which is handled above.
+ *
+ * With the exception of the least significant bit, each bit in the
+ * bitmap corresponds with a machine word that follows the base address
+ * word, and the bit value indicates whether or not a relocation needs
+ * to be applied to it. The second least significant bit represents the
+ * machine word immediately following the initial address, and each bit
+ * that follows represents the next word, in linear order. As such, a
+ * single bitmap can encode up to 63 relocations in a 64-bit object.
+ */
+ for (const u64 *relr = relr_start; relr < relr_end; relr++) {
+ if ((*relr & 1) == 0) {
+ place = (u64 *)(*relr + offset);
+ *place++ += offset;
+ } else {
+ for (u64 *p = place, r = *relr >> 1; r; p++, r >>= 1)
+ if (r & 1)
+ *p += offset;
+ place += 63;
+ }
+ }
+}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 968d5fffe233..4137cc5ef031 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* Instructions which load PC relative literals are not going to work
* when executed from an XOL slot. Instructions doing an exclusive
* load/store are not going to complete successfully when single-step
- * exception handling happens in the middle of the sequence.
+ * exception handling happens in the middle of the sequence. Memory
+ * copy/set instructions require that all three instructions be placed
+ * consecutively in memory.
*/
if (aarch64_insn_uses_literal(insn) ||
- aarch64_insn_is_exclusive(insn))
+ aarch64_insn_is_exclusive(insn) ||
+ aarch64_insn_is_mops(insn))
return false;
return true;
@@ -73,9 +76,18 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api)
{
/*
+ * While 'nop' instruction can execute in the out-of-line slot,
+ * simulating them in breakpoint handling offers better performance.
+ */
+ if (aarch64_insn_is_nop(insn)) {
+ api->handler = simulate_nop;
+ return INSN_GOOD_NO_SLOT;
+ }
+
+ /*
* Instructions reading or modifying the PC won't work from the XOL
* slot.
*/
@@ -96,13 +108,10 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
aarch64_insn_is_bl(insn)) {
api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
- aarch64_insn_is_blr(insn) ||
- aarch64_insn_is_ret(insn)) {
- api->handler = simulate_br_blr_ret;
- } else if (aarch64_insn_is_ldr_lit(insn)) {
- api->handler = simulate_ldr_literal;
- } else if (aarch64_insn_is_ldrsw_lit(insn)) {
- api->handler = simulate_ldrsw_literal;
+ aarch64_insn_is_blr(insn)) {
+ api->handler = simulate_br_blr;
+ } else if (aarch64_insn_is_ret(insn)) {
+ api->handler = simulate_ret;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -137,9 +146,20 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
enum probe_insn decoded;
- probe_opcode_t insn = le32_to_cpu(*addr);
- probe_opcode_t *scan_end = NULL;
+ u32 insn = le32_to_cpu(*addr);
+ kprobe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
+ struct arch_probe_insn *api = &asi->api;
+
+ if (aarch64_insn_is_ldr_lit(insn)) {
+ api->handler = simulate_ldr_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ api->handler = simulate_ldrsw_literal;
+ decoded = INSN_GOOD_NO_SLOT;
+ } else {
+ decoded = arm_probe_decode_insn(insn, &asi->api);
+ }
/*
* If there's a symbol defined in front of and near enough to
@@ -157,7 +177,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index 8b758c5a2062..0e4195de8206 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -28,6 +28,6 @@ enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
#endif
enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 70b91a8c6bb3..43a0361a8bf0 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -27,7 +28,7 @@
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/irq.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/system_misc.h>
@@ -41,9 +42,23 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ if (set_memory_rox((unsigned long)addr, 1)) {
+ execmem_free(addr);
+ return NULL;
+ }
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
- kprobe_opcode_t *addr = p->ainsn.api.insn;
+ kprobe_opcode_t *addr = p->ainsn.xol_insn;
/*
* Prepare insn slot, Mark Rutland points out it depends on a coupe of
@@ -64,20 +79,20 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
* the BRK exception handler, so it is unnecessary to generate
* Contex-Synchronization-Event via ISB again.
*/
- aarch64_insn_patch_text_nosync(addr, p->opcode);
+ aarch64_insn_patch_text_nosync(addr, le32_to_cpu(p->opcode));
aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.api.restore = (unsigned long) p->addr +
+ p->ainsn.xol_restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instructions is not executed xol. No need to adjust the PC */
- p->ainsn.api.restore = 0;
+ p->ainsn.xol_restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
@@ -85,7 +100,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (p->ainsn.api.handler)
- p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
+ p->ainsn.api.handler(le32_to_cpu(p->opcode), (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(p, kcb, regs);
@@ -99,7 +114,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
/* copy instruction */
- p->opcode = le32_to_cpu(*p->addr);
+ p->opcode = *p->addr;
if (search_exception_tables(probe_addr))
return -EINVAL;
@@ -110,18 +125,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.api.insn = NULL;
+ p->ainsn.xol_insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.api.insn = get_insn_slot();
- if (!p->ainsn.api.insn)
+ p->ainsn.xol_insn = get_insn_slot();
+ if (!p->ainsn.xol_insn)
return -ENOMEM;
break;
}
/* prepare the instruction */
- if (p->ainsn.api.insn)
+ if (p->ainsn.xol_insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -129,13 +144,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
@@ -149,15 +157,16 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
void *addr = p->addr;
+ u32 insn = le32_to_cpu(p->opcode);
- aarch64_insn_patch_text(&addr, &p->opcode, 1);
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.api.insn) {
- free_insn_slot(p->ainsn.api.insn, 0);
- p->ainsn.api.insn = NULL;
+ if (p->ainsn.xol_insn) {
+ free_insn_slot(p->ainsn.xol_insn, 0);
+ p->ainsn.xol_insn = NULL;
}
}
@@ -212,9 +221,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.api.insn) {
+ if (p->ainsn.xol_insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.api.insn;
+ slot = (unsigned long)p->ainsn.xol_insn;
kprobes_save_local_irqflag(kcb, regs);
instruction_pointer_set(regs, slot);
@@ -252,8 +261,8 @@ static void __kprobes
post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs)
{
/* return addr restore if non-branching insn */
- if (cur->ainsn.api.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.api.restore);
+ if (cur->ainsn.xol_restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.xol_restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -298,8 +307,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
return 0;
}
-static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe *p, *cur_kprobe;
struct kprobe_ctlblk *kcb;
@@ -342,20 +351,15 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kprobes_break_hook = {
- .imm = KPROBES_BRK_IMM,
- .fn = kprobe_breakpoint_handler,
-};
-
-static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long addr = instruction_pointer(regs);
struct kprobe *cur = kprobe_running();
if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
- ((unsigned long)&cur->ainsn.api.insn[1] == addr)) {
+ ((unsigned long)&cur->ainsn.xol_insn[1] == addr)) {
kprobes_restore_local_irqflag(kcb, regs);
post_kprobe_handler(cur, kcb, regs);
@@ -366,10 +370,15 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook kprobes_break_ss_hook = {
- .imm = KPROBES_BRK_SS_IMM,
- .fn = kprobe_breakpoint_ss_handler,
-};
+int __kprobes
+kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
+{
+ if (regs->pc != (unsigned long)__kretprobe_trampoline)
+ return DBG_HOOK_ERROR;
+
+ regs->pc = kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
+ return DBG_HOOK_HANDLED;
+}
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
@@ -396,11 +405,6 @@ int __init arch_populate_kprobe_blacklist(void)
return ret;
}
-void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
-{
- return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]);
-}
-
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -418,8 +422,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
- register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_break_hook(&kprobes_break_ss_hook);
-
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 9a6499bed58b..b60739d3983f 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -4,83 +4,17 @@
*/
#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
+#include <asm/asm-bug.h>
#include <asm/assembler.h>
.text
- .macro save_all_base_regs
- stp x0, x1, [sp, #S_X0]
- stp x2, x3, [sp, #S_X2]
- stp x4, x5, [sp, #S_X4]
- stp x6, x7, [sp, #S_X6]
- stp x8, x9, [sp, #S_X8]
- stp x10, x11, [sp, #S_X10]
- stp x12, x13, [sp, #S_X12]
- stp x14, x15, [sp, #S_X14]
- stp x16, x17, [sp, #S_X16]
- stp x18, x19, [sp, #S_X18]
- stp x20, x21, [sp, #S_X20]
- stp x22, x23, [sp, #S_X22]
- stp x24, x25, [sp, #S_X24]
- stp x26, x27, [sp, #S_X26]
- stp x28, x29, [sp, #S_X28]
- add x0, sp, #PT_REGS_SIZE
- stp lr, x0, [sp, #S_LR]
- /*
- * Construct a useful saved PSTATE
- */
- mrs x0, nzcv
- mrs x1, daif
- orr x0, x0, x1
- mrs x1, CurrentEL
- orr x0, x0, x1
- mrs x1, SPSel
- orr x0, x0, x1
- stp xzr, x0, [sp, #S_PC]
- .endm
-
- .macro restore_all_base_regs
- ldr x0, [sp, #S_PSTATE]
- and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
- msr nzcv, x0
- ldp x0, x1, [sp, #S_X0]
- ldp x2, x3, [sp, #S_X2]
- ldp x4, x5, [sp, #S_X4]
- ldp x6, x7, [sp, #S_X6]
- ldp x8, x9, [sp, #S_X8]
- ldp x10, x11, [sp, #S_X10]
- ldp x12, x13, [sp, #S_X12]
- ldp x14, x15, [sp, #S_X14]
- ldp x16, x17, [sp, #S_X16]
- ldp x18, x19, [sp, #S_X18]
- ldp x20, x21, [sp, #S_X20]
- ldp x22, x23, [sp, #S_X22]
- ldp x24, x25, [sp, #S_X24]
- ldp x26, x27, [sp, #S_X26]
- ldp x28, x29, [sp, #S_X28]
- .endm
-
SYM_CODE_START(__kretprobe_trampoline)
- sub sp, sp, #PT_REGS_SIZE
-
- save_all_base_regs
-
- /* Setup a frame pointer. */
- add x29, sp, #S_FP
-
- mov x0, sp
- bl trampoline_probe_handler
/*
- * Replace trampoline address in lr with actual orig_ret_addr return
- * address.
+ * Trigger a breakpoint exception. The PC will be adjusted by
+ * kretprobe_brk_handler(), and no subsequent instructions will
+ * be executed from the trampoline.
*/
- mov lr, x0
-
- /* The frame pointer (x29) is restored with other registers. */
- restore_all_base_regs
-
- add sp, sp, #PT_REGS_SIZE
- ret
-
+ brk #KRETPROBES_BRK_IMM
+ ASM_BUG()
SYM_CODE_END(__kretprobe_trampoline)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 22d0b3252476..89fbeb32107e 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,6 +13,7 @@
#include <asm/traps.h>
#include "simulate-insn.h"
+#include "asm/gcs.h"
#define bbl_displacement(insn) \
sign_extend32(((insn) & 0x3ffffff) << 2, 27)
@@ -49,6 +50,21 @@ static inline u32 get_w_reg(struct pt_regs *regs, int reg)
return lower_32_bits(pt_regs_read_reg(regs, reg));
}
+static inline int update_lr(struct pt_regs *regs, long addr)
+{
+ int err = 0;
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ push_user_gcs(addr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ return err;
+ }
+ }
+ procedure_link_pointer_set(regs, addr);
+ return err;
+}
+
static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
{
int xn = opcode & 0x1f;
@@ -107,9 +123,9 @@ simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
{
int disp = bbl_displacement(opcode);
- /* Link register is x30 */
if (opcode & (1 << 31))
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
instruction_pointer_set(regs, addr + disp);
}
@@ -126,16 +142,34 @@ simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs)
}
void __kprobes
-simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs)
+simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs)
{
int xn = (opcode >> 5) & 0x1f;
+ u64 b_target = get_x_reg(regs, xn);
- /* update pc first in case we're doing a "blr lr" */
- instruction_pointer_set(regs, get_x_reg(regs, xn));
-
- /* Link register is x30 */
if (((opcode >> 21) & 0x3) == 1)
- set_x_reg(regs, 30, addr + 4);
+ if (update_lr(regs, addr + 4))
+ return;
+
+ instruction_pointer_set(regs, b_target);
+}
+
+void __kprobes
+simulate_ret(u32 opcode, long addr, struct pt_regs *regs)
+{
+ u64 ret_addr;
+ int err = 0;
+ int xn = (opcode >> 5) & 0x1f;
+ u64 r_target = get_x_reg(regs, xn);
+
+ if (user_mode(regs) && task_gcs_el0_enabled(current)) {
+ ret_addr = pop_user_gcs(&err);
+ if (err || ret_addr != r_target) {
+ force_sig(SIGSEGV);
+ return;
+ }
+ }
+ instruction_pointer_set(regs, r_target);
}
void __kprobes
@@ -171,17 +205,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- u64 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (u64 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
if (opcode & (1 << 30)) /* x0-x30 */
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr));
else /* w0-w30 */
- set_w_reg(regs, xn, *load_addr);
+ set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
@@ -189,14 +221,18 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
{
- s32 *load_addr;
+ unsigned long load_addr;
int xn = opcode & 0x1f;
- int disp;
- disp = ldr_displacement(opcode);
- load_addr = (s32 *) (addr + disp);
+ load_addr = addr + ldr_displacement(opcode);
- set_x_reg(regs, xn, *load_addr);
+ set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr));
instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
+
+void __kprobes
+simulate_nop(u32 opcode, long addr, struct pt_regs *regs)
+{
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+}
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
index e065dc92218e..9e772a292d56 100644
--- a/arch/arm64/kernel/probes/simulate-insn.h
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -11,10 +11,12 @@
void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs);
void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs);
-void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_br_blr(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ret(u32 opcode, long addr, struct pt_regs *regs);
void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_nop(u32 opcode, long addr, struct pt_regs *regs);
#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index d49aef2657cd..941668800aea 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -6,6 +6,7 @@
#include <linux/ptrace.h>
#include <linux/uprobes.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include "decode-insn.h"
@@ -17,12 +18,20 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
void *xol_page_kaddr = kmap_atomic(page);
void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+ /*
+ * Initial cache maintenance of the xol page done via set_pte_at().
+ * Subsequent CMOs only needed if the xol slot changes.
+ */
+ if (!memcmp(dst, src, len))
+ goto done;
+
/* Initialize the slot */
memcpy(dst, src, len);
/* flush caches (dcache/icache) */
sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
+done:
kunmap_atomic(xol_page_kaddr);
}
@@ -34,7 +43,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long addr)
{
- probe_opcode_t insn;
+ u32 insn;
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
@@ -42,7 +51,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
switch (arm_probe_decode_insn(insn, &auprobe->api)) {
case INSN_REJECTED:
@@ -102,13 +111,13 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t)
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
- probe_opcode_t insn;
+ u32 insn;
unsigned long addr;
if (!auprobe->simulate)
return false;
- insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ insn = le32_to_cpu(auprobe->insn);
addr = instruction_pointer(regs);
if (auprobe->api.handler)
@@ -122,7 +131,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
struct uprobe_task *utask = current->utask;
/*
- * Task has received a fatal signal, so reset back to probbed
+ * Task has received a fatal signal, so reset back to probed
* address.
*/
instruction_pointer_set(regs, utask->vaddr);
@@ -151,11 +160,43 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
struct pt_regs *regs)
{
unsigned long orig_ret_vaddr;
+ unsigned long gcs_ret_vaddr;
+ int err = 0;
+ u64 gcspr;
orig_ret_vaddr = procedure_link_pointer(regs);
+
+ if (task_gcs_el0_enabled(current)) {
+ gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+ gcs_ret_vaddr = get_user_gcs((__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+
+ /*
+ * If the LR and GCS return addr don't match, then some kind of PAC
+ * signing or control flow occurred since entering the probed function.
+ * Likely because the user is attempting to retprobe on an instruction
+ * that isn't a function boundary or inside a leaf function. Explicitly
+ * abort this retprobe because it will generate a GCS exception.
+ */
+ if (gcs_ret_vaddr != orig_ret_vaddr) {
+ orig_ret_vaddr = -1;
+ goto out;
+ }
+
+ put_user_gcs(trampoline_vaddr, (__force unsigned long __user *)gcspr, &err);
+ if (err) {
+ force_sig(SIGSEGV);
+ goto out;
+ }
+ }
+
/* Replace the return addr with trampoline addr */
procedure_link_pointer_set(regs, trampoline_vaddr);
+out:
return orig_ret_vaddr;
}
@@ -165,7 +206,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
return NOTIFY_DONE;
}
-static int uprobe_breakpoint_handler(struct pt_regs *regs,
+int uprobe_brk_handler(struct pt_regs *regs,
unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
@@ -174,7 +215,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-static int uprobe_single_step_handler(struct pt_regs *regs,
+int uprobe_single_step_handler(struct pt_regs *regs,
unsigned long esr)
{
struct uprobe_task *utask = current->utask;
@@ -186,23 +227,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-/* uprobe breakpoint handler hook */
-static struct break_hook uprobes_break_hook = {
- .imm = UPROBES_BRK_IMM,
- .fn = uprobe_breakpoint_handler,
-};
-
-/* uprobe single step handler hook */
-static struct step_hook uprobes_step_hook = {
- .fn = uprobe_single_step_handler,
-};
-
-static int __init arch_init_uprobes(void)
-{
- register_user_break_hook(&uprobes_break_hook);
- register_user_step_hook(&uprobes_step_hook);
-
- return 0;
-}
-
-device_initcall(arch_init_uprobes);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 0fcc4eb1a7ab..fba7ca102a8c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,11 +43,13 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
@@ -226,7 +228,7 @@ void __show_regs(struct pt_regs *regs)
printk("sp : %016llx\n", sp);
if (system_uses_irq_prio_masking())
- printk("pmr_save: %08llx\n", regs->pmr_save);
+ printk("pmr: %08x\n", regs->pmr);
i = top_reg;
@@ -271,12 +273,69 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+#ifdef CONFIG_ARM64_GCS
+
+static void flush_gcs(void)
+{
+ if (!system_supports_gcs())
+ return;
+
+ current->thread.gcspr_el0 = 0;
+ current->thread.gcs_base = 0;
+ current->thread.gcs_size = 0;
+ current->thread.gcs_el0_mode = 0;
+ write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
+ write_sysreg_s(0, SYS_GCSPR_EL0);
+}
+
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ unsigned long gcs;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ p->thread.gcs_base = 0;
+ p->thread.gcs_size = 0;
+
+ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
+ gcs = gcs_alloc_thread_stack(p, args);
+ if (IS_ERR_VALUE(gcs))
+ return PTR_ERR((void *)gcs);
+
+ return 0;
+}
+
+#else
+
+static void flush_gcs(void) { }
+static int copy_thread_gcs(struct task_struct *p,
+ const struct kernel_clone_args *args)
+{
+ return 0;
+}
+
+#endif
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
+ flush_gcs();
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -286,53 +345,34 @@ void arch_release_task_struct(struct task_struct *tsk)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- if (current->mm)
- fpsimd_preserve_current_state();
- *dst = *src;
+ /*
+ * The current/src task's FPSIMD state may or may not be live, and may
+ * have been altered by ptrace after entry to the kernel. Save the
+ * effective FPSIMD state so that this will be copied into dst.
+ */
+ fpsimd_save_and_flush_current_state();
+ fpsimd_sync_from_effective_state(src);
- /* We rely on the above assignment to initialize dst's thread_flags: */
- BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
+ *dst = *src;
/*
- * Detach src's sve_state (if any) from dst so that it does not
- * get erroneously used or freed prematurely. dst's copies
- * will be allocated on demand later on if dst uses SVE.
- * For consistency, also clear TIF_SVE here: this could be done
- * later in copy_process(), but to avoid tripping up future
- * maintainers it is best not to leave TIF flags and buffers in
- * an inconsistent state, even temporarily.
+ * Drop stale reference to src's sve_state and convert dst to
+ * non-streaming FPSIMD mode.
*/
+ dst->thread.fp_type = FP_STATE_FPSIMD;
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
+ task_smstop_sm(dst);
/*
- * In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA and ZT state so duplicate
- * it here. This may be shortly freed if we exec() or if
- * CLONE_SETTLS but it's simpler to do it here. To avoid
- * confusing the rest of the code ensure that we have a
- * sve_state allocated whenever sme_state is allocated.
+ * Drop stale reference to src's sme_state and ensure dst has ZA
+ * disabled.
+ *
+ * When necessary, ZA will be inherited later in copy_thread_za().
*/
- if (thread_za_enabled(&src->thread)) {
- dst->thread.sve_state = kzalloc(sve_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sve_state)
- return -ENOMEM;
-
- dst->thread.sme_state = kmemdup(src->thread.sme_state,
- sme_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sme_state) {
- kfree(dst->thread.sve_state);
- dst->thread.sve_state = NULL;
- return -ENOMEM;
- }
- } else {
- dst->thread.sme_state = NULL;
- clear_tsk_thread_flag(dst, TIF_SME);
- }
-
- dst->thread.fp_type = FP_STATE_FPSIMD;
+ dst->thread.sme_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr &= ~SVCR_ZA_MASK;
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -340,14 +380,40 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+static int copy_thread_za(struct task_struct *dst, struct task_struct *src)
+{
+ if (!thread_za_enabled(&src->thread))
+ return 0;
+
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+
+ set_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
+ u64 clone_flags = args->flags;
unsigned long stack_start = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
+ int ret;
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -371,8 +437,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
- if (system_supports_tpidr2())
- p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
@@ -382,13 +449,43 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
}
/*
+ * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and
+ * TPIDR2 need to be manipulated as a pair, and either both
+ * need to be inherited or both need to be reset.
+ *
+ * Within a process, child threads must not inherit their
+ * parent's TPIDR2 value or they may clobber their parent's
+ * stack at some later point.
+ *
+ * When a process is fork()'d, the child must inherit ZA and
+ * TPIDR2 from its parent in case there was dormant ZA state.
+ *
+ * Use CLONE_VM to determine when the child will share the
+ * address space with the parent, and cannot safely inherit the
+ * state.
+ */
+ if (system_supports_sme()) {
+ if (!(clone_flags & CLONE_VM)) {
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ ret = copy_thread_za(p, current);
+ if (ret)
+ return ret;
+ } else {
+ p->thread.tpidr2_el0 = 0;
+ WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK);
+ }
+ }
+
+ /*
* If a TLS pointer was passed to clone, use it for the new
- * thread. We also reset TPIDR2 if it's in use.
+ * thread.
*/
- if (clone_flags & CLONE_SETTLS) {
+ if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
- p->thread.tpidr2_el0 = 0;
- }
+
+ ret = copy_thread_gcs(p, args);
+ if (ret != 0)
+ return ret;
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
@@ -399,9 +496,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
*/
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
+ childregs->stackframe.type = FRAME_META_TYPE_FINAL;
p->thread.cpu_context.x19 = (unsigned long)args->fn;
p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
+
+ if (system_supports_poe())
+ p->thread.por_el0 = POR_EL0_INIT;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
@@ -409,7 +510,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* For the benefit of the unwinder, set up childregs->stackframe
* as the final frame for the new task.
*/
- p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
+ p->thread.cpu_context.fp = (unsigned long)&childregs->stackframe;
ptrace_hw_copy_thread(p);
@@ -429,7 +530,7 @@ static void tls_thread_switch(struct task_struct *next)
if (is_compat_thread(task_thread_info(next)))
write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
- else if (!arm64_kernel_unmapped_at_el0())
+ else
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
@@ -454,7 +555,7 @@ static void ssbs_thread_switch(struct task_struct *next)
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
- if (cpus_have_const_cap(ARM64_SSBS))
+ if (alternative_has_cap_unlikely(ARM64_SSBS))
return;
spectre_v4_enable_task_mitigation(next);
@@ -474,28 +575,109 @@ static void entry_task_switch(struct task_struct *next)
__this_cpu_write(__entry_task, next);
}
+#ifdef CONFIG_ARM64_GCS
+
+void gcs_preserve_current_state(void)
+{
+ current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+}
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+ if (!system_supports_gcs())
+ return;
+
+ /* GCSPR_EL0 is always readable */
+ gcs_preserve_current_state();
+ write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0);
+
+ if (current->thread.gcs_el0_mode != next->thread.gcs_el0_mode)
+ gcs_set_el0_mode(next);
+
+ /*
+ * Ensure that GCS memory effects of the 'prev' thread are
+ * ordered before other memory accesses with release semantics
+ * (or preceded by a DMB) on the current PE. In addition, any
+ * memory accesses with acquire semantics (or succeeded by a
+ * DMB) are ordered before GCS memory effects of the 'next'
+ * thread. This will ensure that the GCS memory effects are
+ * visible to other PEs in case of migration.
+ */
+ if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next))
+ gcsb_dsync();
+}
+
+#else
+
+static void gcs_thread_switch(struct task_struct *next)
+{
+}
+
+#endif
+
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
+{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
+ }
}
/*
@@ -531,14 +713,17 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
+ gcs_thread_switch(next);
/*
- * Complete any pending TLB or cache maintenance on this CPU in case
- * the thread migrates to a different CPU.
- * This full barrier is also required by the membarrier system
- * call.
+ * Complete any pending TLB or cache maintenance on this CPU in case the
+ * thread migrates to a different CPU. This full barrier is also
+ * required by the membarrier system call. Additionally it makes any
+ * in-progress pgtable writes visible to the table walker; See
+ * emit_pte_barriers().
*/
dsb(ish);
@@ -648,7 +833,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -670,10 +855,14 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
if (is_compat_thread(ti))
return -EINVAL;
- if (system_supports_mte())
+ if (system_supports_mte()) {
valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
| PR_MTE_TAG_MASK;
+ if (cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ valid_mask |= PR_MTE_STORE_ONLY;
+ }
+
if (arg & ~valid_mask)
return -EINVAL;
@@ -714,7 +903,7 @@ long get_tagged_addr_ctrl(struct task_struct *task)
* disable it for tasks that already opted in to the relaxed ABI.
*/
-static struct ctl_table tagged_addr_sysctl_table[] = {
+static const struct ctl_table tagged_addr_sysctl_table[] = {
{
.procname = "tagged_addr_disabled",
.mode = 0644,
@@ -724,7 +913,6 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
static int __init tagged_addr_init(void)
@@ -758,3 +946,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 05f40c4e18fd..80a580e019c5 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param);
static bool spectre_v2_mitigations_off(void)
{
- bool ret = __nospectre_v2 || cpu_mitigations_off();
-
- if (ret)
- pr_info_once("spectre-v2 mitigation disabled by command line option\n");
-
- return ret;
+ return __nospectre_v2 || cpu_mitigations_off();
}
static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
@@ -172,7 +167,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
return SPECTRE_UNAFFECTED;
/* Alternatively, we have a list of unaffected CPUs */
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ if (is_midr_in_range_list(spectre_v2_safe_list))
return SPECTRE_UNAFFECTED;
return SPECTRE_VULNERABLE;
@@ -331,7 +326,7 @@ bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
};
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
- return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+ return is_midr_in_range_list(spectre_v3a_unsafe_list);
}
void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
@@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param);
*/
static bool spectre_v4_mitigations_off(void)
{
- bool ret = cpu_mitigations_off() ||
- __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
-
- if (ret)
- pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
-
- return ret;
+ return cpu_mitigations_off() ||
+ __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
}
/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
@@ -475,7 +465,7 @@ static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
{ /* sentinel */ },
};
- if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+ if (is_midr_in_range_list(spectre_v4_safe_list))
return SPECTRE_UNAFFECTED;
/* CPU features are detected first */
@@ -558,6 +548,18 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
set_pstate_ssbs(0);
+
+ /*
+ * SSBS is self-synchronizing and is intended to affect subsequent
+ * speculative instructions, but some CPUs can speculate with a stale
+ * value of SSBS.
+ *
+ * Mitigate this with an unconditional speculation barrier, as CPUs
+ * could mis-speculate branches and bypass a conditional barrier.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386))
+ spec_bar();
+
return SPECTRE_MITIGATED;
}
@@ -833,52 +835,91 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k132_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP09),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(spectre_bhb_k132_list))
+ k = 132;
+ else if (is_midr_in_range_list(spectre_bhb_k38_list))
+ k = 38;
+ else if (is_midr_in_range_list(spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -904,29 +945,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool has_spectre_bhb_fw_mitigation(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
- static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- {},
- };
- bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
- spectre_bhb_firmware_mitigated_list);
-
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return has_smccc && fw_state == SPECTRE_MITIGATED;
}
static bool supports_ecbhb(int scope)
@@ -942,6 +967,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -950,16 +977,23 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (spectre_bhb_loop_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k` though,
+ * but only if we aren't mitigating with clearbhb though.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ return true;
+}
- return false;
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -972,14 +1006,14 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
* When KPTI is in use, the vectors are switched when exiting to
* user-space.
*/
- if (arm64_kernel_unmapped_at_el0())
+ if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
return;
write_sysreg(v, vbar_el1);
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -990,7 +1024,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cpu_cb;
- enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ enum mitigation_state state = SPECTRE_VULNERABLE;
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
@@ -999,9 +1033,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
/* No point mitigating Spectre-BHB alone. */
} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
- pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
- } else if (cpu_mitigations_off() || __nospectre_bhb) {
- pr_info_once("spectre-bhb mitigation disabled by command line option\n");
+ /* Do nothing */
} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
state = SPECTRE_MITIGATED;
set_bit(BHB_HW, &system_bhb_mitigations);
@@ -1016,7 +1048,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1029,37 +1061,39 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
- fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (fw_state == SPECTRE_MITIGATED) {
- /*
- * Ensure KVM uses one of the spectre bp_hardening
- * vectors. The indirect vector doesn't include the EL3
- * call, so needs upgrading to
- * HYP_VECTOR_SPECTRE_INDIRECT.
- */
- if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
- data->slot += 1;
-
- this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
-
- /*
- * The WA3 call in the vectors supersedes the WA1 call
- * made during context-switch. Uninstall any firmware
- * bp_hardening callback.
- */
- cpu_cb = spectre_v2_get_sw_mitigation_cb();
- if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
- __this_cpu_write(bp_hardening_data.fn, NULL);
-
- state = SPECTRE_MITIGATED;
- set_bit(BHB_FW, &system_bhb_mitigations);
- }
+ } else if (has_spectre_bhb_fw_mitigation()) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
}
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
@@ -1088,7 +1122,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1097,7 +1130,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
@@ -1154,3 +1187,18 @@ void unpriv_ebpf_notify(int new_state)
pr_err("WARNING: %s", EBPF_WARN);
}
#endif
+
+void spectre_print_disabled_mitigations(void)
+{
+ /* Keep a single copy of the common message suffix to avoid duplication. */
+ const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n";
+
+ if (spectre_v2_mitigations_off())
+ pr_info("spectre-v2 %s", spectre_disabled_suffix);
+
+ if (spectre_v4_mitigations_off())
+ pr_info("spectre-v4 %s", spectre_disabled_suffix);
+
+ if (__nospectre_bhb || cpu_mitigations_off())
+ pr_info("spectre-bhb %s", spectre_disabled_suffix);
+}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 29a8e444db83..fabd732d0a2d 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -40,7 +40,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu)
{
phys_addr_t pa_secondary_entry = __pa_symbol(secondary_entry);
int err = psci_ops.cpu_on(cpu_logical_map(cpu), pa_secondary_entry);
- if (err)
+ if (err && err != -EPERM)
pr_err("failed to boot CPU%d (%d)\n", cpu, err);
return err;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 20d7ef82de90..b9bdd83fbbca 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -28,11 +28,13 @@
#include <linux/hw_breakpoint.h>
#include <linux/regset.h>
#include <linux/elf.h>
+#include <linux/rseq.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/fpsimd.h>
+#include <asm/gcs.h>
#include <asm/mte.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
@@ -139,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
@@ -173,7 +175,6 @@ static void ptrace_hbptriggered(struct perf_event *bp,
struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
const char *desc = "Hardware breakpoint trap (ptrace)";
-#ifdef CONFIG_COMPAT
if (is_compat_task()) {
int si_errno = 0;
int i;
@@ -195,7 +196,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
desc);
return;
}
-#endif
+
arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
}
@@ -593,7 +594,7 @@ static int __fpr_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
uregs = &target->thread.uw.fpsimd_state;
@@ -625,7 +626,7 @@ static int __fpr_set(struct task_struct *target,
* Ensure target->thread.uw.fpsimd_state is up to date, so that a
* short copyin can't resurrect stale data.
*/
- sve_sync_to_fpsimd(target);
+ fpsimd_sync_from_effective_state(target);
newstate = target->thread.uw.fpsimd_state;
@@ -652,7 +653,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- sve_sync_from_fpsimd_zeropad(target);
+ fpsimd_sync_to_effective_state_zeropad(target);
fpsimd_flush_task_state(target);
return ret;
@@ -697,6 +698,41 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
return ret;
}
+static int fpmr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_fpmr())
+ return -EINVAL;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
+ return membuf_store(&to, target->thread.uw.fpmr);
+}
+
+static int fpmr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ unsigned long fpmr;
+
+ if (!system_supports_fpmr())
+ return -EINVAL;
+
+ fpmr = target->thread.uw.fpmr;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count);
+ if (ret)
+ return ret;
+
+ target->thread.uw.fpmr = fpmr;
+
+ fpsimd_flush_task_state(target);
+
+ return 0;
+}
+
static int system_call_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
@@ -728,7 +764,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
@@ -740,35 +775,33 @@ static void sve_init_header_from_task(struct user_sve_header *header,
task_type = ARM64_VEC_SVE;
active = (task_type == type);
+ if (active && target->thread.fp_type == FP_STATE_SVE)
+ header->flags = SVE_PT_REGS_SVE;
+ else
+ header->flags = SVE_PT_REGS_FPSIMD;
+
switch (type) {
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
return;
}
- if (active) {
- if (fpsimd_only) {
- header->flags |= SVE_PT_REGS_FPSIMD;
- } else {
- header->flags |= SVE_PT_REGS_SVE;
- }
- }
-
header->vl = task_get_vl(target, type);
vq = sve_vq_from_vl(header->vl);
header->max_vl = vec_max_vl(type);
- header->size = SVE_PT_SIZE(vq, header->flags);
+ if (active)
+ header->size = SVE_PT_SIZE(vq, header->flags);
+ else
+ header->size = sizeof(header);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
}
@@ -787,18 +820,25 @@ static int sve_get_common(struct task_struct *target,
unsigned int vq;
unsigned long start, end;
+ if (target == current)
+ fpsimd_preserve_current_state();
+
/* Header */
sve_init_header_from_task(&header, target, type);
vq = sve_vq_from_vl(header.vl);
membuf_write(&to, &header, sizeof(header));
- if (target == current)
- fpsimd_preserve_current_state();
-
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+ /*
+ * When the requested vector type is not active, do not present data
+ * from the other mode to userspace.
+ */
+ if (header.size == sizeof(header))
+ return 0;
+
switch ((header.flags & SVE_PT_REGS_MASK)) {
case SVE_PT_REGS_FPSIMD:
return __fpr_get(target, regset, to);
@@ -826,7 +866,7 @@ static int sve_get_common(struct task_struct *target,
return membuf_zero(&to, end - start);
default:
- return 0;
+ BUILD_BUG();
}
}
@@ -850,6 +890,9 @@ static int sve_set_common(struct task_struct *target,
struct user_sve_header header;
unsigned int vq;
unsigned long start, end;
+ bool fpsimd;
+
+ fpsimd_flush_task_state(target);
/* Header */
if (count < sizeof(header))
@@ -857,97 +900,116 @@ static int sve_set_common(struct task_struct *target,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
0, sizeof(header));
if (ret)
- goto out;
+ return ret;
/*
- * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by
- * vec_set_vector_length(), which will also validate them for us:
+ * Streaming SVE data is always stored and presented in SVE format.
+ * Require the user to provide SVE formatted data for consistency, and
+ * to avoid the risk that we configure the task into an invalid state.
*/
- ret = vec_set_vector_length(target, type, header.vl,
- ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
- if (ret)
- goto out;
+ fpsimd = (header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD;
+ if (fpsimd && type == ARM64_VEC_SME)
+ return -EINVAL;
+
+ /*
+ * On systems without SVE we accept FPSIMD format writes with
+ * a VL of 0 to allow exiting streaming mode, otherwise a VL
+ * is required.
+ */
+ if (header.vl) {
+ /*
+ * If the system does not support SVE we can't
+ * configure a SVE VL.
+ */
+ if (!system_supports_sve() && type == ARM64_VEC_SVE)
+ return -EINVAL;
+
+ /*
+ * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are
+ * consumed by vec_set_vector_length(), which will
+ * also validate them for us:
+ */
+ ret = vec_set_vector_length(target, type, header.vl,
+ ((unsigned long)header.flags & ~SVE_PT_REGS_MASK) << 16);
+ if (ret)
+ return ret;
+ } else {
+ /* If the system supports SVE we require a VL. */
+ if (system_supports_sve())
+ return -EINVAL;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Only FPSIMD formatted data with no flags set is
+ * supported.
+ */
+ if (header.flags != SVE_PT_REGS_FPSIMD)
+ return -EINVAL;
+ }
+
+ /* Allocate SME storage if necessary, preserving any existing ZA/ZT state */
+ if (type == ARM64_VEC_SME) {
+ sme_alloc(target, false);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ /* Allocate SVE storage if necessary, zeroing any existing SVE state */
+ if (!fpsimd) {
+ sve_alloc(target, true);
+ if (!target->thread.sve_state)
+ return -ENOMEM;
+ }
+
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC VL not the
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_vl(target, type));
/* Enter/exit streaming mode */
if (system_supports_sme()) {
- u64 old_svcr = target->thread.svcr;
-
switch (type) {
case ARM64_VEC_SVE:
target->thread.svcr &= ~SVCR_SM_MASK;
+ set_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
target->thread.svcr |= SVCR_SM_MASK;
-
- /*
- * Disable traps and ensure there is SME storage but
- * preserve any currently set values in ZA/ZT.
- */
- sme_alloc(target, false);
set_tsk_thread_flag(target, TIF_SME);
break;
default:
WARN_ON_ONCE(1);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
-
- /*
- * If we switched then invalidate any existing SVE
- * state and ensure there's storage.
- */
- if (target->thread.svcr != old_svcr)
- sve_alloc(target, true);
}
+ /* Always zero V regs, FPSR, and FPCR */
+ memset(&current->thread.uw.fpsimd_state, 0,
+ sizeof(current->thread.uw.fpsimd_state));
+
/* Registers: FPSIMD-only case */
BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
- if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
- ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
- SVE_PT_FPSIMD_OFFSET);
+ if (fpsimd) {
clear_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+ return ret;
}
- /*
- * Otherwise: no registers or full SVE case. For backwards
- * compatibility reasons we treat empty flags as SVE registers.
- */
+ /* Otherwise: no registers or full SVE case. */
+
+ target->thread.fp_type = FP_STATE_SVE;
/*
* If setting a different VL from the requested VL and there is
* register data, the data layout will be wrong: don't even
* try to set the registers in this case.
*/
- if (count && vq != sve_vq_from_vl(header.vl)) {
- ret = -EIO;
- goto out;
- }
-
- sve_alloc(target, true);
- if (!target->thread.sve_state) {
- ret = -ENOMEM;
- clear_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_FPSIMD;
- goto out;
- }
-
- /*
- * Ensure target->thread.sve_state is up to date with target's
- * FPSIMD regs, so that a short copyin leaves trailing
- * registers unmodified. Only enable SVE if we are
- * configuring normal SVE, a system with streaming SVE may not
- * have normal SVE.
- */
- fpsimd_sync_to_sve(target);
- if (type == ARM64_VEC_SVE)
- set_tsk_thread_flag(target, TIF_SVE);
- target->thread.fp_type = FP_STATE_SVE;
+ if (count && vq != sve_vq_from_vl(header.vl))
+ return -EIO;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
@@ -956,7 +1018,7 @@ static int sve_set_common(struct task_struct *target,
target->thread.sve_state,
start, end);
if (ret)
- goto out;
+ return ret;
start = end;
end = SVE_PT_SVE_FPSR_OFFSET(vq);
@@ -972,8 +1034,6 @@ static int sve_set_common(struct task_struct *target,
&target->thread.uw.fpsimd_state.fpsr,
start, end);
-out:
- fpsimd_flush_task_state(target);
return ret;
}
@@ -982,7 +1042,7 @@ static int sve_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
return sve_set_common(target, regset, pos, count, kbuf, ubuf,
@@ -1095,7 +1155,11 @@ static int za_set(struct task_struct *target,
if (ret)
goto out;
- /* Actual VL set may be less than the user asked for: */
+ /*
+ * Actual VL set may be different from what the user asked
+ * for, or we may have configured the _ONEXEC rather than
+ * current VL:
+ */
vq = sve_vq_from_vl(task_get_sme_vl(target));
/* Ensure there is some SVE storage for streaming mode */
@@ -1107,12 +1171,13 @@ static int za_set(struct task_struct *target,
}
}
- /* Allocate/reinit ZA storage */
- sme_alloc(target, true);
- if (!target->thread.sme_state) {
- ret = -ENOMEM;
- goto out;
- }
+ /*
+ * Only flush the storage if PSTATE.ZA was not already set,
+ * otherwise preserve any existing data.
+ */
+ sme_alloc(target, !thread_za_enabled(&target->thread));
+ if (!target->thread.sme_state)
+ return -ENOMEM;
/* If there is no data then disable ZA */
if (!count) {
@@ -1387,7 +1452,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target,
{
long ctrl = get_tagged_addr_ctrl(target);
- if (IS_ERR_VALUE(ctrl))
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
return ctrl;
return membuf_write(&to, &ctrl, sizeof(ctrl));
@@ -1401,6 +1466,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
int ret;
long ctrl;
+ ctrl = get_tagged_addr_ctrl(target);
+ if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl)))
+ return ctrl;
+
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
if (ret)
return ret;
@@ -1409,6 +1478,101 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ctrl = target->thread.por_el0;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+static void task_gcs_to_user(struct user_gcs *user_gcs,
+ const struct task_struct *target)
+{
+ user_gcs->features_enabled = target->thread.gcs_el0_mode;
+ user_gcs->features_locked = target->thread.gcs_el0_locked;
+ user_gcs->gcspr_el0 = target->thread.gcspr_el0;
+}
+
+static void task_gcs_from_user(struct task_struct *target,
+ const struct user_gcs *user_gcs)
+{
+ target->thread.gcs_el0_mode = user_gcs->features_enabled;
+ target->thread.gcs_el0_locked = user_gcs->features_locked;
+ target->thread.gcspr_el0 = user_gcs->gcspr_el0;
+}
+
+static int gcs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ if (target == current)
+ gcs_preserve_current_state();
+
+ task_gcs_to_user(&user_gcs, target);
+
+ return membuf_write(&to, &user_gcs, sizeof(user_gcs));
+}
+
+static int gcs_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ struct user_gcs user_gcs;
+
+ if (!system_supports_gcs())
+ return -EINVAL;
+
+ task_gcs_to_user(&user_gcs, target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1);
+ if (ret)
+ return ret;
+
+ if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ task_gcs_from_user(target, &user_gcs);
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1417,6 +1581,7 @@ enum aarch64_regset {
REGSET_HW_BREAK,
REGSET_HW_WATCH,
#endif
+ REGSET_FPMR,
REGSET_SYSTEM_CALL,
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
@@ -1437,11 +1602,17 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE,
+#endif
+#ifdef CONFIG_ARM64_GCS
+ REGSET_GCS,
+#endif
};
static const struct user_regset aarch64_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(struct user_pt_regs) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1449,7 +1620,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(struct user_fpsimd_state) / sizeof(u32),
/*
* We pretend we have 32-bit registers because the fpsr and
@@ -1462,7 +1633,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = fpr_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 2,
.size = sizeof(void *),
.align = sizeof(void *),
@@ -1471,7 +1642,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1479,7 +1650,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1488,17 +1659,26 @@ static const struct user_regset aarch64_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
.regset_get = system_call_get,
.set = system_call_set,
},
+ [REGSET_FPMR] = {
+ USER_REGSET_NOTE_TYPE(ARM_FPMR),
+ .n = 1,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = fpmr_get,
+ .set = fpmr_set,
+ },
#ifdef CONFIG_ARM64_SVE
[REGSET_SVE] = { /* Scalable Vector Extension */
- .core_note_type = NT_ARM_SVE,
- .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE),
+ USER_REGSET_NOTE_TYPE(ARM_SVE),
+ .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX,
+ SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
.align = SVE_VQ_BYTES,
@@ -1508,7 +1688,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_SME
[REGSET_SSVE] = { /* Streaming mode SVE */
- .core_note_type = NT_ARM_SSVE,
+ USER_REGSET_NOTE_TYPE(ARM_SSVE),
.n = DIV_ROUND_UP(SVE_PT_SIZE(SME_VQ_MAX, SVE_PT_REGS_SVE),
SVE_VQ_BYTES),
.size = SVE_VQ_BYTES,
@@ -1517,7 +1697,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = ssve_set,
},
[REGSET_ZA] = { /* SME ZA */
- .core_note_type = NT_ARM_ZA,
+ USER_REGSET_NOTE_TYPE(ARM_ZA),
/*
* ZA is a single register but it's variably sized and
* the ptrace core requires that the size of any data
@@ -1533,7 +1713,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = za_set,
},
[REGSET_ZT] = { /* SME ZT */
- .core_note_type = NT_ARM_ZT,
+ USER_REGSET_NOTE_TYPE(ARM_ZT),
.n = 1,
.size = ZT_SIG_REG_BYTES,
.align = sizeof(u64),
@@ -1543,7 +1723,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
- .core_note_type = NT_ARM_PAC_MASK,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_MASK),
.n = sizeof(struct user_pac_mask) / sizeof(u64),
.size = sizeof(u64),
.align = sizeof(u64),
@@ -1551,7 +1731,7 @@ static const struct user_regset aarch64_regsets[] = {
/* this cannot be set dynamically */
},
[REGSET_PAC_ENABLED_KEYS] = {
- .core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PAC_ENABLED_KEYS),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1560,7 +1740,7 @@ static const struct user_regset aarch64_regsets[] = {
},
#ifdef CONFIG_CHECKPOINT_RESTORE
[REGSET_PACA_KEYS] = {
- .core_note_type = NT_ARM_PACA_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACA_KEYS),
.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1568,7 +1748,7 @@ static const struct user_regset aarch64_regsets[] = {
.set = pac_address_keys_set,
},
[REGSET_PACG_KEYS] = {
- .core_note_type = NT_ARM_PACG_KEYS,
+ USER_REGSET_NOTE_TYPE(ARM_PACG_KEYS),
.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
.size = sizeof(__uint128_t),
.align = sizeof(__uint128_t),
@@ -1579,7 +1759,7 @@ static const struct user_regset aarch64_regsets[] = {
#endif
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
[REGSET_TAGGED_ADDR_CTRL] = {
- .core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+ USER_REGSET_NOTE_TYPE(ARM_TAGGED_ADDR_CTRL),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1587,6 +1767,26 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ USER_REGSET_NOTE_TYPE(ARM_POE),
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
+#ifdef CONFIG_ARM64_GCS
+ [REGSET_GCS] = {
+ USER_REGSET_NOTE_TYPE(ARM_GCS),
+ .n = sizeof(struct user_gcs) / sizeof(u64),
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = gcs_get,
+ .set = gcs_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
@@ -1594,7 +1794,6 @@ static const struct user_regset_view user_aarch64_view = {
.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets)
};
-#ifdef CONFIG_COMPAT
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
@@ -1770,7 +1969,7 @@ static int compat_tls_set(struct task_struct *target,
static const struct user_regset aarch32_regsets[] = {
[REGSET_COMPAT_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1778,7 +1977,7 @@ static const struct user_regset aarch32_regsets[] = {
.set = compat_gpr_set
},
[REGSET_COMPAT_VFP] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1795,7 +1994,7 @@ static const struct user_regset_view user_aarch32_view = {
static const struct user_regset aarch32_ptrace_regsets[] = {
[REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = COMPAT_ELF_NGREG,
.size = sizeof(compat_elf_greg_t),
.align = sizeof(compat_elf_greg_t),
@@ -1803,7 +2002,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_gpr_set
},
[REGSET_FPR] = {
- .core_note_type = NT_ARM_VFP,
+ USER_REGSET_NOTE_TYPE(ARM_VFP),
.n = VFP_STATE_SIZE / sizeof(compat_ulong_t),
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1811,7 +2010,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = compat_vfp_set
},
[REGSET_TLS] = {
- .core_note_type = NT_ARM_TLS,
+ USER_REGSET_NOTE_TYPE(ARM_TLS),
.n = 1,
.size = sizeof(compat_ulong_t),
.align = sizeof(compat_ulong_t),
@@ -1820,7 +2019,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
- .core_note_type = NT_ARM_HW_BREAK,
+ USER_REGSET_NOTE_TYPE(ARM_HW_BREAK),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1828,7 +2027,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
.set = hw_break_set,
},
[REGSET_HW_WATCH] = {
- .core_note_type = NT_ARM_HW_WATCH,
+ USER_REGSET_NOTE_TYPE(ARM_HW_WATCH),
.n = sizeof(struct user_hwdebug_state) / sizeof(u32),
.size = sizeof(u32),
.align = sizeof(u32),
@@ -1837,7 +2036,7 @@ static const struct user_regset aarch32_ptrace_regsets[] = {
},
#endif
[REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_ARM_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(ARM_SYSTEM_CALL),
.n = 1,
.size = sizeof(int),
.align = sizeof(int),
@@ -1851,6 +2050,7 @@ static const struct user_regset_view user_aarch32_ptrace_view = {
.regsets = aarch32_ptrace_regsets, .n = ARRAY_SIZE(aarch32_ptrace_regsets)
};
+#ifdef CONFIG_COMPAT
static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t __user *ret)
{
@@ -2112,7 +2312,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
-#ifdef CONFIG_COMPAT
/*
* Core dumping of 32-bit tasks or compat ptrace requests must use the
* user_aarch32_view compatible with arm32. Native ptrace requests on
@@ -2123,7 +2322,7 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
return &user_aarch32_view;
else if (is_compat_thread(task_thread_info(task)))
return &user_aarch32_ptrace_view;
-#endif
+
return &user_aarch64_view;
}
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index 99f2ffe9fc05..5b0891146054 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -74,4 +74,5 @@ static void __exit reloc_test_exit(void)
module_init(reloc_test_init);
module_exit(reloc_test_exit);
+MODULE_DESCRIPTION("Relocation testing module");
MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
new file mode 100644
index 000000000000..c64a06f58c0b
--- /dev/null
+++ b/arch/arm64/kernel/rsi.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/psci.h>
+#include <linux/swiotlb.h>
+#include <linux/cc_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/mem_encrypt.h>
+#include <asm/rsi.h>
+
+static struct realm_config config;
+
+unsigned long prot_ns_shared;
+EXPORT_SYMBOL(prot_ns_shared);
+
+DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
+EXPORT_SYMBOL(rsi_present);
+
+bool cc_platform_has(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_MEM_ENCRYPT:
+ return is_realm_world();
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
+
+static bool rsi_version_matches(void)
+{
+ unsigned long ver_lower, ver_higher;
+ unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
+ &ver_lower,
+ &ver_higher);
+
+ if (ret == SMCCC_RET_NOT_SUPPORTED)
+ return false;
+
+ if (ret != RSI_SUCCESS) {
+ pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
+ RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower),
+ RSI_ABI_VERSION_GET_MAJOR(ver_higher),
+ RSI_ABI_VERSION_GET_MINOR(ver_higher));
+ return false;
+ }
+
+ pr_info("RME: Using RSI version %lu.%lu\n",
+ RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+ RSI_ABI_VERSION_GET_MINOR(ver_lower));
+
+ return true;
+}
+
+static void __init arm64_rsi_setup_memory(void)
+{
+ u64 i;
+ phys_addr_t start, end;
+
+ /*
+ * Iterate over the available memory ranges and convert the state to
+ * protected memory. We should take extra care to ensure that we DO NOT
+ * permit any "DESTROYED" pages to be converted to "RAM".
+ *
+ * panic() is used because if the attempt to switch the memory to
+ * protected has failed here, then future accesses to the memory are
+ * simply going to be reflected as a SEA (Synchronous External Abort)
+ * which we can't handle. Bailing out early prevents the guest limping
+ * on and dying later.
+ */
+ for_each_mem_range(i, &start, &end) {
+ if (rsi_set_memory_range_protected_safe(start, end)) {
+ panic("Failed to set memory range to protected: %pa-%pa",
+ &start, &end);
+ }
+ }
+}
+
+/*
+ * Check if a given PA range is Trusted (e.g., Protected memory, a Trusted Device
+ * mapping, or an MMIO emulated in the Realm world).
+ *
+ * We can rely on the RIPAS value of the region to detect if a given region is
+ * protected.
+ *
+ * RIPAS_DEV - A trusted device memory or a trusted emulated MMIO (in the Realm
+ * world
+ * RIPAS_RAM - Memory (RAM), protected by the RMM guarantees. (e.g., Firmware
+ * reserved regions for data sharing).
+ *
+ * RIPAS_DESTROYED is a special case of one of the above, where the host did
+ * something without our permission and as such we can't do anything about it.
+ *
+ * The only case where something is emulated by the untrusted hypervisor or is
+ * backed by shared memory is indicated by RSI_RIPAS_EMPTY.
+ */
+bool arm64_rsi_is_protected(phys_addr_t base, size_t size)
+{
+ enum ripas ripas;
+ phys_addr_t end, top;
+
+ /* Overflow ? */
+ if (WARN_ON(base + size <= base))
+ return false;
+
+ end = ALIGN(base + size, RSI_GRANULE_SIZE);
+ base = ALIGN_DOWN(base, RSI_GRANULE_SIZE);
+
+ while (base < end) {
+ if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top)))
+ break;
+ if (WARN_ON(top <= base))
+ break;
+ if (ripas == RSI_RIPAS_EMPTY)
+ break;
+ base = top;
+ }
+
+ return base >= end;
+}
+EXPORT_SYMBOL(arm64_rsi_is_protected);
+
+static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot)
+{
+ if (arm64_rsi_is_protected(phys, size))
+ *prot = pgprot_encrypted(*prot);
+ else
+ *prot = pgprot_decrypted(*prot);
+
+ return 0;
+}
+
+void __init arm64_rsi_init(void)
+{
+ if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC)
+ return;
+ if (!rsi_version_matches())
+ return;
+ if (WARN_ON(rsi_get_realm_config(&config)))
+ return;
+ prot_ns_shared = BIT(config.ipa_bits - 1);
+
+ if (arm64_ioremap_prot_hook_register(realm_ioremap_hook))
+ return;
+
+ if (realm_register_memory_enc_ops())
+ return;
+
+ arm64_rsi_setup_memory();
+
+ static_branch_enable(&rsi_present);
+}
+
+static struct platform_device rsi_dev = {
+ .name = RSI_PDEV_NAME,
+ .id = PLATFORM_DEVID_NONE
+};
+
+static int __init arm64_create_dummy_rsi_dev(void)
+{
+ if (is_realm_world() &&
+ platform_device_register(&rsi_dev))
+ pr_err("failed to register rsi platform device\n");
+ return 0;
+}
+
+arch_initcall(arm64_create_dummy_rsi_dev)
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 255d12f881c2..778f2a1faac8 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -34,10 +34,8 @@ unsigned long sdei_exit_mode;
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#ifdef CONFIG_VMAP_STACK
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#endif
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
@@ -65,9 +63,6 @@ static void free_sdei_stacks(void)
{
int cpu;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return;
-
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
@@ -91,9 +86,6 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return 0;
-
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
if (err)
@@ -206,7 +198,7 @@ out_err:
/*
* do_sdei_event() returns one of:
* SDEI_EV_HANDLED - success, return to the interrupted context.
- * SDEI_EV_FAILED - failure, return this error code to firmare.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
* virtual-address - success, return to this address.
*/
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
@@ -247,7 +239,7 @@ unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
* If we interrupted the kernel with interrupts masked, we always go
* back to wherever we came from.
*/
- if (mode == kernel_mode && !interrupts_enabled(regs))
+ if (mode == kernel_mode && regs_irqs_disabled(regs))
return SDEI_EV_HANDLED;
/*
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 417a8a86b2db..23c05dc7a8f2 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/numa.h>
+#include <asm/rsi.h>
#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/setup.h>
@@ -166,36 +167,25 @@ static void __init smp_build_mpidr_hash(void)
pr_warn("Large number of MPIDR hash buckets detected\n");
}
-static void *early_fdt_ptr __initdata;
-
-void __init *get_early_fdt_ptr(void)
-{
- return early_fdt_ptr;
-}
-
-asmlinkage void __init early_fdt_map(u64 dt_phys)
-{
- int fdt_size;
-
- early_fixmap_init();
- early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL);
-}
-
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
- int size;
+ int size = 0;
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
const char *name;
if (dt_virt)
memblock_reserve(dt_phys, size);
- if (!dt_virt || !early_init_dt_scan(dt_virt)) {
+ /*
+ * dt_virt is a fixmap address, hence __pa(dt_virt) can't be used.
+ * Pass dt_phys directly.
+ */
+ if (!early_init_dt_scan(dt_virt, dt_phys)) {
pr_crit("\n"
- "Error: invalid device tree blob at physical address %pa (virtual address 0x%px)\n"
- "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
- "\nPlease check your bootloader.",
- &dt_phys, dt_virt);
+ "Error: invalid device tree blob: PA=%pa, VA=%px, size=%d bytes\n"
+ "The dtb must be 8-byte aligned and must not exceed 2 MB in size.\n"
+ "\nPlease check your bootloader.\n",
+ &dt_phys, dt_virt, size);
/*
* Note that in this _really_ early stage we cannot even BUG()
@@ -224,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
- kernel_code.start = __pa_symbol(_stext);
+ kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@@ -233,9 +223,7 @@ static void __init request_standard_resources(void)
num_standard_resources = memblock.memory.cnt;
res_size = num_standard_resources * sizeof(*standard_resources);
- standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES);
- if (!standard_resources)
- panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
+ standard_resources = memblock_alloc_or_panic(res_size, SMP_CACHE_BYTES);
for_each_mem_region(region) {
res = &standard_resources[i++];
@@ -292,19 +280,12 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
- setup_initial_init_mm(_stext, _etext, _edata, _end);
+ setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;
kaslr_init();
- /*
- * If know now we are going to need KPTI then use non-global
- * mappings from the start, avoiding the cost of rewriting
- * everything later.
- */
- arm64_use_ng_mappings = kaslr_requires_kpti();
-
early_fixmap_init();
early_ioremap_init();
@@ -320,9 +301,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
dynamic_scs_init();
/*
- * Unmask asynchronous aborts and fiq after bringing up possible
- * earlycon. (Report possible System Errors once we can report this
- * occurred).
+ * The primary CPU enters the kernel with all DAIF exceptions masked.
+ *
+ * We must unmask Debug and SError before preemption or scheduling is
+ * possible to ensure that these are consistently unmasked across
+ * threads, and we want to unmask SError as soon as possible after
+ * initializing earlycon so that we can report any SErrors immediately.
+ *
+ * IRQ and FIQ will be unmasked after the root irqchip has been
+ * detected and initialized.
*/
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -367,13 +354,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
else
psci_acpi_init();
+ arm64_rsi_init();
+
init_bootcpu_ops();
smp_init_cpus();
smp_build_mpidr_hash();
- /* Init percpu seeds for random tags after cpus are set up. */
- kasan_init_sw_tags();
-
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation
@@ -402,19 +388,10 @@ static inline bool cpu_can_disable(unsigned int cpu)
return false;
}
-static int __init topology_init(void)
+bool arch_cpu_is_hotpluggable(int num)
{
- int i;
-
- for_each_possible_cpu(i) {
- struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
- cpu->hotpluggable = cpu_can_disable(i);
- register_cpu(cpu, i);
- }
-
- return 0;
+ return cpu_can_disable(num);
}
-subsys_initcall(topology_init);
static void dump_kernel_offset(void)
{
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7ebe744c64e..1110eeb21f57 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -9,6 +9,7 @@
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/irq-entry-common.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/freezer.h>
@@ -16,15 +17,17 @@
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/string.h>
-#include <linux/resume_user_mode.h>
#include <linux/ratelimit.h>
+#include <linux/rseq.h>
#include <linux/syscalls.h>
+#include <linux/pkeys.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/exception.h>
#include <asm/cacheflush.h>
+#include <asm/gcs.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
#include <asm/fpsimd.h>
@@ -34,6 +37,8 @@
#include <asm/traps.h>
#include <asm/vdso.h>
+#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK)
+
/*
* Do a signal return; undo the signal stack. These are aligned to 128-bit.
*/
@@ -42,11 +47,6 @@ struct rt_sigframe {
struct ucontext uc;
};
-struct frame_record {
- u64 fp;
- u64 lr;
-};
-
struct rt_sigframe_user_layout {
struct rt_sigframe __user *sigframe;
struct frame_record __user *next_frame;
@@ -56,18 +56,76 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long gcs_offset;
unsigned long sve_offset;
unsigned long tpidr2_offset;
unsigned long za_offset;
unsigned long zt_offset;
+ unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
-#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
+/*
+ * Holds any EL0-controlled state that influences unprivileged memory accesses.
+ * This includes both accesses done in userspace and uaccess done in the kernel.
+ *
+ * This state needs to be carefully managed to ensure that it doesn't cause
+ * uaccess to fail when setting up the signal frame, and the signal handler
+ * itself also expects a well-defined state when entered.
+ */
+struct user_access_state {
+ u64 por_el0;
+};
+
#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+/*
+ * Save the user access state into ua_state and reset it to disable any
+ * restrictions.
+ */
+static void save_reset_user_access_state(struct user_access_state *ua_state)
+{
+ if (system_supports_poe()) {
+ u64 por_enable_all = 0;
+
+ for (int pkey = 0; pkey < arch_max_pkey(); pkey++)
+ por_enable_all |= POR_ELx_PERM_PREP(pkey, POE_RWX);
+
+ ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ write_sysreg_s(por_enable_all, SYS_POR_EL0);
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
+ }
+}
+
+/*
+ * Set the user access state for invoking the signal handler.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void set_handler_user_access_state(void)
+{
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
+/*
+ * Restore the user access state to the values saved in ua_state.
+ *
+ * No uaccess should be done after that function is called.
+ */
+static void restore_user_access_state(const struct user_access_state *ua_state)
+{
+ if (system_supports_poe())
+ write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
+}
+
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
const size_t reserved_size =
@@ -182,6 +240,12 @@ struct user_ctxs {
u32 za_size;
struct zt_context __user *zt;
u32 zt_size;
+ struct fpmr_context __user *fpmr;
+ u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
+ struct gcs_context __user *gcs;
+ u32 gcs_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -190,6 +254,8 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
&current->thread.uw.fpsimd_state;
int err;
+ fpsimd_sync_from_effective_state(current);
+
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
__put_user_error(fpsimd->fpsr, &ctx->fpsr, err);
@@ -202,31 +268,95 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
-static int restore_fpsimd_context(struct user_ctxs *user)
+static int read_fpsimd_context(struct user_fpsimd_state *fpsimd,
+ struct user_ctxs *user)
{
- struct user_fpsimd_state fpsimd;
- int err = 0;
+ int err;
/* check the size information */
if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
- __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
+ err = __copy_from_user(fpsimd->vregs, &(user->fpsimd->vregs),
+ sizeof(fpsimd->vregs));
+ __get_user_error(fpsimd->fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd->fpcr, &(user->fpsimd->fpcr), err);
+
+ return err ? -EFAULT : 0;
+}
+
+static int restore_fpsimd_context(struct user_ctxs *user)
+{
+ struct user_fpsimd_state fpsimd;
+ int err;
+
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
clear_thread_flag(TIF_SVE);
+ current->thread.svcr &= ~SVCR_SM_MASK;
current->thread.fp_type = FP_STATE_FPSIMD;
/* load the hardware registers from the fpsimd_state structure */
+ fpsimd_update_current_state(&fpsimd);
+ return 0;
+}
+
+static int preserve_fpmr_context(struct fpmr_context __user *ctx)
+{
+ int err = 0;
+
+ __put_user_error(FPMR_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(current->thread.uw.fpmr, &ctx->fpmr, err);
+
+ return err;
+}
+
+static int restore_fpmr_context(struct user_ctxs *user)
+{
+ u64 fpmr;
+ int err = 0;
+
+ if (user->fpmr_size != sizeof(*user->fpmr))
+ return -EINVAL;
+
+ __get_user_error(fpmr, &user->fpmr->fpmr, err);
if (!err)
- fpsimd_update_current_state(&fpsimd);
+ current->thread.uw.fpmr = fpmr;
- return err ? -EFAULT : 0;
+ return err;
+}
+
+static int preserve_poe_context(struct poe_context __user *ctx,
+ const struct user_access_state *ua_state)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(ua_state->por_el0, &ctx->por_el0, err);
+
+ return err;
}
+static int restore_poe_context(struct user_ctxs *user,
+ struct user_access_state *ua_state)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ ua_state->por_el0 = por_el0;
+
+ return err;
+}
#ifdef CONFIG_ARM64_SVE
@@ -242,7 +372,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
flags |= SVE_SIG_FLAG_SM;
- } else if (test_thread_flag(TIF_SVE)) {
+ } else if (current->thread.fp_type == FP_STATE_SVE) {
vq = sve_vq_from_vl(vl);
}
@@ -257,11 +387,6 @@ static int preserve_sve_context(struct sve_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the SVE state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
current->thread.sve_state,
SVE_SIG_REGS_SIZE(vq));
@@ -276,6 +401,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
u16 user_vl, flags;
+ bool sm;
if (user->sve_size < sizeof(*user->sve))
return -EINVAL;
@@ -285,7 +411,8 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (err)
return err;
- if (flags & SVE_SIG_FLAG_SM) {
+ sm = flags & SVE_SIG_FLAG_SM;
+ if (sm) {
if (!system_supports_sme())
return -EINVAL;
@@ -305,28 +432,23 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (user_vl != vl)
return -EINVAL;
- if (user->sve_size == sizeof(*user->sve)) {
- clear_thread_flag(TIF_SVE);
- current->thread.svcr &= ~SVCR_SM_MASK;
- current->thread.fp_type = FP_STATE_FPSIMD;
- goto fpsimd_only;
- }
+ /*
+ * Non-streaming SVE state may be preserved without an SVE payload, in
+ * which case the SVE context only has a header with VL==0, and all
+ * state can be restored from the FPSIMD context.
+ *
+ * Streaming SVE state is always preserved with an SVE payload. For
+ * consistency and robustness, reject restoring streaming SVE state
+ * without an SVE payload.
+ */
+ if (!sm && user->sve_size == sizeof(*user->sve))
+ return restore_fpsimd_context(user);
vq = sve_vq_from_vl(vl);
if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sve_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
-
sve_alloc(current, true);
if (!current->thread.sve_state) {
clear_thread_flag(TIF_SVE);
@@ -346,19 +468,14 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
set_thread_flag(TIF_SVE);
current->thread.fp_type = FP_STATE_SVE;
-fpsimd_only:
- /* copy the FP and status/control registers */
- /* restore_sigframe() already checked that user->fpsimd != NULL. */
- err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
- sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
- __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+ err = read_fpsimd_context(&fpsimd, user);
+ if (err)
+ return err;
- /* load the hardware registers from the fpsimd_state structure */
- if (!err)
- fpsimd_update_current_state(&fpsimd);
+ /* Merge the FPSIMD registers into the SVE state */
+ fpsimd_update_current_state(&fpsimd);
- return err ? -EFAULT : 0;
+ return 0;
}
#else /* ! CONFIG_ARM64_SVE */
@@ -378,13 +495,12 @@ extern int preserve_sve_context(void __user *ctx);
static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
{
+ u64 tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
int err = 0;
- current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
-
__put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
__put_user_error(sizeof(*ctx), &ctx->head.size, err);
- __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+ __put_user_error(tpidr2_el0, &ctx->tpidr2, err);
return err;
}
@@ -426,11 +542,6 @@ static int preserve_za_context(struct za_context __user *ctx)
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
if (vq) {
- /*
- * This assumes that the ZA state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
@@ -465,16 +576,6 @@ static int restore_za_context(struct user_ctxs *user)
if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.sme_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
-
sme_alloc(current, true);
if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
@@ -512,11 +613,6 @@ static int preserve_zt_context(struct zt_context __user *ctx)
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
- /*
- * This assumes that the ZT state has already been saved to
- * the task struct by calling the function
- * fpsimd_signal_preserve_current_state().
- */
err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
thread_zt_state(&current->thread),
ZT_SIG_REGS_SIZE(1));
@@ -542,16 +638,6 @@ static int restore_zt_context(struct user_ctxs *user)
if (nregs != 1)
return -EINVAL;
- /*
- * Careful: we are about __copy_from_user() directly into
- * thread.zt_state with preemption enabled, so protection is
- * needed to prevent a racing context switch from writing stale
- * registers back over the new data.
- */
-
- fpsimd_flush_task_state(current);
- /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
-
err = __copy_from_user(thread_zt_state(&current->thread),
(char __user const *)user->zt +
ZT_SIG_REGS_OFFSET,
@@ -574,6 +660,82 @@ extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
+#ifdef CONFIG_ARM64_GCS
+
+static int preserve_gcs_context(struct gcs_context __user *ctx)
+{
+ int err = 0;
+ u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * If GCS is enabled we will add a cap token to the frame,
+ * include it in the GCSPR_EL0 we report to support stack
+ * switching via sigreturn if GCS is enabled. We do not allow
+ * enabling via sigreturn so the token is only relevant for
+ * threads with GCS enabled.
+ */
+ if (task_gcs_el0_enabled(current))
+ gcspr -= 8;
+
+ __put_user_error(GCS_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(gcspr, &ctx->gcspr, err);
+ __put_user_error(0, &ctx->reserved, err);
+ __put_user_error(current->thread.gcs_el0_mode,
+ &ctx->features_enabled, err);
+
+ return err;
+}
+
+static int restore_gcs_context(struct user_ctxs *user)
+{
+ u64 gcspr, enabled;
+ int err = 0;
+
+ if (user->gcs_size != sizeof(*user->gcs))
+ return -EINVAL;
+
+ __get_user_error(gcspr, &user->gcs->gcspr, err);
+ __get_user_error(enabled, &user->gcs->features_enabled, err);
+ if (err)
+ return err;
+
+ /* Don't allow unknown modes */
+ if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
+ return -EINVAL;
+
+ err = gcs_check_locked(current, enabled);
+ if (err != 0)
+ return err;
+
+ /* Don't allow enabling */
+ if (!task_gcs_el0_enabled(current) &&
+ (enabled & PR_SHADOW_STACK_ENABLE))
+ return -EINVAL;
+
+ /* If we are disabling disable everything */
+ if (!(enabled & PR_SHADOW_STACK_ENABLE))
+ enabled = 0;
+
+ current->thread.gcs_el0_mode = enabled;
+
+ /*
+ * We let userspace set GCSPR_EL0 to anything here, we will
+ * validate later in gcs_restore_signal().
+ */
+ write_sysreg_s(gcspr, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else /* ! CONFIG_ARM64_GCS */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern int preserve_gcs_context(void __user *ctx);
+extern int restore_gcs_context(struct user_ctxs *user);
+
+#endif /* ! CONFIG_ARM64_GCS */
+
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
@@ -590,6 +752,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->tpidr2 = NULL;
user->za = NULL;
user->zt = NULL;
+ user->fpmr = NULL;
+ user->poe = NULL;
+ user->gcs = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -640,6 +805,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -684,6 +860,28 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->zt_size = size;
break;
+ case FPMR_MAGIC:
+ if (!system_supports_fpmr())
+ goto invalid;
+
+ if (user->fpmr)
+ goto invalid;
+
+ user->fpmr = (struct fpmr_context __user *)head;
+ user->fpmr_size = size;
+ break;
+
+ case GCS_MAGIC:
+ if (!system_supports_gcs())
+ goto invalid;
+
+ if (user->gcs)
+ goto invalid;
+
+ user->gcs = (struct gcs_context __user *)head;
+ user->gcs_size = size;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -767,7 +965,8 @@ invalid:
}
static int restore_sigframe(struct pt_regs *regs,
- struct rt_sigframe __user *sf)
+ struct rt_sigframe __user *sf,
+ struct user_access_state *ua_state)
{
sigset_t set;
int i, err;
@@ -789,6 +988,8 @@ static int restore_sigframe(struct pt_regs *regs,
*/
forget_syscall(regs);
+ fpsimd_save_and_flush_current_state();
+
err |= !valid_user_regs(&regs->user_regs, current);
if (err == 0)
err = parse_user_sigframe(&user, sf);
@@ -803,22 +1004,84 @@ static int restore_sigframe(struct pt_regs *regs,
err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_gcs() && user.gcs)
+ err = restore_gcs_context(&user);
+
if (err == 0 && system_supports_tpidr2() && user.tpidr2)
err = restore_tpidr2_context(&user);
+ if (err == 0 && system_supports_fpmr() && user.fpmr)
+ err = restore_fpmr_context(&user);
+
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user, ua_state);
+
return err;
}
+#ifdef CONFIG_ARM64_GCS
+static int gcs_restore_signal(void)
+{
+ u64 gcspr_el0, cap;
+ int ret;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE))
+ return 0;
+
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Ensure that any changes to the GCS done via GCS operations
+ * are visible to the normal reads we do to validate the
+ * token.
+ */
+ gcsb_dsync();
+
+ /*
+ * GCSPR_EL0 should be pointing at a capped GCS, read the cap.
+ * We don't enforce that this is in a GCS page, if it is not
+ * then faults will be generated on GCS operations - the main
+ * concern is to protect GCS pages.
+ */
+ ret = copy_from_user(&cap, (unsigned long __user *)gcspr_el0,
+ sizeof(cap));
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * Check that the cap is the actual GCS before replacing it.
+ */
+ if (cap != GCS_SIGNAL_CAP(gcspr_el0))
+ return -EINVAL;
+
+ /* Invalidate the token to prevent reuse */
+ put_user_gcs(0, (unsigned long __user *)gcspr_el0, &ret);
+ if (ret != 0)
+ return -EFAULT;
+
+ write_sysreg_s(gcspr_el0 + 8, SYS_GCSPR_EL0);
+
+ return 0;
+}
+
+#else
+static int gcs_restore_signal(void) { return 0; }
+#endif
+
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -835,12 +1098,17 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (!access_ok(frame, sizeof (*frame)))
goto badframe;
- if (restore_sigframe(regs, frame))
+ if (restore_sigframe(regs, frame, &ua_state))
+ goto badframe;
+
+ if (gcs_restore_signal())
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
+ restore_user_access_state(&ua_state);
+
return regs->regs[0];
badframe:
@@ -875,10 +1143,19 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+#ifdef CONFIG_ARM64_GCS
+ if (system_supports_gcs() && (add_all || current->thread.gcspr_el0)) {
+ err = sigframe_alloc(user, &user->gcs_offset,
+ sizeof(struct gcs_context));
+ if (err)
+ return err;
+ }
+#endif
+
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
- if (add_all || test_thread_flag(TIF_SVE) ||
+ if (add_all || current->thread.fp_type == FP_STATE_SVE ||
thread_sm_enabled(&current->thread)) {
int vl = max(sve_max_vl(), sme_max_vl());
@@ -928,11 +1205,26 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}
}
+ if (system_supports_fpmr()) {
+ err = sigframe_alloc(user, &user->fpmr_offset,
+ sizeof(struct fpmr_context));
+ if (err)
+ return err;
+ }
+
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
static int setup_sigframe(struct rt_sigframe_user_layout *user,
- struct pt_regs *regs, sigset_t *set)
+ struct pt_regs *regs, sigset_t *set,
+ const struct user_access_state *ua_state)
{
int i, err = 0;
struct rt_sigframe __user *sf = user->sigframe;
@@ -968,6 +1260,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ if (system_supports_gcs() && err == 0 && user->gcs_offset) {
+ struct gcs_context __user *gcs_ctx =
+ apply_user_offset(user, user->gcs_offset);
+ err |= preserve_gcs_context(gcs_ctx);
+ }
+
/* Scalable Vector Extension state (including streaming), if present */
if ((system_supports_sve() || system_supports_sme()) &&
err == 0 && user->sve_offset) {
@@ -983,6 +1281,20 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_tpidr2_context(tpidr2_ctx);
}
+ /* FPMR if supported */
+ if (system_supports_fpmr() && err == 0) {
+ struct fpmr_context __user *fpmr_ctx =
+ apply_user_offset(user, user->fpmr_offset);
+ err |= preserve_fpmr_context(fpmr_ctx);
+ }
+
+ if (system_supports_poe() && err == 0) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx, ua_state);
+ }
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1071,15 +1383,81 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
return 0;
}
-static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+#ifdef CONFIG_ARM64_GCS
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ u64 gcspr_el0;
+ int ret = 0;
+
+ if (!system_supports_gcs())
+ return 0;
+
+ if (!task_gcs_el0_enabled(current))
+ return 0;
+
+ /*
+ * We are entering a signal handler, current register state is
+ * active.
+ */
+ gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
+
+ /*
+ * Push a cap and the GCS entry for the trampoline onto the GCS.
+ */
+ put_user_gcs((unsigned long)sigtramp,
+ (unsigned long __user *)(gcspr_el0 - 16), &ret);
+ put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 8),
+ (unsigned long __user *)(gcspr_el0 - 8), &ret);
+ if (ret != 0)
+ return ret;
+
+ gcspr_el0 -= 16;
+ write_sysreg_s(gcspr_el0, SYS_GCSPR_EL0);
+
+ return 0;
+}
+#else
+
+static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig)
+{
+ return 0;
+}
+
+#endif
+
+static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
+ int err;
+
+ if (ksig->ka.sa.sa_flags & SA_RESTORER)
+ sigtramp = ksig->ka.sa.sa_restorer;
+ else
+ sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
+
+ err = gcs_signal_entry(sigtramp, ksig);
+ if (err)
+ return err;
+
+ /*
+ * We must not fail from this point onwards. We are going to update
+ * registers, including SP, in order to invoke the signal handler. If
+ * we failed and attempted to deliver a nested SIGSEGV to a handler
+ * after that point, the subsequent sigreturn would end up restoring
+ * the (partial) state for the original signal handler.
+ */
regs->regs[0] = usig;
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
+ regs->regs[1] = (unsigned long)&user->sigframe->info;
+ regs->regs[2] = (unsigned long)&user->sigframe->uc;
+ }
regs->sp = (unsigned long)user->sigframe;
regs->regs[29] = (unsigned long)&user->next_frame->fp;
- regs->pc = (unsigned long)ka->sa.sa_handler;
+ regs->regs[30] = (unsigned long)sigtramp;
+ regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
/*
* Signal delivery is a (wacky) indirect function call in
@@ -1102,29 +1480,12 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
/* Signal handlers are invoked with ZA and streaming mode disabled */
if (system_supports_sme()) {
- /*
- * If we were in streaming mode the saved register
- * state was SVE but we will exit SM and use the
- * FPSIMD register state - flush the saved FPSIMD
- * register state in case it gets loaded.
- */
- if (current->thread.svcr & SVCR_SM_MASK) {
- memset(&current->thread.uw.fpsimd_state, 0,
- sizeof(current->thread.uw.fpsimd_state));
- current->thread.fp_type = FP_STATE_FPSIMD;
- }
-
- current->thread.svcr &= ~(SVCR_ZA_MASK |
- SVCR_SM_MASK);
- sme_smstop();
+ task_smstop_sm(current);
+ current->thread.svcr &= ~SVCR_ZA_MASK;
+ write_sysreg_s(0, SYS_TPIDR2_EL0);
}
- if (ka->sa.sa_flags & SA_RESTORER)
- sigtramp = ka->sa.sa_restorer;
- else
- sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
-
- regs->regs[30] = (unsigned long)sigtramp;
+ return 0;
}
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
@@ -1132,28 +1493,37 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
{
struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
+ struct user_access_state ua_state;
int err = 0;
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
if (get_sigframe(&user, ksig, regs))
return 1;
+ save_reset_user_access_state(&ua_state);
frame = user.sigframe;
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(&user, regs, set);
- if (err == 0) {
- setup_return(regs, &ksig->ka, &user, usig);
- if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- regs->regs[1] = (unsigned long)&frame->info;
- regs->regs[2] = (unsigned long)&frame->uc;
- }
- }
+ err |= setup_sigframe(&user, regs, set, &ua_state);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ err |= copy_siginfo_to_user(&frame->info, &ksig->info);
+
+ if (err == 0)
+ err = setup_return(regs, ksig, &user, usig);
+
+ /*
+ * We must not fail if setup_return() succeeded - see comment at the
+ * beginning of setup_return().
+ */
+
+ if (err == 0)
+ set_handler_user_access_state();
+ else
+ restore_user_access_state(&ua_state);
return err;
}
@@ -1207,7 +1577,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
* the kernel can handle, and then we build all the user-level signal handling
* stack-frames in one go after that.
*/
-static void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
unsigned long continue_addr = 0, restart_addr = 0;
int retval = 0;
@@ -1278,41 +1648,6 @@ static void do_signal(struct pt_regs *regs)
restore_saved_sigmask();
}
-void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
-{
- do {
- if (thread_flags & _TIF_NEED_RESCHED) {
- /* Unmask Debug and SError for the next task */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- schedule();
- } else {
- local_daif_restore(DAIF_PROCCTX);
-
- if (thread_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
- clear_thread_flag(TIF_MTE_ASYNC_FAULT);
- send_sig_fault(SIGSEGV, SEGV_MTEAERR,
- (void __user *)NULL, current);
- }
-
- if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
- do_signal(regs);
-
- if (thread_flags & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
-
- if (thread_flags & _TIF_FOREIGN_FPSTATE)
- fpsimd_restore_current_state();
- }
-
- local_daif_mask();
- thread_flags = read_thread_flags();
- } while (thread_flags & _TIF_WORK_MASK);
-}
-
unsigned long __ro_after_init signal_minsigstksz;
/*
@@ -1344,7 +1679,7 @@ void __init minsigstksz_setup(void)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 4700f8522d27..bb3b526ff43f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -17,7 +17,7 @@
#include <asm/signal32.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
#include <asm/vdso.h>
struct compat_vfp_sigframe {
@@ -103,7 +103,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_signal_preserve_current_state();
+ fpsimd_save_and_flush_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -169,14 +169,17 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ if (err)
+ return -EFAULT;
+
/*
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err)
- fpsimd_update_current_state(&fpsimd);
+ fpsimd_save_and_flush_current_state();
+ current->thread.uw.fpsimd_state = fpsimd;
- return err ? -EFAULT : 0;
+ return 0;
}
static int compat_restore_sigframe(struct pt_regs *regs,
@@ -451,7 +454,7 @@ int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
void compat_setup_restart_syscall(struct pt_regs *regs)
{
- regs->regs[7] = __NR_compat_restart_syscall;
+ regs->regs[7] = __NR_compat32_restart_syscall;
}
/*
@@ -460,7 +463,7 @@ void compat_setup_restart_syscall(struct pt_regs *regs)
*/
static_assert(NSIGILL == 11);
static_assert(NSIGFPE == 15);
-static_assert(NSIGSEGV == 9);
+static_assert(NSIGSEGV == 10);
static_assert(NSIGBUS == 5);
static_assert(NSIGTRAP == 6);
static_assert(NSIGCHLD == 6);
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
index ccbd4aab4ba4..6f486b95b413 100644
--- a/arch/arm64/kernel/sigreturn32.S
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -13,7 +13,7 @@
* need two 16-bit instructions.
*/
-#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
.section .rodata
.globl __aarch32_sigret_code_start
@@ -22,26 +22,26 @@ __aarch32_sigret_code_start:
/*
* ARM Code
*/
- .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat32_sigreturn, 0x27 // svc #__NR_compat32_sigreturn
+ .byte __NR_compat32_sigreturn, 0xdf // mov r7, #__NR_compat32_sigreturn
/*
* ARM code
*/
- .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat32_rt_sigreturn
/*
* Thumb code
*/
- .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0x27 // svc #__NR_compat32_rt_sigreturn
+ .byte __NR_compat32_rt_sigreturn, 0xdf // mov r7, #__NR_compat32_rt_sigreturn
.globl __aarch32_sigret_code_end
__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 2aa5129d8253..f093cdf71be1 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -102,9 +102,6 @@ SYM_CODE_START(cpu_resume)
mov x0, xzr
bl init_kernel_el
mov x19, x0 // preserve boot mode
-#if VA_BITS > 48
- ldr_l x0, vabits_actual
-#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 487381164ff6..2def9d0dd3dd 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,48 +7,19 @@
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
-#include <asm/thread_info.h>
-
-/*
- * If we have SMCCC v1.3 and (as is likely) no SVE state in
- * the registers then set the SMCCC hint bit to say there's no
- * need to preserve it. Do this by directly adjusting the SMCCC
- * function value which is already stored in x0 ready to be called.
- */
-SYM_FUNC_START(__arm_smccc_sve_check)
-
- ldr_l x16, smccc_has_sve_hint
- cbz x16, 2f
-
- get_current_task x16
- ldr x16, [x16, #TSK_TI_FLAGS]
- tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state?
- tbnz x16, #TIF_SVE, 2f // Does that state include SVE?
-
-1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT
-
-2: ret
-SYM_FUNC_END(__arm_smccc_sve_check)
-EXPORT_SYMBOL(__arm_smccc_sve_check)
.macro SMCCC instr
- stp x29, x30, [sp, #-16]!
- mov x29, sp
-alternative_if ARM64_SVE
- bl __arm_smccc_sve_check
-alternative_else_nop_endif
\instr #0
- ldr x4, [sp, #16]
+ ldr x4, [sp]
stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS]
- ldr x4, [sp, #24]
+ ldr x4, [sp, #8]
cbz x4, 1f /* no quirk structure */
ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS]
cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6
b.ne 1f
str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS]
-1: ldp x29, x30, [sp], #16
- ret
+1: ret
.endm
/*
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 960b98b43506..1aa324104afb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -32,7 +32,9 @@
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
+#include <linux/kgdb.h>
#include <linux/kvm_host.h>
+#include <linux/nmi.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -53,9 +55,6 @@
#include <trace/events/ipi.h>
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -65,20 +64,20 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
- IPI_TIMER,
- IPI_IRQ_WORK,
- IPI_WAKEUP,
- NR_IPI
+static int ipi_irq_base __ro_after_init;
+static int nr_ipi __ro_after_init = NR_IPI;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
};
-static int ipi_irq_base __read_mostly;
-static int nr_ipi __read_mostly = NR_IPI;
-static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
+
+static bool crash_stop;
static void ipi_setup(int cpu);
@@ -124,7 +123,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
/* Now bring the CPU into our world */
ret = boot_secondary(cpu, idle);
if (ret) {
- pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+ if (ret != -EPERM)
+ pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
return ret;
}
@@ -215,7 +215,7 @@ asmlinkage notrace void secondary_start_kernel(void)
if (system_uses_irq_prio_masking())
init_gic_priority_masking();
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
trace_hardirqs_off();
/*
@@ -256,6 +256,13 @@ asmlinkage notrace void secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
+ /*
+ * Secondary CPUs enter the kernel with all DAIF exceptions masked.
+ *
+ * As with setup_arch() we must unmask Debug and SError exceptions, and
+ * as the root irqchip has already been detected and initialized we can
+ * unmask IRQ and FIQ at the same time.
+ */
local_daif_restore(DAIF_PROCCTX);
/*
@@ -343,7 +350,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
- * in-kernel synchronisation, try to get the firwmare to help us to
+ * in-kernel synchronisation, try to get the firmware to help us to
* verify that it has really left the kernel before we consider
* clobbering anything it might still be using.
*/
@@ -401,7 +408,7 @@ void __noreturn cpu_die_early(void)
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
- rcu_report_dead(cpu);
+ rcutree_report_cpu_dead();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
update_cpu_boot_status(CPU_KILL_ME);
@@ -431,9 +438,9 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
- setup_cpu_features();
hyp_mode_check();
- apply_alternatives_all();
+ setup_system_features();
+ setup_user_features();
mark_linear_text_alias_ro();
}
@@ -445,20 +452,17 @@ void __init smp_prepare_boot_cpu(void)
* freed shortly, so we must move over to the runtime per-cpu area.
*/
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- cpuinfo_store_boot_cpu();
- /*
- * We now know enough about the boot CPU to apply the
- * alternatives that cannot wait until interrupt handling
- * and/or scheduling is enabled.
- */
- apply_boot_alternatives();
+ cpuinfo_store_boot_cpu();
+ setup_boot_cpu_features();
/* Conditionally switch to GIC PMR for interrupt masking */
if (system_uses_irq_prio_masking())
init_gic_priority_masking();
kasan_init_hw_tags();
+ /* Init percpu seeds for random tags after cpus are set up. */
+ kasan_init_sw_tags();
}
/*
@@ -500,6 +504,59 @@ static int __init smp_cpu_setup(int cpu)
static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;
+int arch_register_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+ if (!acpi_disabled && !acpi_handle &&
+ IS_ENABLED(CONFIG_ACPI_HOTPLUG_CPU))
+ return -EPROBE_DEFER;
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+ /* For now block anything that looks like physical CPU Hotplug */
+ if (invalid_logical_cpuid(cpu) || !cpu_present(cpu)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return -ENODEV;
+ }
+#endif
+
+ /*
+ * Availability of the acpi handle is sufficient to establish
+ * that _STA has already been checked. No need to recheck here.
+ */
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
+
+ return register_cpu(c, cpu);
+}
+
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+void arch_unregister_cpu(int cpu)
+{
+ acpi_handle acpi_handle = acpi_get_processor_handle(cpu);
+ struct cpu *c = &per_cpu(cpu_devices, cpu);
+ acpi_status status;
+ unsigned long long sta;
+
+ if (!acpi_handle) {
+ pr_err_once("Removing a CPU without associated ACPI handle\n");
+ return;
+ }
+
+ status = acpi_evaluate_integer(acpi_handle, "_STA", NULL, &sta);
+ if (ACPI_FAILURE(status))
+ return;
+
+ /* For now do not allow anything that looks like physical CPU HP */
+ if (cpu_present(cpu) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+ pr_err_once("Changing CPU present bit is not supported\n");
+ return;
+ }
+
+ unregister_cpu(c);
+}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -520,7 +577,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!(processor->flags & ACPI_MADT_ENABLED)) {
+ if (!(processor->flags &
+ (ACPI_MADT_ENABLED | ACPI_MADT_GICC_ONLINE_CAPABLE))) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -739,8 +797,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
- per_cpu(cpu_number, cpu) = cpu;
-
if (cpu == smp_processor_id())
continue;
@@ -757,14 +813,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-static const char *ipi_types[NR_IPI] __tracepoint_string = {
+static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
- [IPI_WAKEUP] = "CPU wake-up interrupts",
+ [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
+ [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -775,11 +832,11 @@ int arch_show_interrupts(struct seq_file *p, int prec)
{
unsigned int cpu, i;
- for (i = 0; i < NR_IPI; i++) {
+ for (i = 0; i < MAX_IPI; i++) {
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -797,13 +854,6 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
-{
- smp_cross_call(mask, IPI_WAKEUP);
-}
-#endif
-
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
@@ -811,9 +861,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -827,21 +877,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -854,6 +909,49 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
+static void arm64_backtrace_ipi(cpumask_t *mask)
+{
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+ /*
+ * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
+ * nothing about it truly needs to be implemented using an NMI, it's
+ * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
+ * returned false our backtrace attempt will just use a regular IPI.
+ */
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+ int this_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* No need to roundup ourselves */
+ if (cpu == this_cpu)
+ continue;
+
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
+ }
+}
+#endif
+
/*
* Main handler for inter-processor interrupts
*/
@@ -874,14 +972,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -897,13 +993,17 @@ static void do_handle_IPI(int ipinr)
break;
#endif
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
- case IPI_WAKEUP:
- WARN_ONCE(!acpi_parking_protocol_valid(cpu),
- "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
- cpu);
+ case IPI_CPU_BACKTRACE:
+ /*
+ * NOTE: in some cases this _won't_ be NMI context. See the
+ * comment in arch_trigger_cpumask_backtrace().
+ */
+ nmi_cpu_backtrace(get_irq_regs());
+ break;
+
+ case IPI_KGDB_ROUNDUP:
+ kgdb_nmicallback(cpu, get_irq_regs());
break;
-#endif
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
@@ -916,14 +1016,31 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
+}
+
+static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
+{
+ if (!system_uses_irq_prio_masking())
+ return false;
+
+ switch (ipi) {
+ case IPI_CPU_STOP_NMI:
+ case IPI_CPU_BACKTRACE:
+ case IPI_KGDB_ROUNDUP:
+ return true;
+ default:
+ return false;
+ }
}
static void ipi_setup(int cpu)
@@ -933,8 +1050,18 @@ static void ipi_setup(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- enable_percpu_irq(ipi_irq_base + i, 0);
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
+ } else {
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
+ }
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -945,31 +1072,78 @@ static void ipi_teardown(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- disable_percpu_irq(ipi_irq_base + i);
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
+ } else {
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
+ }
+ }
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < NR_IPI);
- nr_ipi = min(n, NR_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", NULL, &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
+
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
- WARN_ON(err);
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
+
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
+
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
@@ -979,6 +1153,17 @@ void arch_smp_send_reschedule(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi(unsigned int cpu)
+{
+ /*
+ * We use a scheduler IPI to wake the CPU as this avoids the need for a
+ * dedicated IPI and we can safely handle spurious scheduler IPIs.
+ */
+ smp_send_reschedule(cpu);
+}
+#endif
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -999,79 +1184,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ static cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
- /* Wait up to one second for other CPUs to stop */
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
+
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
-
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
+ smp_send_stop();
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 17f66a74c745..3ebcf8c53fb0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -7,7 +7,9 @@
#include <linux/kernel.h>
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/filter.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@@ -18,6 +20,55 @@
#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
+enum kunwind_source {
+ KUNWIND_SOURCE_UNKNOWN,
+ KUNWIND_SOURCE_FRAME,
+ KUNWIND_SOURCE_CALLER,
+ KUNWIND_SOURCE_TASK,
+ KUNWIND_SOURCE_REGS_PC,
+};
+
+union unwind_flags {
+ unsigned long all;
+ struct {
+ unsigned long fgraph : 1,
+ kretprobe : 1;
+ };
+};
+
+/*
+ * Kernel unwind state
+ *
+ * @common: Common unwind state.
+ * @task: The task being unwound.
+ * @graph_idx: Used by ftrace_graph_ret_addr() for optimized stack unwinding.
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
+ */
+struct kunwind_state {
+ struct unwind_state common;
+ struct task_struct *task;
+ int graph_idx;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+ enum kunwind_source source;
+ union unwind_flags flags;
+ struct pt_regs *regs;
+};
+
+static __always_inline void
+kunwind_init(struct kunwind_state *state,
+ struct task_struct *task)
+{
+ unwind_init_common(&state->common);
+ state->task = task;
+ state->source = KUNWIND_SOURCE_UNKNOWN;
+ state->flags.all = 0;
+ state->regs = NULL;
+}
+
/*
* Start an unwind from a pt_regs.
*
@@ -26,13 +77,15 @@
* The regs must be on a stack currently owned by the calling task.
*/
static __always_inline void
-unwind_init_from_regs(struct unwind_state *state,
- struct pt_regs *regs)
+kunwind_init_from_regs(struct kunwind_state *state,
+ struct pt_regs *regs)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = regs->regs[29];
- state->pc = regs->pc;
+ state->regs = regs;
+ state->common.fp = regs->regs[29];
+ state->common.pc = regs->pc;
+ state->source = KUNWIND_SOURCE_REGS_PC;
}
/*
@@ -44,12 +97,13 @@ unwind_init_from_regs(struct unwind_state *state,
* The function which invokes this must be noinline.
*/
static __always_inline void
-unwind_init_from_caller(struct unwind_state *state)
+kunwind_init_from_caller(struct kunwind_state *state)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = (unsigned long)__builtin_frame_address(1);
- state->pc = (unsigned long)__builtin_return_address(0);
+ state->common.fp = (unsigned long)__builtin_frame_address(1);
+ state->common.pc = (unsigned long)__builtin_return_address(0);
+ state->source = KUNWIND_SOURCE_CALLER;
}
/*
@@ -63,41 +117,133 @@ unwind_init_from_caller(struct unwind_state *state)
* call this for the current task.
*/
static __always_inline void
-unwind_init_from_task(struct unwind_state *state,
- struct task_struct *task)
+kunwind_init_from_task(struct kunwind_state *state,
+ struct task_struct *task)
{
- unwind_init_common(state, task);
+ kunwind_init(state, task);
- state->fp = thread_saved_fp(task);
- state->pc = thread_saved_pc(task);
+ state->common.fp = thread_saved_fp(task);
+ state->common.pc = thread_saved_pc(task);
+ state->source = KUNWIND_SOURCE_TASK;
}
static __always_inline int
-unwind_recover_return_address(struct unwind_state *state)
+kunwind_recover_return_address(struct kunwind_state *state)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (state->task->ret_stack &&
- (state->pc == (unsigned long)return_to_handler)) {
+ (state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL, state->pc,
- (void *)state->fp);
- if (WARN_ON_ONCE(state->pc == orig_pc))
+ orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ state->common.pc,
+ (void *)state->common.fp);
+ if (state->common.pc == orig_pc) {
+ WARN_ON_ONCE(state->task == current);
return -EINVAL;
- state->pc = orig_pc;
+ }
+ state->common.pc = orig_pc;
+ state->flags.fgraph = 1;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_KRETPROBES
- if (is_kretprobe_trampoline(state->pc)) {
- state->pc = kretprobe_find_ret_addr(state->task,
- (void *)state->fp,
- &state->kr_cur);
+ if (is_kretprobe_trampoline(state->common.pc)) {
+ unsigned long orig_pc;
+ orig_pc = kretprobe_find_ret_addr(state->task,
+ (void *)state->common.fp,
+ &state->kr_cur);
+ if (!orig_pc)
+ return -EINVAL;
+ state->common.pc = orig_pc;
+ state->flags.kretprobe = 1;
}
#endif /* CONFIG_KRETPROBES */
return 0;
}
+static __always_inline
+int kunwind_next_regs_pc(struct kunwind_state *state)
+{
+ struct stack_info *info;
+ unsigned long fp = state->common.fp;
+ struct pt_regs *regs;
+
+ regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp);
+
+ info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs));
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(&state->common, info, (unsigned long)regs,
+ sizeof(*regs));
+
+ state->regs = regs;
+ state->common.pc = regs->pc;
+ state->common.fp = regs->regs[29];
+ state->regs = NULL;
+ state->source = KUNWIND_SOURCE_REGS_PC;
+ return 0;
+}
+
+static __always_inline int
+kunwind_next_frame_record_meta(struct kunwind_state *state)
+{
+ struct task_struct *tsk = state->task;
+ unsigned long fp = state->common.fp;
+ struct frame_record_meta *meta;
+ struct stack_info *info;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*meta));
+ if (!info)
+ return -EINVAL;
+
+ meta = (struct frame_record_meta *)fp;
+ switch (READ_ONCE(meta->type)) {
+ case FRAME_META_TYPE_FINAL:
+ if (meta == &task_pt_regs(tsk)->stackframe)
+ return -ENOENT;
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ case FRAME_META_TYPE_PT_REGS:
+ return kunwind_next_regs_pc(state);
+ default:
+ WARN_ON_ONCE(tsk == current);
+ return -EINVAL;
+ }
+}
+
+static __always_inline int
+kunwind_next_frame_record(struct kunwind_state *state)
+{
+ unsigned long fp = state->common.fp;
+ struct frame_record *record;
+ struct stack_info *info;
+ unsigned long new_fp, new_pc;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ info = unwind_find_stack(&state->common, fp, sizeof(*record));
+ if (!info)
+ return -EINVAL;
+
+ record = (struct frame_record *)fp;
+ new_fp = READ_ONCE(record->fp);
+ new_pc = READ_ONCE(record->lr);
+
+ if (!new_fp && !new_pc)
+ return kunwind_next_frame_record_meta(state);
+
+ unwind_consume_stack(&state->common, info, fp, sizeof(*record));
+
+ state->common.fp = new_fp;
+ state->common.pc = new_pc;
+ state->source = KUNWIND_SOURCE_FRAME;
+
+ return 0;
+}
+
/*
* Unwind from one frame record (A) to the next frame record (B).
*
@@ -106,40 +252,51 @@ unwind_recover_return_address(struct unwind_state *state)
* and the location (but not the fp value) of B.
*/
static __always_inline int
-unwind_next(struct unwind_state *state)
+kunwind_next(struct kunwind_state *state)
{
- struct task_struct *tsk = state->task;
- unsigned long fp = state->fp;
int err;
- /* Final frame; nothing to unwind */
- if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
- return -ENOENT;
+ state->flags.all = 0;
+
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME:
+ case KUNWIND_SOURCE_CALLER:
+ case KUNWIND_SOURCE_TASK:
+ case KUNWIND_SOURCE_REGS_PC:
+ err = kunwind_next_frame_record(state);
+ break;
+ default:
+ err = -EINVAL;
+ }
- err = unwind_next_frame_record(state);
if (err)
return err;
- state->pc = ptrauth_strip_kernel_insn_pac(state->pc);
+ state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
- return unwind_recover_return_address(state);
+ return kunwind_recover_return_address(state);
}
-static __always_inline void
-unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
- void *cookie)
+typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
+
+static __always_inline int
+do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
+ void *cookie)
{
- if (unwind_recover_return_address(state))
- return;
+ int ret;
- while (1) {
- int ret;
+ ret = kunwind_recover_return_address(state);
+ if (ret)
+ return ret;
- if (!consume_entry(cookie, state->pc))
- break;
- ret = unwind_next(state);
+ while (1) {
+ if (!consume_state(state, cookie))
+ return -EINVAL;
+ ret = kunwind_next(state);
+ if (ret == -ENOENT)
+ return 0;
if (ret < 0)
- break;
+ return ret;
}
}
@@ -172,17 +329,16 @@ unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
: stackinfo_get_unknown(); \
})
-noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
- void *cookie, struct task_struct *task,
- struct pt_regs *regs)
+static __always_inline int
+kunwind_stack_walk(kunwind_consume_fn consume_state,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
struct stack_info stacks[] = {
stackinfo_get_task(task),
STACKINFO_CPU(irq),
-#if defined(CONFIG_VMAP_STACK)
STACKINFO_CPU(overflow),
-#endif
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+#if defined(CONFIG_ARM_SDE_INTERFACE)
STACKINFO_SDEI(normal),
STACKINFO_SDEI(critical),
#endif
@@ -190,28 +346,131 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
STACKINFO_EFI,
#endif
};
- struct unwind_state state = {
- .stacks = stacks,
- .nr_stacks = ARRAY_SIZE(stacks),
+ struct kunwind_state state = {
+ .common = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ },
};
if (regs) {
if (task != current)
- return;
- unwind_init_from_regs(&state, regs);
+ return -EINVAL;
+ kunwind_init_from_regs(&state, regs);
} else if (task == current) {
- unwind_init_from_caller(&state);
+ kunwind_init_from_caller(&state);
} else {
- unwind_init_from_task(&state, task);
+ kunwind_init_from_task(&state, task);
}
- unwind(&state, consume_entry, cookie);
+ return do_kunwind(&state, consume_state, cookie);
+}
+
+struct kunwind_consume_entry_data {
+ stack_trace_consume_fn consume_entry;
+ void *cookie;
+};
+
+static __always_inline bool
+arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct kunwind_consume_entry_data *data = cookie;
+ return data->consume_entry(data->cookie, state->common.pc);
}
-static bool dump_backtrace_entry(void *arg, unsigned long where)
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
+}
+
+static __always_inline bool
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ /*
+ * At an exception boundary we can reliably consume the saved PC. We do
+ * not know whether the LR was live when the exception was taken, and
+ * so we cannot perform the next unwind step reliably.
+ *
+ * All that matters is whether the *entire* unwind is reliable, so give
+ * up as soon as we hit an exception boundary.
+ */
+ if (state->source == KUNWIND_SOURCE_REGS_PC)
+ return false;
+
+ return arch_kunwind_consume_entry(state, cookie);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie,
+ struct task_struct *task)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
+ task, NULL);
+}
+
+struct bpf_unwind_consume_entry_data {
+ bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
+ void *cookie;
+};
+
+static bool
+arch_bpf_unwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct bpf_unwind_consume_entry_data *data = cookie;
+
+ return data->consume_entry(data->cookie, state->common.pc, 0,
+ state->common.fp);
+}
+
+noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u64 ip, u64 sp,
+ u64 fp), void *cookie)
+{
+ struct bpf_unwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
+}
+
+static const char *state_source_string(const struct kunwind_state *state)
+{
+ switch (state->source) {
+ case KUNWIND_SOURCE_FRAME: return NULL;
+ case KUNWIND_SOURCE_CALLER: return "C";
+ case KUNWIND_SOURCE_TASK: return "T";
+ case KUNWIND_SOURCE_REGS_PC: return "P";
+ default: return "U";
+ }
+}
+
+static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg)
+{
+ const char *source = state_source_string(state);
+ union unwind_flags flags = state->flags;
+ bool has_info = source || flags.all;
char *loglvl = arg;
- printk("%s %pSb\n", loglvl, (void *)where);
+
+ printk("%s %pSb%s%s%s%s%s\n", loglvl,
+ (void *)state->common.pc,
+ has_info ? " (" : "",
+ source ? source : "",
+ flags.fgraph ? "F" : "",
+ flags.kretprobe ? "K" : "",
+ has_info ? ")" : "");
+
return true;
}
@@ -230,7 +489,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
return;
printk("%sCall trace:\n", loglvl);
- arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
+ kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
put_task_stack(tsk);
}
@@ -240,3 +499,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
dump_backtrace(NULL, tsk, loglvl);
barrier();
}
+
+/*
+ * The struct defined for userspace stack frame in AARCH64 mode.
+ */
+struct frame_tail {
+ struct frame_tail __user *fp;
+ unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+unwind_user_frame(struct frame_tail __user *tail, void *cookie,
+ stack_trace_consume_fn consume_entry)
+{
+ struct frame_tail buftail;
+ unsigned long err;
+ unsigned long lr;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ lr = ptrauth_strip_user_insn_pac(buftail.lr);
+
+ if (!consume_entry(cookie, lr))
+ return NULL;
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail >= buftail.fp)
+ return NULL;
+
+ return buftail.fp;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+ compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
+ u32 sp;
+ u32 lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie,
+ stack_trace_consume_fn consume_entry)
+{
+ struct compat_frame_tail buftail;
+ unsigned long err;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ if (!consume_entry(cookie, buftail.lr))
+ return NULL;
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail + 1 >= (struct compat_frame_tail __user *)
+ compat_ptr(buftail.fp))
+ return NULL;
+
+ return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ if (!consume_entry(cookie, regs->pc))
+ return;
+
+ if (!compat_user_mode(regs)) {
+ /* AARCH64 mode */
+ struct frame_tail __user *tail;
+
+ tail = (struct frame_tail __user *)regs->regs[29];
+ while (tail && !((unsigned long)tail & 0x7))
+ tail = unwind_user_frame(tail, cookie, consume_entry);
+ } else {
+#ifdef CONFIG_COMPAT
+ /* AARCH32 compat mode */
+ struct compat_frame_tail __user *tail;
+
+ tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+ while (tail && !((unsigned long)tail & 0x3))
+ tail = unwind_compat_user_frame(tail, cookie, consume_entry);
+#endif
+ }
+}
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 0fbdf5fe64d8..eaaff94329cd 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -12,6 +12,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exec.h>
+#include <asm/fpsimd.h>
#include <asm/mte.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
@@ -55,13 +56,13 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
- cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
+ cpu_enable_swapper_cnp();
/*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
- if (cpus_have_const_cap(ARM64_HAS_DIT))
+ if (alternative_has_cap_unlikely(ARM64_HAS_DIT))
set_pstate_dit(1);
__uaccess_enable_hw_pan();
@@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void)
*/
spectre_v4_enable_mitigation(NULL);
+ sme_suspend_exit();
+
/* Restore additional feature-specific configuration */
ptrauth_suspend_exit();
}
@@ -98,6 +101,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
struct sleep_stack_data state;
struct arm_cpuidle_irq_context context;
+ /*
+ * Some portions of CPU state (e.g. PSTATE.{PAN,DIT}) are initialized
+ * before alternatives are patched, but are only restored by
+ * __cpu_suspend_exit() after alternatives are patched. To avoid
+ * accidentally losing these bits we must not attempt to suspend until
+ * after alternatives have been patched.
+ */
+ WARN_ON(!system_capabilities_finalized());
+
/* Report any MTE async fault before going to suspend */
mte_suspend_enter();
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index d5ffaaab31a7..f08408b6e826 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -48,14 +48,16 @@ asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused)
*/
#define __arm64_sys_personality __arm64_sys_arm64_personality
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
const syscall_fn_t sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd.h>
+#include <asm/syscall_table_64.h>
};
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index fc40386afb1b..96bcfb907443 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -5,17 +5,12 @@
* Copyright (C) 2015 ARM Ltd.
*/
-/*
- * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and
- * asm/unistd32.h.
- */
-#define __COMPAT_SYSCALL_NR
-
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/syscalls.h>
#include <asm/syscall.h>
+#include <asm/unistd_compat_32.h>
asmlinkage long compat_sys_sigreturn(void);
asmlinkage long compat_sys_rt_sigreturn(void);
@@ -122,14 +117,16 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode,
return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
}
+#define __SYSCALL_WITH_COMPAT(nr, sym, compat) __SYSCALL(nr, compat)
+
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
-#include <asm/unistd32.h>
+#include <asm/syscall_table_32.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
-const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = {
- [0 ... __NR_compat_syscalls - 1] = __arm64_sys_ni_syscall,
-#include <asm/unistd32.h>
+const syscall_fn_t compat_sys_call_table[__NR_compat32_syscalls] = {
+ [0 ... __NR_compat32_syscalls - 1] = __arm64_sys_ni_syscall,
+#include <asm/syscall_table_32.h>
};
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index df14336c3a29..4a609e9b65de 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -31,7 +31,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
- if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/*
* The workaround requires an inner-shareable tlbi.
* We pick the reserved-ASID to minimise the impact.
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 9a70d9746b66..c062badd1a56 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -14,20 +14,18 @@
#include <asm/syscall.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
+#include <asm/unistd_compat_32.h>
long compat_arm_syscall(struct pt_regs *regs, int scno);
long sys_ni_syscall(void);
static long do_ni_syscall(struct pt_regs *regs, int scno)
{
-#ifdef CONFIG_COMPAT
- long ret;
if (is_compat_task()) {
- ret = compat_arm_syscall(regs, scno);
+ long ret = compat_arm_syscall(regs, scno);
if (ret != -ENOSYS)
return ret;
}
-#endif
return sys_ni_syscall();
}
@@ -45,7 +43,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
add_random_kstack_offset();
- if (scno < sc_nr) {
+ if (likely(scno < sc_nr)) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
ret = __invoke_syscall(regs, syscall_fn);
@@ -56,17 +54,15 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_set_return_value(current, regs, 0, ret);
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for arm64 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 9 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints: the AAPCS mandates a
- * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte aligned SP at function boundaries, which will remove the
+ * 4 low bits from any entropy chosen here.
*
- * The resulting 5 bits of entropy is seen in SP[8:4].
+ * The resulting 6 bits of entropy is seen in SP[9:4].
*/
- choose_random_kstack_offset(get_random_u16() & 0x1FF);
+ choose_random_kstack_offset(get_random_u16());
}
static inline bool has_syscall_work(unsigned long flags)
@@ -100,7 +96,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* (Similarly for HVC and SMC elsewhere.)
*/
- if (flags & _TIF_MTE_ASYNC_FAULT) {
+ if (unlikely(flags & _TIF_MTE_ASYNC_FAULT)) {
/*
* Process the asynchronous tag check fault before the actual
* syscall. do_notify_resume() will send a signal to userspace
@@ -158,7 +154,7 @@ void do_el0_svc(struct pt_regs *regs)
#ifdef CONFIG_COMPAT
void do_el0_svc_compat(struct pt_regs *regs)
{
- el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
+ el0_svc_common(regs, regs->regs[7], __NR_compat32_syscalls,
compat_sys_call_table);
}
#endif
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 817d788cd866..5d24dc53799b 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -15,62 +15,16 @@
#include <linux/arch_topology.h>
#include <linux/cacheinfo.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/sched/isolation.h>
+#include <linux/xarray.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
-#ifdef CONFIG_ACPI
-static bool __init acpi_cpu_is_threaded(int cpu)
-{
- int is_threaded = acpi_pptt_cpu_is_thread(cpu);
-
- /*
- * if the PPTT doesn't have thread information, assume a homogeneous
- * machine and return the current CPU's thread state.
- */
- if (is_threaded < 0)
- is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
-
- return !!is_threaded;
-}
-
-/*
- * Propagate the topology information of the processor_topology_node tree to the
- * cpu_topology array.
- */
-int __init parse_acpi_topology(void)
-{
- int cpu, topology_id;
-
- if (acpi_disabled)
- return 0;
-
- for_each_possible_cpu(cpu) {
- topology_id = find_acpi_cpu_topology(cpu, 0);
- if (topology_id < 0)
- return topology_id;
-
- if (acpi_cpu_is_threaded(cpu)) {
- cpu_topology[cpu].thread_id = topology_id;
- topology_id = find_acpi_cpu_topology(cpu, 1);
- cpu_topology[cpu].core_id = topology_id;
- } else {
- cpu_topology[cpu].thread_id = -1;
- cpu_topology[cpu].core_id = topology_id;
- }
- topology_id = find_acpi_cpu_topology_cluster(cpu);
- cpu_topology[cpu].cluster_id = topology_id;
- topology_id = find_acpi_cpu_topology_package(cpu);
- cpu_topology[cpu].package_id = topology_id;
- }
-
- return 0;
-}
-#endif
-
#ifdef CONFIG_ARM64_AMU_EXTN
#define read_corecnt() read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0)
#define read_constcnt() read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)
@@ -82,19 +36,34 @@ int __init parse_acpi_topology(void)
#undef pr_fmt
#define pr_fmt(fmt) "AMU: " fmt
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
+/*
+ * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until
+ * the CPU capacity and its associated frequency have been correctly
+ * initialized.
+ */
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
static cpumask_var_t amu_fie_cpus;
+struct amu_cntr_sample {
+ u64 arch_const_cycles_prev;
+ u64 arch_core_cycles_prev;
+ unsigned long last_scale_update;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples);
+
void update_freq_counters_refs(void)
{
- this_cpu_write(arch_core_cycles_prev, read_corecnt());
- this_cpu_write(arch_const_cycles_prev, read_constcnt());
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
+
+ amu_sample->arch_core_cycles_prev = read_corecnt();
+ amu_sample->arch_const_cycles_prev = read_constcnt();
}
static inline bool freq_counters_valid(int cpu)
{
+ struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
return false;
@@ -103,8 +72,8 @@ static inline bool freq_counters_valid(int cpu)
return false;
}
- if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
- !per_cpu(arch_core_cycles_prev, cpu))) {
+ if (unlikely(!amu_sample->arch_const_cycles_prev ||
+ !amu_sample->arch_core_cycles_prev)) {
pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
return false;
}
@@ -112,14 +81,14 @@ static inline bool freq_counters_valid(int cpu)
return true;
}
-static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
+void freq_inv_set_max_ratio(int cpu, u64 max_rate)
{
- u64 ratio;
+ u64 ratio, ref_rate = arch_timer_get_rate();
if (unlikely(!max_rate || !ref_rate)) {
- pr_debug("CPU%d: invalid maximum or reference frequency.\n",
+ WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n",
cpu);
- return -EINVAL;
+ return;
}
/*
@@ -139,27 +108,30 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
ratio = div64_u64(ratio, max_rate);
if (!ratio) {
WARN_ONCE(1, "Reference frequency too low.\n");
- return -EINVAL;
+ return;
}
- per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio;
-
- return 0;
+ WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio);
}
static void amu_scale_freq_tick(void)
{
+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
u64 prev_core_cnt, prev_const_cnt;
u64 core_cnt, const_cnt, scale;
- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+ prev_const_cnt = amu_sample->arch_const_cycles_prev;
+ prev_core_cnt = amu_sample->arch_core_cycles_prev;
update_freq_counters_refs();
- const_cnt = this_cpu_read(arch_const_cycles_prev);
- core_cnt = this_cpu_read(arch_core_cycles_prev);
+ const_cnt = amu_sample->arch_const_cycles_prev;
+ core_cnt = amu_sample->arch_core_cycles_prev;
+ /*
+ * This should not happen unless the AMUs have been reset and the
+ * counter values have not been restored - unlikely
+ */
if (unlikely(core_cnt <= prev_core_cnt ||
const_cnt <= prev_const_cnt))
return;
@@ -179,6 +151,8 @@ static void amu_scale_freq_tick(void)
scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
this_cpu_write(arch_freq_scale, (unsigned long)scale);
+
+ amu_sample->last_scale_update = jiffies;
}
static struct scale_freq_data amu_sfd = {
@@ -186,20 +160,114 @@ static struct scale_freq_data amu_sfd = {
.set_freq_scale = amu_scale_freq_tick,
};
+static __always_inline bool amu_fie_cpu_supported(unsigned int cpu)
+{
+ return cpumask_available(amu_fie_cpus) &&
+ cpumask_test_cpu(cpu, amu_fie_cpus);
+}
+
+void arch_cpu_idle_enter(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ if (!amu_fie_cpu_supported(cpu))
+ return;
+
+ /* Kick in AMU update but only if one has not happened already */
+ if (housekeeping_cpu(cpu, HK_TYPE_TICK) &&
+ time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu)))
+ amu_scale_freq_tick();
+}
+
+#define AMU_SAMPLE_EXP_MS 20
+
+int arch_freq_get_on_cpu(int cpu)
+{
+ struct amu_cntr_sample *amu_sample;
+ unsigned int start_cpu = cpu;
+ unsigned long last_update;
+ unsigned int freq = 0;
+ u64 scale;
+
+ if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
+ return -EOPNOTSUPP;
+
+ while (1) {
+
+ amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
+
+ last_update = amu_sample->last_scale_update;
+
+ /*
+ * For those CPUs that are in full dynticks mode, or those that have
+ * not seen tick for a while, try an alternative source for the counters
+ * (and thus freq scale), if available, for given policy: this boils
+ * down to identifying an active cpu within the same freq domain, if any.
+ */
+ if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
+ time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ int ref_cpu;
+
+ if (!policy)
+ return -EINVAL;
+
+ if (!cpumask_intersects(policy->related_cpus,
+ housekeeping_cpumask(HK_TYPE_TICK))) {
+ cpufreq_cpu_put(policy);
+ return -EOPNOTSUPP;
+ }
+
+ for_each_cpu_wrap(ref_cpu, policy->cpus, cpu + 1) {
+ if (ref_cpu == start_cpu) {
+ /* Prevent verifying same CPU twice */
+ ref_cpu = nr_cpu_ids;
+ break;
+ }
+ if (!idle_cpu(ref_cpu))
+ break;
+ }
+
+ cpufreq_cpu_put(policy);
+
+ if (ref_cpu >= nr_cpu_ids)
+ /* No alternative to pull info from */
+ return -EAGAIN;
+
+ cpu = ref_cpu;
+ } else {
+ break;
+ }
+ }
+ /*
+ * Reversed computation to the one used to determine
+ * the arch_freq_scale value
+ * (see amu_scale_freq_tick for details)
+ */
+ scale = arch_scale_freq_capacity(cpu);
+ freq = scale * arch_scale_freq_ref(cpu);
+ freq >>= SCHED_CAPACITY_SHIFT;
+ return freq;
+}
+
static void amu_fie_setup(const struct cpumask *cpus)
{
int cpu;
/* We are already set since the last insmod of cpufreq driver */
- if (unlikely(cpumask_subset(cpus, amu_fie_cpus)))
+ if (cpumask_available(amu_fie_cpus) &&
+ unlikely(cpumask_subset(cpus, amu_fie_cpus)))
return;
- for_each_cpu(cpu, cpus) {
- if (!freq_counters_valid(cpu) ||
- freq_inv_set_max_ratio(cpu,
- cpufreq_get_hw_max_freq(cpu) * 1000ULL,
- arch_timer_get_rate()))
+ for_each_cpu(cpu, cpus)
+ if (!freq_counters_valid(cpu))
return;
+
+ if (!cpumask_available(amu_fie_cpus) &&
+ !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
+ WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n",
+ cpumask_pr_args(cpus));
+ return;
}
cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);
@@ -237,17 +305,8 @@ static struct notifier_block init_amu_fie_notifier = {
static int __init init_amu_fie(void)
{
- int ret;
-
- if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL))
- return -ENOMEM;
-
- ret = cpufreq_register_notifier(&init_amu_fie_notifier,
+ return cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
- if (ret)
- free_cpumask_var(amu_fie_cpus);
-
- return ret;
}
core_initcall(init_amu_fie);
diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h
index 6c40f58b844a..c51b547b583e 100644
--- a/arch/arm64/kernel/trace-events-emulation.h
+++ b/arch/arm64/kernel/trace-events-emulation.h
@@ -18,7 +18,7 @@ TRACE_EVENT(instruction_emulation,
),
TP_fast_assign(
- __assign_str(instr, instr);
+ __assign_str(instr);
__entry->addr = addr;
),
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8b70759cdbb9..914282016069 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -41,7 +41,7 @@
#include <asm/extable.h>
#include <asm/insn.h>
#include <asm/kprobes.h>
-#include <asm/patching.h>
+#include <asm/text-patching.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
int show_unhandled_signals = 0;
-static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
+void dump_kernel_instr(unsigned long kaddr)
{
- unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
- if (user_mode(regs))
+ if (!is_ttbr1_addr(kaddr))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+ bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -169,25 +168,18 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
- printk("%sCode: %s\n", lvl, str);
+ printk(KERN_EMERG "Code: %s\n", str);
}
-#ifdef CONFIG_PREEMPT
-#define S_PREEMPT " PREEMPT"
-#elif defined(CONFIG_PREEMPT_RT)
-#define S_PREEMPT " PREEMPT_RT"
-#else
-#define S_PREEMPT ""
-#endif
-
#define S_SMP " SMP"
static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
+ unsigned long addr = instruction_pointer(regs);
- pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n",
+ pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
str, err, ++die_counter);
/* trap and error numbers are mostly meaningless on ARM */
@@ -198,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs)
print_modules();
show_regs(regs);
- dump_kernel_instr(KERN_EMERG, regs);
+ if (user_mode(regs))
+ return ret;
+
+ dump_kernel_instr(addr);
return ret;
}
@@ -273,6 +268,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -456,7 +457,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr)
u32 insn;
/* check for AArch32 breakpoint instructions */
- if (!aarch32_break_handler(regs))
+ if (try_handle_aarch32_break(regs))
return;
if (user_insn_read(regs, &insn))
@@ -500,6 +501,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr)
die("Oops - BTI", regs, esr);
}
+void do_el0_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0);
+}
+
+void do_el1_gcs(struct pt_regs *regs, unsigned long esr)
+{
+ die("Oops - GCS", regs, esr);
+}
+
void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
{
force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
@@ -516,53 +527,7 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
void do_el0_mops(struct pt_regs *regs, unsigned long esr)
{
- bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
- bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
- int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
- int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
- int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
- unsigned long dst, src, size;
-
- dst = pt_regs_read_reg(regs, dstreg);
- src = pt_regs_read_reg(regs, srcreg);
- size = pt_regs_read_reg(regs, sizereg);
-
- /*
- * Put the registers back in the original format suitable for a
- * prologue instruction, using the generic return routine from the
- * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
- */
- if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
- /* SET* instruction */
- if (option_a ^ wrong_option) {
- /* Format is from Option A; forward set */
- pt_regs_write_reg(regs, dstreg, dst + size);
- pt_regs_write_reg(regs, sizereg, -size);
- }
- } else {
- /* CPY* instruction */
- if (!(option_a ^ wrong_option)) {
- /* Format is from Option B */
- if (regs->pstate & PSR_N_BIT) {
- /* Backward copy */
- pt_regs_write_reg(regs, dstreg, dst - size);
- pt_regs_write_reg(regs, srcreg, src - size);
- }
- } else {
- /* Format is from Option A */
- if (size & BIT(63)) {
- /* Forward copy */
- pt_regs_write_reg(regs, dstreg, dst + size);
- pt_regs_write_reg(regs, srcreg, src + size);
- pt_regs_write_reg(regs, sizereg, -size);
- }
- }
- }
-
- if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
- regs->pc -= 8;
- else
- regs->pc -= 4;
+ arm64_mops_reset_regs(&regs->user_regs, esr);
/*
* If single stepping then finish the step before executing the
@@ -571,6 +536,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
user_fastforward_single_step(current);
}
+void do_el1_mops(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_mops_reset_regs(&regs->user_regs, esr);
+
+ kernel_fastforward_single_step(regs);
+}
+
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
@@ -631,7 +603,7 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
- if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_1542419)) {
/* Hide DIC so that we can trap the unnecessary maintenance...*/
val &= ~BIT(CTR_EL0_DIC_SHIFT);
@@ -647,18 +619,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)
@@ -884,6 +864,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_MOPS] = "MOPS",
[ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
[ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
+ [ESR_ELx_EC_GCS] = "Guarded Control Stack",
[ESR_ELx_EC_SERROR] = "SError",
[ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)",
[ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)",
@@ -916,8 +897,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
"Bad EL0 synchronous exception");
}
-#ifdef CONFIG_VMAP_STACK
-
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
@@ -943,16 +922,16 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne
__show_regs(regs);
/*
- * We use nmi_panic to limit the potential for recusive overflows, and
+ * We use nmi_panic to limit the potential for recursive overflows, and
* to get a better stack trace.
*/
nmi_panic(NULL, "kernel stack overflow");
cpu_park_loop();
}
-#endif
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
@@ -1009,7 +988,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
int is_valid_bugaddr(unsigned long addr)
{
/*
- * bug_handler() only called for BRK #BUG_BRK_IMM.
+ * bug_brk_handler() only called for BRK #BUG_BRK_IMM.
* So the answer is trivial -- any spurious instances with no
* bug table entry will be rejected by report_bug() and passed
* back to the debug-monitors code and handled as a fatal
@@ -1019,7 +998,7 @@ int is_valid_bugaddr(unsigned long addr)
}
#endif
-static int bug_handler(struct pt_regs *regs, unsigned long esr)
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
@@ -1039,13 +1018,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook bug_break_hook = {
- .fn = bug_handler,
- .imm = BUG_BRK_IMM,
-};
-
-#ifdef CONFIG_CFI_CLANG
-static int cfi_handler(struct pt_regs *regs, unsigned long esr)
+#ifdef CONFIG_CFI
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr)
{
unsigned long target;
u32 type;
@@ -1068,15 +1042,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
+#endif /* CONFIG_CFI */
-static struct break_hook cfi_break_hook = {
- .fn = cfi_handler,
- .imm = CFI_BRK_IMM_BASE,
- .mask = CFI_BRK_IMM_MASK,
-};
-#endif /* CONFIG_CFI_CLANG */
-
-static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr)
{
pr_err("%s generated an invalid instruction at %pS!\n",
"Kernel text patching",
@@ -1086,11 +1054,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook fault_break_hook = {
- .fn = reserved_fault_handler,
- .imm = FAULT_BRK_IMM,
-};
-
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -1098,7 +1061,7 @@ static struct break_hook fault_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned long esr)
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
@@ -1129,64 +1092,12 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook kasan_break_hook = {
- .fn = kasan_handler,
- .imm = KASAN_BRK_IMM,
- .mask = KASAN_BRK_MASK,
-};
#endif
#ifdef CONFIG_UBSAN_TRAP
-static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
- die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
+ die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook ubsan_break_hook = {
- .fn = ubsan_handler,
- .imm = UBSAN_BRK_IMM,
- .mask = UBSAN_BRK_MASK,
-};
-#endif
-
-#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
-
-/*
- * Initial handler for AArch64 BRK exceptions
- * This handler only used until debug_traps_init().
- */
-int __init early_brk64(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
-{
-#ifdef CONFIG_CFI_CLANG
- if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE)
- return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
- return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
- return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
- return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
-}
-
-void __init trap_init(void)
-{
- register_kernel_break_hook(&bug_break_hook);
-#ifdef CONFIG_CFI_CLANG
- register_kernel_break_hook(&cfi_break_hook);
#endif
- register_kernel_break_hook(&fault_break_hook);
-#ifdef CONFIG_KASAN_SW_TAGS
- register_kernel_break_hook(&kasan_break_hook);
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- register_kernel_break_hook(&ubsan_break_hook);
-#endif
- debug_traps_init();
-}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index d9e1355730ef..78ddf6bdecad 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,8 +18,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
-#include <linux/time_namespace.h>
-#include <linux/timekeeper_internal.h>
+#include <linux/vdso_datastore.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -34,19 +33,11 @@ enum vdso_abi {
VDSO_ABI_AA32,
};
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
struct vdso_abi_info {
const char *name;
const char *vdso_code_start;
const char *vdso_code_end;
unsigned long vdso_pages;
- /* Data Mapping */
- struct vm_special_mapping *dm;
/* Code Mapping */
struct vm_special_mapping *cm;
};
@@ -66,15 +57,6 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = {
#endif /* CONFIG_COMPAT_VDSO */
};
-/*
- * The vDSO data page.
- */
-static union {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = vdso_data_store.data;
-
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
@@ -116,75 +98,6 @@ static int __init __vdso_init(enum vdso_abi abi)
return 0;
}
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-/*
- * The vvar mapping contains data for a specific time namespace, so when a task
- * changes namespace we must unmap its vvar data for the old namespace.
- * Subsequent faults will map in data for the new namespace.
- *
- * For more details see timens_setup_vdso_data().
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
-
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
- zap_vma_pages(vma);
-#ifdef CONFIG_COMPAT_VDSO
- if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
- zap_vma_pages(vma);
-#endif
- }
-
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long pfn;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- if (timens_page)
- pfn = page_to_pfn(timens_page);
- else
- pfn = sym_to_pfn(vdso_data);
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = sym_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
-
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
static int __setup_additional_pages(enum vdso_abi abi,
struct mm_struct *mm,
struct linux_binprm *bprm,
@@ -194,11 +107,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
unsigned long gp_flags = 0;
void *ret;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_mapping_len = vdso_text_len + VDSO_NR_PAGES * PAGE_SIZE;
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -206,20 +119,19 @@ static int __setup_additional_pages(enum vdso_abi abi,
goto up_fail;
}
- ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_PFNMAP,
- vdso_info[abi].dm);
+ ret = vdso_install_vvar_mapping(mm, vdso_base);
if (IS_ERR(ret))
goto up_fail;
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
+ if (system_supports_bti_kernel())
gp_flags = VM_ARM64_BTI;
- vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_base += VDSO_NR_PAGES * PAGE_SIZE;
mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|gp_flags|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_SEALED_SYSMAP,
vdso_info[abi].cm);
if (IS_ERR(ret))
goto up_fail;
@@ -238,7 +150,6 @@ up_fail:
enum aarch32_map {
AA32_MAP_VECTORS, /* kuser helpers */
AA32_MAP_SIGPAGE,
- AA32_MAP_VVAR,
AA32_MAP_VDSO,
};
@@ -263,10 +174,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
.pages = &aarch32_sig_page,
.mremap = aarch32_sigpage_mremap,
},
- [AA32_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
[AA32_MAP_VDSO] = {
.name = "[vdso]",
.mremap = vdso_mremap,
@@ -316,7 +223,6 @@ static int __init __aarch32_alloc_vdso_pages(void)
if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
return 0;
- vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
return __vdso_init(VDSO_ABI_AA32);
@@ -351,7 +257,8 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
VM_READ | VM_EXEC |
- VM_MAYREAD | VM_MAYEXEC,
+ VM_MAYREAD | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_VECTORS]);
return PTR_ERR_OR_ZERO(ret);
@@ -374,7 +281,8 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
*/
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ | VM_EXEC | VM_MAYREAD |
- VM_MAYWRITE | VM_MAYEXEC,
+ VM_MAYWRITE | VM_MAYEXEC |
+ VM_SEALED_SYSMAP,
&aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
if (IS_ERR(ret))
goto out;
@@ -411,26 +319,14 @@ out:
}
#endif /* CONFIG_COMPAT */
-enum aarch64_map {
- AA64_MAP_VVAR,
- AA64_MAP_VDSO,
-};
-
-static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
- [AA64_MAP_VVAR] = {
- .name = "[vvar]",
- .fault = vvar_fault,
- },
- [AA64_MAP_VDSO] = {
- .name = "[vdso]",
- .mremap = vdso_mremap,
- },
+static struct vm_special_mapping aarch64_vdso_map __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
};
static int __init vdso_init(void)
{
- vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
- vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];
+ vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_map;
return __vdso_init(VDSO_ABI_AA64);
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fe7a53c6781f..7dec05dd33b7 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -7,9 +7,9 @@
#
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
-obj-vdso := vgettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -21,7 +21,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti
# potential future proofing if we end up with internal calls to the exported
# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so
# preparation in build-time C")).
-ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
+ldflags-y := -shared -soname=linux-vdso.so.1 \
-Bsymbolic --build-id=sha1 -n $(btildflags-y)
ifdef CONFIG_LD_ORPHAN_WARN
@@ -34,26 +34,28 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
# -Wmissing-prototypes and -Wmissing-declarations are removed from
-# the CFLAGS of vgettimeofday.c to make possible to build the
-# kernel with CONFIG_WERROR enabled.
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
- $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
- -Wmissing-prototypes -Wmissing-declarations
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD := y
-KCOV_INSTRUMENT := n
-
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled.
+CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+ $(RANDSTRUCT_CFLAGS) $(KSTACK_ERASE_CFLAGS) \
+ $(GCC_PLUGINS_CFLAGS) \
+ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+ -Wmissing-prototypes -Wmissing-declarations
+
+CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
+CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
+
+CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO)
+CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO)
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
-# Disable gcov profiling for VDSO code
-GCOV_PROFILE := n
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
@@ -68,7 +70,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
@@ -78,13 +80,3 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
# Actual build commands
quiet_cmd_vdsold_and_vdso_check = LD $@
cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
-
-# Install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso.so: $(obj)/vdso.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 45354f2ddf70..52314be29191 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -11,18 +11,18 @@
#include <linux/const.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <asm/vdso/vsyscall.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
OUTPUT_ARCH(aarch64)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -38,6 +38,7 @@ SECTIONS
*/
/DISCARD/ : {
*(.note.GNU-stack .note.gnu.property)
+ *(.ARM.attributes)
}
.note : { *(.note.*) } :text :note
@@ -102,6 +103,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..67890b445309
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/cache.h>
+#include <asm/assembler.h>
+
+ .text
+
+#define state0 v0
+#define state1 v1
+#define state2 v2
+#define state3 v3
+#define copy0 v4
+#define copy0_q q4
+#define copy1 v5
+#define copy2 v6
+#define copy3 v7
+#define copy3_d d7
+#define one_d d16
+#define one_q q16
+#define one_v v16
+#define tmp v17
+#define rot8 v18
+
+/*
+ * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and 8-bytes
+ * counter. Importantly does not spill to the stack.
+ *
+ * This implementation avoids d8-d15 because they are callee-save in user
+ * space.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ *
+ * x0: output bytes
+ * x1: 32-byte key input
+ * x2: 8-byte counter input/output
+ * x3: number of 64-byte block to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+ /* copy0 = "expand 32-byte k" */
+ mov_q x8, 0x3320646e61707865
+ mov_q x9, 0x6b20657479622d32
+ mov copy0.d[0], x8
+ mov copy0.d[1], x9
+
+ /* copy1,copy2 = key */
+ ld1 { copy1.4s, copy2.4s }, [x1]
+ /* copy3 = counter || zero nonce */
+ ld1 { copy3.2s }, [x2]
+
+ movi one_v.2s, #1
+ uzp1 one_v.4s, one_v.4s, one_v.4s
+
+.Lblock:
+ /* copy state to auxiliary vectors for the final add after the permute. */
+ mov state0.16b, copy0.16b
+ mov state1.16b, copy1.16b
+ mov state2.16b, copy2.16b
+ mov state3.16b, copy3.16b
+
+ mov w4, 20
+.Lpermute:
+ /*
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers state0-state3. It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ */
+
+.Ldoubleround:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ ext state1.16b, state1.16b, state1.16b, #4
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #12
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ add state0.4s, state0.4s, state1.4s
+ eor state3.16b, state3.16b, state0.16b
+ rev32 state3.8h, state3.8h
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #12
+ sri state1.4s, tmp.4s, #20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ add state0.4s, state0.4s, state1.4s
+ eor tmp.16b, state3.16b, state0.16b
+ shl state3.4s, tmp.4s, #8
+ sri state3.4s, tmp.4s, #24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ add state2.4s, state2.4s, state3.4s
+ eor tmp.16b, state1.16b, state2.16b
+ shl state1.4s, tmp.4s, #7
+ sri state1.4s, tmp.4s, #25
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ ext state1.16b, state1.16b, state1.16b, #12
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ ext state2.16b, state2.16b, state2.16b, #8
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ ext state3.16b, state3.16b, state3.16b, #4
+
+ subs w4, w4, #2
+ b.ne .Ldoubleround
+
+ /* output0 = state0 + state0 */
+ add state0.4s, state0.4s, copy0.4s
+ /* output1 = state1 + state1 */
+ add state1.4s, state1.4s, copy1.4s
+ /* output2 = state2 + state2 */
+ add state2.4s, state2.4s, copy2.4s
+ /* output2 = state3 + state3 */
+ add state3.4s, state3.4s, copy3.4s
+ st1 { state0.16b - state3.16b }, [x0]
+
+ /*
+ * ++copy3.counter, the 'add' clears the upper half of the SIMD register
+ * which is the expected behaviour here.
+ */
+ add copy3_d, copy3_d, one_d
+
+ /* output += 64, --nblocks */
+ add x0, x0, 64
+ subs x3, x3, #1
+ b.ne .Lblock
+
+ /* counter = copy3.counter */
+ st1 { copy3.2s }, [x2]
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+ movi state0.16b, #0
+ movi state1.16b, #0
+ movi state2.16b, #0
+ movi state3.16b, #0
+ movi copy1.16b, #0
+ movi copy2.16b, #0
+ ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..832fe195292b
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <uapi/asm-generic/errno.h>
+
+typeof(__cvdso_getrandom) __kernel_getrandom;
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (alternative_has_cap_likely(ARM64_HAS_FPSIMD))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 2f73e5bca213..9d0efed91414 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -3,7 +3,7 @@
# Makefile for vdso32
#
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
@@ -21,8 +21,6 @@ endif
cc32-option = $(call try-run,\
$(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
-cc32-disable-warning = $(call try-run,\
- $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -59,13 +57,13 @@ VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
VDSO_CAFLAGS += -march=armv8-a
VDSO_CFLAGS := $(VDSO_CAFLAGS)
-VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
# KBUILD_CFLAGS from top-level Makefile
VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
+ $(filter -Werror,$(KBUILD_CPPFLAGS)) \
-Werror-implicit-function-declaration \
-Wno-format-security \
- -std=gnu11
+ -std=gnu11 -fms-extensions
VDSO_CFLAGS += -O2
# Some useful compiler-dependent flags from top-level Makefile
VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
@@ -73,16 +71,7 @@ VDSO_CFLAGS += -fno-strict-overflow
VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
VDSO_CFLAGS += -Werror=date-time
VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
-
-# The 32-bit compiler does not provide 128-bit integers, which are used in
-# some headers that are indirectly included from the vDSO code.
-# This hack makes the compiler happy and should trigger a warning/error if
-# variables of such type are referenced.
-VDSO_CFLAGS += -D__uint128_t='void*'
-# Silence some warnings coming from headers that operate on long's
-# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
-VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
-VDSO_CFLAGS += -Wno-int-to-pointer-cast
+VDSO_CFLAGS += $(if $(CONFIG_CC_IS_CLANG),-Wno-microsoft-anon-tag)
# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is
# unreliable.
@@ -98,7 +87,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
# From arm vDSO Makefile
VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
-VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
+VDSO_LDFLAGS += -shared --build-id=sha1
VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
@@ -118,7 +107,7 @@ endif
VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
# Build rules
-targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw
c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
@@ -127,19 +116,16 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
- $(call if_changed,vdsosym)
-
# Strip rule for vdso.so
$(obj)/vdso.so: OBJCOPYFLAGS := -S
-$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,objcopy)
-$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
$(call if_changed,vdsomunge)
# Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+$(obj)/vdso.so.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold_and_vdso_check)
# Compilation rules for the vDSO sources
@@ -166,19 +152,3 @@ quiet_cmd_vdsoas = AS32 $@
quiet_cmd_vdsomunge = MUNGE $@
cmd_vdsomunge = $(obj)/$(munge) $< $@
-
-# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
-gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
-# The AArch64 nm should be able to read an AArch32 binary
- cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-# Install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL32 $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
-
-vdso.so: $(obj)/vdso.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 8d95d7d35057..e02b27487ce8 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -12,17 +12,16 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
OUTPUT_ARCH(arm)
SECTIONS
{
- PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 5acff29c5991..29b4d8f61e39 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -5,6 +5,8 @@
* Copyright (C) 2018 ARM Limited
*
*/
+#define BUILD_VDSO32_64
+#include <vdso/gettime.h>
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/vmcore_info.c
index 66cde752cd74..9619ece66b79 100644
--- a/arch/arm64/kernel/crash_core.c
+++ b/arch/arm64/kernel/vmcore_info.c
@@ -4,7 +4,7 @@
* Copyright (C) Huawei Futurewei Technologies.
*/
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
#include <asm/cpufeature.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
@@ -14,7 +14,7 @@ static inline u64 get_tcr_el1_t1sz(void);
static inline u64 get_tcr_el1_t1sz(void)
{
- return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET;
+ return (read_sysreg(tcr_el1) & TCR_EL1_T1SZ_MASK) >> TCR_EL1_T1SZ_SHIFT;
}
void arch_crash_save_vmcoreinfo(void)
@@ -23,7 +23,6 @@ void arch_crash_save_vmcoreinfo(void)
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
- vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3cd7e76cc562..ad6133b89e7a 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -13,7 +13,7 @@
*(__kvm_ex_table) \
__stop___kvm_ex_table = .;
-#define HYPERVISOR_DATA_SECTIONS \
+#define HYPERVISOR_RODATA_SECTIONS \
HYP_SECTION_NAME(.rodata) : { \
. = ALIGN(PAGE_SIZE); \
__hyp_rodata_start = .; \
@@ -23,6 +23,15 @@
__hyp_rodata_end = .; \
}
+#define HYPERVISOR_DATA_SECTION \
+ HYP_SECTION_NAME(.data) : { \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_start = .; \
+ *(HYP_SECTION_NAME(.data)) \
+ . = ALIGN(PAGE_SIZE); \
+ __hyp_data_end = .; \
+ }
+
#define HYPERVISOR_PERCPU_SECTION \
. = ALIGN(PAGE_SIZE); \
HYP_SECTION_NAME(.data..percpu) : { \
@@ -51,7 +60,8 @@
#define SBSS_ALIGN PAGE_SIZE
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
-#define HYPERVISOR_DATA_SECTIONS
+#define HYPERVISOR_RODATA_SECTIONS
+#define HYPERVISOR_DATA_SECTION
#define HYPERVISOR_PERCPU_SECTION
#define HYPERVISOR_RELOC_SECTION
#define SBSS_ALIGN 0
@@ -126,9 +136,9 @@ jiffies = jiffies_64;
#ifdef CONFIG_UNWIND_TABLES
#define UNWIND_DATA_SECTIONS \
.eh_frame : { \
- __eh_frame_start = .; \
+ __pi___eh_frame_start = .; \
*(.eh_frame) \
- __eh_frame_end = .; \
+ __pi___eh_frame_end = .; \
}
#else
#define UNWIND_DATA_SECTIONS
@@ -162,6 +172,7 @@ SECTIONS
/DISCARD/ : {
*(.interp .dynamic)
*(.dynsym .dynstr .hash .gnu.hash)
+ *(.ARM.attributes)
}
. = KIMAGE_VADDR;
@@ -189,7 +200,7 @@ SECTIONS
/* everything from this point to __init_begin will be marked RO NX */
RO_DATA(PAGE_SIZE)
- HYPERVISOR_DATA_SECTIONS
+ HYPERVISOR_RODATA_SECTIONS
.got : { *(.got) }
/*
@@ -248,9 +259,9 @@ SECTIONS
__inittext_end = .;
__initdata_begin = .;
- init_idmap_pg_dir = .;
+ __pi_init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
- init_idmap_pg_end = .;
+ __pi_init_idmap_pg_end = .;
.init.data : {
INIT_DATA
@@ -264,31 +275,38 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST_VARIABLES
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
- __rela_start = .;
+ __pi_rela_start = .;
*(.rela .rela*)
- __rela_end = .;
+ __pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
- __relr_start = .;
+ __pi_relr_start = .;
*(.relr.dyn)
- __relr_end = .;
+ __pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
+ .data.rel.ro : { *(.data.rel.ro) }
+ ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
+
_data = .;
_sdata = .;
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ HYPERVISOR_DATA_SECTION
+
/*
* Data written with the MMU off but read with the MMU on requires
* cache lines to be invalidated, discarding up to a Cache Writeback
@@ -311,16 +329,23 @@ SECTIONS
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
_edata = .;
+ /* start of zero-init region */
BSS_SECTION(SBSS_ALIGN, 0, 0)
+ __pi___bss_start = __bss_start;
. = ALIGN(PAGE_SIZE);
- init_pg_dir = .;
+ __pi_init_pg_dir = .;
. += INIT_DIR_SIZE;
- init_pg_end = .;
+ __pi_init_pg_end = .;
+ /* end of zero-init region */
+
+ . += SZ_4K; /* stack for the early C runtime */
+ early_init_stack = .;
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
+ __pi__end = .;
STABS_DEBUG
DWARF_DEBUG
@@ -336,9 +361,6 @@ SECTIONS
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
}
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-
- .data.rel.ro : { *(.data.rel.ro) }
- ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!")
}
#include "image-vars.h"
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
index dcd25322127c..3093037dcb7b 100644
--- a/arch/arm64/kernel/watchdog_hld.c
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -34,3 +34,61 @@ bool __init arch_perf_nmi_is_available(void)
*/
return arm_pmu_irq_is_nmi();
}
+
+static int watchdog_perf_update_period(void *data)
+{
+ int cpu = smp_processor_id();
+ u64 max_cpu_freq, new_period;
+
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ return 0;
+
+ new_period = watchdog_thresh * max_cpu_freq;
+ hardlockup_detector_perf_adjust_period(new_period);
+
+ return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (val != CPUFREQ_CREATE_POLICY)
+ return NOTIFY_DONE;
+
+ /*
+ * Let each online CPU related to the policy update the period by their
+ * own. This will serialize with the framework on start/stop the lockup
+ * detector (softlockup_{start,stop}_all) and avoid potential race
+ * condition. Otherwise we may have below theoretical race condition:
+ * (core 0/1 share the same policy)
+ * [core 0] [core 1]
+ * hardlockup_detector_event_create()
+ * hw_nmi_get_sample_period()
+ * (cpufreq registered, notifier callback invoked)
+ * watchdog_freq_notifier_callback()
+ * watchdog_perf_update_period()
+ * (since core 1's event's not yet created,
+ * the period is not set)
+ * perf_event_create_kernel_counter()
+ * (event's period is SAFE_MAX_CPU_FREQ)
+ */
+ for_each_cpu(cpu, policy->cpus)
+ smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+ .notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+ return cpufreq_register_notifier(&watchdog_freq_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);