summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--arch/arm64/include/asm/Kbuild66
-rw-r--r--arch/arm64/include/asm/acenv.h15
-rw-r--r--arch/arm64/include/asm/acpi.h191
-rw-r--r--arch/arm64/include/asm/alternative-macros.h268
-rw-r--r--arch/arm64/include/asm/alternative.h41
-rw-r--r--arch/arm64/include/asm/apple_m1_pmu.h65
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h192
-rw-r--r--arch/arm64/include/asm/arch_timer.h207
-rw-r--r--arch/arm64/include/asm/archrandom.h132
-rw-r--r--arch/arm64/include/asm/arm-cci.h16
-rw-r--r--arch/arm64/include/asm/arm_dsu_pmu.h126
-rw-r--r--arch/arm64/include/asm/arm_pmuv3.h191
-rw-r--r--arch/arm64/include/asm/asm-bug.h69
-rw-r--r--arch/arm64/include/asm/asm-extable.h137
-rw-r--r--arch/arm64/include/asm/asm-prototypes.h32
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h97
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h92
-rw-r--r--arch/arm64/include/asm/assembler.h882
-rw-r--r--arch/arm64/include/asm/atomic.h423
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h339
-rw-r--r--arch/arm64/include/asm/atomic_lse.h315
-rw-r--r--arch/arm64/include/asm/barrier.h232
-rw-r--r--arch/arm64/include/asm/bitops.h44
-rw-r--r--arch/arm64/include/asm/bitrev.h20
-rw-r--r--arch/arm64/include/asm/boot.h20
-rw-r--r--arch/arm64/include/asm/brk-imm.h42
-rw-r--r--arch/arm64/include/asm/bug.h28
-rw-r--r--arch/arm64/include/asm/cache.h138
-rw-r--r--arch/arm64/include/asm/cacheflush.h158
-rw-r--r--arch/arm64/include/asm/cachetype.h48
-rw-r--r--arch/arm64/include/asm/cfi.h7
-rw-r--r--arch/arm64/include/asm/checksum.h49
-rw-r--r--arch/arm64/include/asm/clocksource.h7
-rw-r--r--arch/arm64/include/asm/cmpxchg.h394
-rw-r--r--arch/arm64/include/asm/compat.h273
-rw-r--r--arch/arm64/include/asm/compiler.h62
-rw-r--r--arch/arm64/include/asm/cpu.h82
-rw-r--r--arch/arm64/include/asm/cpu_ops.h57
-rw-r--r--arch/arm64/include/asm/cpucaps.h82
-rw-r--r--arch/arm64/include/asm/cpufeature.h1083
-rw-r--r--arch/arm64/include/asm/cpuidle.h41
-rw-r--r--arch/arm64/include/asm/cputable.h30
-rw-r--r--arch/arm64/include/asm/cputype.h333
-rw-r--r--arch/arm64/include/asm/crash_reserve.h10
-rw-r--r--arch/arm64/include/asm/current.h29
-rw-r--r--arch/arm64/include/asm/daifflags.h144
-rw-r--r--arch/arm64/include/asm/dcc.h41
-rw-r--r--arch/arm64/include/asm/debug-monitors.h95
-rw-r--r--arch/arm64/include/asm/device.h17
-rw-r--r--arch/arm64/include/asm/dma-mapping.h130
-rw-r--r--arch/arm64/include/asm/dmi.h31
-rw-r--r--arch/arm64/include/asm/efi.h154
-rw-r--r--arch/arm64/include/asm/el2_setup.h593
-rw-r--r--arch/arm64/include/asm/elf.h211
-rw-r--r--arch/arm64/include/asm/entry-common.h57
-rw-r--r--arch/arm64/include/asm/esr.h574
-rw-r--r--arch/arm64/include/asm/exception.h98
-rw-r--r--arch/arm64/include/asm/exec.h15
-rw-r--r--arch/arm64/include/asm/extable.h51
-rw-r--r--arch/arm64/include/asm/fb.h34
-rw-r--r--arch/arm64/include/asm/fixmap.h121
-rw-r--r--arch/arm64/include/asm/fpsimd.h474
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h319
-rw-r--r--arch/arm64/include/asm/fpu.h27
-rw-r--r--arch/arm64/include/asm/ftrace.h231
-rw-r--r--arch/arm64/include/asm/futex.h136
-rw-r--r--arch/arm64/include/asm/gcs.h196
-rw-r--r--arch/arm64/include/asm/gpr-num.h26
-rw-r--r--arch/arm64/include/asm/hardirq.h105
-rw-r--r--arch/arm64/include/asm/hugetlb.h171
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h89
-rw-r--r--arch/arm64/include/asm/hwcap.h201
-rw-r--r--arch/arm64/include/asm/hyp_image.h68
-rw-r--r--arch/arm64/include/asm/hypervisor.h16
-rw-r--r--arch/arm64/include/asm/image.h59
-rw-r--r--arch/arm64/include/asm/insn-def.h23
-rw-r--r--arch/arm64/include/asm/insn.h735
-rw-r--r--arch/arm64/include/asm/io.h399
-rw-r--r--arch/arm64/include/asm/irq.h21
-rw-r--r--arch/arm64/include/asm/irq_work.h10
-rw-r--r--arch/arm64/include/asm/irqflags.h216
-rw-r--r--arch/arm64/include/asm/jump_label.h62
-rw-r--r--arch/arm64/include/asm/kasan.h26
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h86
-rw-r--r--arch/arm64/include/asm/kexec.h135
-rw-r--r--arch/arm64/include/asm/kfence.h31
-rw-r--r--arch/arm64/include/asm/kgdb.h114
-rw-r--r--arch/arm64/include/asm/kprobes.h52
-rw-r--r--arch/arm64/include/asm/kvm_arm.h532
-rw-r--r--arch/arm64/include/asm/kvm_asm.h441
-rw-r--r--arch/arm64/include/asm/kvm_coproc.h56
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h661
-rw-r--r--arch/arm64/include/asm/kvm_host.h1678
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h151
-rw-r--r--arch/arm64/include/asm/kvm_mmio.h59
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h417
-rw-r--r--arch/arm64/include/asm/kvm_mte.h66
-rw-r--r--arch/arm64/include/asm/kvm_nested.h409
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h881
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h203
-rw-r--r--arch/arm64/include/asm/kvm_psci.h23
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h124
-rw-r--r--arch/arm64/include/asm/kvm_types.h8
-rw-r--r--arch/arm64/include/asm/linkage.h43
-rw-r--r--arch/arm64/include/asm/lse.h37
-rw-r--r--arch/arm64/include/asm/mem_encrypt.h37
-rw-r--r--arch/arm64/include/asm/memblock.h21
-rw-r--r--arch/arm64/include/asm/memory.h430
-rw-r--r--arch/arm64/include/asm/mman.h98
-rw-r--r--arch/arm64/include/asm/mmu.h121
-rw-r--r--arch/arm64/include/asm/mmu_context.h368
-rw-r--r--arch/arm64/include/asm/module.h72
-rw-r--r--arch/arm64/include/asm/module.lds.h27
-rw-r--r--arch/arm64/include/asm/mshyperv.h66
-rw-r--r--arch/arm64/include/asm/mte-def.h18
-rw-r--r--arch/arm64/include/asm/mte-kasan.h264
-rw-r--r--arch/arm64/include/asm/mte.h286
-rw-r--r--arch/arm64/include/asm/neon-intrinsics.h40
-rw-r--r--arch/arm64/include/asm/neon.h19
-rw-r--r--arch/arm64/include/asm/numa.h8
-rw-r--r--arch/arm64/include/asm/page-def.h15
-rw-r--r--arch/arm64/include/asm/page.h67
-rw-r--r--arch/arm64/include/asm/paravirt.h29
-rw-r--r--arch/arm64/include/asm/paravirt_api_clock.h1
-rw-r--r--arch/arm64/include/asm/pci.h24
-rw-r--r--arch/arm64/include/asm/percpu.h278
-rw-r--r--arch/arm64/include/asm/perf_event.h30
-rw-r--r--arch/arm64/include/asm/pgalloc.h119
-rw-r--r--arch/arm64/include/asm/pgtable-2level-hwdef.h43
-rw-r--r--arch/arm64/include/asm/pgtable-2level-types.h60
-rw-r--r--arch/arm64/include/asm/pgtable-3level-hwdef.h50
-rw-r--r--arch/arm64/include/asm/pgtable-3level-types.h66
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h269
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h192
-rw-r--r--arch/arm64/include/asm/pgtable-types.h69
-rw-r--r--arch/arm64/include/asm/pgtable.h2027
-rw-r--r--arch/arm64/include/asm/pkeys.h106
-rw-r--r--arch/arm64/include/asm/pmu.h82
-rw-r--r--arch/arm64/include/asm/pointer_auth.h153
-rw-r--r--arch/arm64/include/asm/por.h34
-rw-r--r--arch/arm64/include/asm/preempt.h102
-rw-r--r--arch/arm64/include/asm/probes.h27
-rw-r--r--arch/arm64/include/asm/proc-fns.h37
-rw-r--r--arch/arm64/include/asm/processor.h382
-rw-r--r--arch/arm64/include/asm/prom.h1
-rw-r--r--arch/arm64/include/asm/psci.h38
-rw-r--r--arch/arm64/include/asm/ptdump.h89
-rw-r--r--arch/arm64/include/asm/ptrace.h342
-rw-r--r--arch/arm64/include/asm/pvclock-abi.h17
-rw-r--r--arch/arm64/include/asm/rqspinlock.h93
-rw-r--r--arch/arm64/include/asm/rsi.h70
-rw-r--r--arch/arm64/include/asm/rsi_cmds.h162
-rw-r--r--arch/arm64/include/asm/rsi_smc.h193
-rw-r--r--arch/arm64/include/asm/runtime-const.h88
-rw-r--r--arch/arm64/include/asm/rwonce.h69
-rw-r--r--arch/arm64/include/asm/scs.h60
-rw-r--r--arch/arm64/include/asm/sdei.h53
-rw-r--r--arch/arm64/include/asm/seccomp.h31
-rw-r--r--arch/arm64/include/asm/sections.h32
-rw-r--r--arch/arm64/include/asm/semihost.h24
-rw-r--r--arch/arm64/include/asm/set_memory.h22
-rw-r--r--arch/arm64/include/asm/setup.h44
-rw-r--r--arch/arm64/include/asm/shmparam.h15
-rw-r--r--arch/arm64/include/asm/sigcontext.h31
-rw-r--r--arch/arm64/include/asm/signal.h25
-rw-r--r--arch/arm64/include/asm/signal32.h72
-rw-r--r--arch/arm64/include/asm/simd.h53
-rw-r--r--arch/arm64/include/asm/smp.h156
-rw-r--r--arch/arm64/include/asm/smp_plat.h44
-rw-r--r--arch/arm64/include/asm/sparsemem.h40
-rw-r--r--arch/arm64/include/asm/spectre.h123
-rw-r--r--arch/arm64/include/asm/spinlock.h200
-rw-r--r--arch/arm64/include/asm/spinlock_types.h33
-rw-r--r--arch/arm64/include/asm/stack_pointer.h10
-rw-r--r--arch/arm64/include/asm/stackprotector.h40
-rw-r--r--arch/arm64/include/asm/stacktrace.h131
-rw-r--r--arch/arm64/include/asm/stacktrace/common.h171
-rw-r--r--arch/arm64/include/asm/stacktrace/frame.h48
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h55
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h33
-rw-r--r--arch/arm64/include/asm/stat.h14
-rw-r--r--arch/arm64/include/asm/string.h62
-rw-r--r--arch/arm64/include/asm/suspend.h54
-rw-r--r--arch/arm64/include/asm/sync_bitops.h17
-rw-r--r--arch/arm64/include/asm/syscall.h117
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h82
-rw-r--r--arch/arm64/include/asm/syscalls.h30
-rw-r--r--arch/arm64/include/asm/sysreg.h1294
-rw-r--r--arch/arm64/include/asm/system_misc.h39
-rw-r--r--arch/arm64/include/asm/text-patching.h17
-rw-r--r--arch/arm64/include/asm/thread_info.h171
-rw-r--r--arch/arm64/include/asm/timex.h15
-rw-r--r--arch/arm64/include/asm/tlb.h227
-rw-r--r--arch/arm64/include/asm/tlbbatch.h12
-rw-r--r--arch/arm64/include/asm/tlbflush.h616
-rw-r--r--arch/arm64/include/asm/topology.h44
-rw-r--r--arch/arm64/include/asm/trans_pgd.h41
-rw-r--r--arch/arm64/include/asm/traps.h166
-rw-r--r--arch/arm64/include/asm/uaccess.h610
-rw-r--r--arch/arm64/include/asm/ucontext.h30
-rw-r--r--arch/arm64/include/asm/unistd.h31
-rw-r--r--arch/arm64/include/asm/unistd32.h422
-rw-r--r--arch/arm64/include/asm/uprobes.h42
-rw-r--r--arch/arm64/include/asm/vdso.h31
-rw-r--r--arch/arm64/include/asm/vdso/clocksource.h11
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h36
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h166
-rw-r--r--arch/arm64/include/asm/vdso/getrandom.h38
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h107
-rw-r--r--arch/arm64/include/asm/vdso/processor.h17
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h27
-rw-r--r--arch/arm64/include/asm/vdso_datapage.h43
-rw-r--r--arch/arm64/include/asm/vectors.h73
-rw-r--r--arch/arm64/include/asm/vermagic.h10
-rw-r--r--arch/arm64/include/asm/virt.h151
-rw-r--r--arch/arm64/include/asm/vmalloc.h74
-rw-r--r--arch/arm64/include/asm/vmap_stack.h25
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h112
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h69
-rw-r--r--arch/arm64/include/asm/xen/events.h9
-rw-r--r--arch/arm64/include/asm/xen/hypercall.h2
-rw-r--r--arch/arm64/include/asm/xen/hypervisor.h2
-rw-r--r--arch/arm64/include/asm/xen/interface.h2
-rw-r--r--arch/arm64/include/asm/xen/page.h8
-rw-r--r--arch/arm64/include/asm/xen/swiotlb-xen.h1
-rw-r--r--arch/arm64/include/asm/xen/xen-ops.h2
-rw-r--r--arch/arm64/include/asm/xor.h73
227 files changed, 28043 insertions, 5392 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 79a642d199f2..d2ff8f6c3231 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,52 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscall_table_32.h
+syscall-y += syscall_table_64.h
+# arm32 syscall table used by lib/compat_audit.c:
+syscall-y += unistd_32.h
+# same constants with prefixes, used by vdso, seccomp and sigreturn:
+syscall-y += unistd_compat_32.h
-generic-y += bug.h
-generic-y += bugs.h
-generic-y += checksum.h
-generic-y += clkdev.h
-generic-y += cputime.h
-generic-y += current.h
-generic-y += delay.h
-generic-y += div64.h
-generic-y += dma.h
-generic-y += emergency-restart.h
-generic-y += errno.h
-generic-y += ftrace.h
-generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
-generic-y += irq_regs.h
-generic-y += kdebug.h
-generic-y += kmap_types.h
-generic-y += kvm_para.h
-generic-y += local.h
-generic-y += local64.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += mutex.h
-generic-y += pci.h
-generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
-generic-y += resource.h
-generic-y += scatterlist.h
-generic-y += sections.h
-generic-y += segment.h
-generic-y += sembuf.h
-generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += switch_to.h
-generic-y += swab.h
-generic-y += termbits.h
-generic-y += termios.h
-generic-y += topology.h
-generic-y += trace_clock.h
-generic-y += types.h
-generic-y += unaligned.h
+generic-y += early_ioremap.h
+generic-y += fprobe.h
+generic-y += mcs_spinlock.h
+generic-y += mmzone.h
+generic-y += qrwlock.h
+generic-y += qspinlock.h
+generic-y += parport.h
generic-y += user.h
-generic-y += vga.h
-generic-y += xor.h
+
+generated-y += cpucap-defs.h
+generated-y += sysreg-defs.h
diff --git a/arch/arm64/include/asm/acenv.h b/arch/arm64/include/asm/acenv.h
new file mode 100644
index 000000000000..f1f810dc9ec8
--- /dev/null
+++ b/arch/arm64/include/asm/acenv.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * ARM64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Linaro Ltd.
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ * Author: Graeme Gregory <graeme.gregory@linaro.org>
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* It is required unconditionally by ACPI core, update it when needed. */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
new file mode 100644
index 000000000000..c07a58b96329
--- /dev/null
+++ b/arch/arm64/include/asm/acpi.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013-2014, Linaro Ltd.
+ * Author: Al Stone <al.stone@linaro.org>
+ * Author: Graeme Gregory <graeme.gregory@linaro.org>
+ * Author: Hanjun Guo <hanjun.guo@linaro.org>
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#include <linux/cpuidle.h>
+#include <linux/efi.h>
+#include <linux/memblock.h>
+#include <linux/psci.h>
+#include <linux/stddef.h>
+
+#include <asm/cputype.h>
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/smp_plat.h>
+#include <asm/tlbflush.h>
+
+/* Macros for consistency checks of the GICC subtable of MADT */
+
+/*
+ * MADT GICC minimum length refers to the MADT GICC structure table length as
+ * defined in the earliest ACPI version supported on arm64, ie ACPI 5.1.
+ *
+ * The efficiency_class member was added to the
+ * struct acpi_madt_generic_interrupt to represent the MADT GICC structure
+ * "Processor Power Efficiency Class" field, added in ACPI 6.0 whose offset
+ * is therefore used to delimit the MADT GICC structure minimum length
+ * appropriately.
+ */
+#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \
+ struct acpi_madt_generic_interrupt, efficiency_class)
+
+#define BAD_MADT_GICC_ENTRY(entry, end) \
+ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
+ (unsigned long)(entry) + (entry)->header.length > (end))
+
+#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \
+ spe_interrupt) + sizeof(u16))
+
+#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \
+ trbe_interrupt) + sizeof(u16))
+/*
+ * Arm® Functional Fixed Hardware Specification Version 1.2.
+ * Table 2: Arm Architecture context loss flags
+ */
+#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */
+
+static inline unsigned int arch_get_idle_state_flags(u32 arch_flags)
+{
+ if (arch_flags & CPUIDLE_CORE_CTXT)
+ return CPUIDLE_FLAG_TIMER_STOP;
+
+ return 0;
+}
+#define arch_get_idle_state_flags arch_get_idle_state_flags
+
+#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */
+#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */
+#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */
+
+/* Basic configuration for ACPI */
+#ifdef CONFIG_ACPI
+pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
+
+/* ACPI table mapping after acpi_permanent_mmap is set */
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HWID
+
+#define acpi_strict 1 /* No out-of-spec workarounds on ARM64 */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+static inline void disable_acpi(void)
+{
+ acpi_disabled = 1;
+ acpi_pci_disabled = 1;
+ acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+ acpi_disabled = 0;
+ acpi_pci_disabled = 0;
+ acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpu_logical_map(cpu)
+
+/*
+ * It's used from ACPI core in kdump to boot UP system with SMP kernel,
+ * with this check the ACPI core will not override the CPU index
+ * obtained from GICC with 0 and not print some error message as well.
+ * Since MADT must provide at least one GICC structure for GIC
+ * initialization, CPU will be always available in MADT on ARM64.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+ return true;
+}
+
+struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu);
+static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
+{
+ return acpi_cpu_get_madt_gicc(cpu)->uid;
+}
+
+static inline int get_cpu_for_acpi_id(u32 uid)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (acpi_cpu_get_madt_gicc(cpu) &&
+ uid == get_acpi_id_for_cpu(cpu))
+ return cpu;
+
+ return -EINVAL;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+void __init acpi_init_cpus(void);
+int apei_claim_sea(struct pt_regs *regs);
+#else
+static inline void acpi_init_cpus(void) { }
+static inline int apei_claim_sea(struct pt_regs *regs) { return -ENOENT; }
+#endif /* CONFIG_ACPI */
+
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
+static __always_inline const char *acpi_get_enable_method(int cpu)
+{
+ if (acpi_psci_present())
+ return "psci";
+
+ if (acpi_parking_protocol_valid(cpu))
+ return "parking-protocol";
+
+ return NULL;
+}
+
+#ifdef CONFIG_ACPI_APEI
+/*
+ * acpi_disable_cmcff is used in drivers/acpi/apei/hest.c for disabling
+ * IA-32 Architecture Corrected Machine Check (CMC) Firmware-First mode
+ * with a kernel command line parameter "acpi=nocmcoff". But we don't
+ * have this IA-32 specific feature on ARM64, this definition is only
+ * for compatibility.
+ */
+#define acpi_disable_cmcff 1
+static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
+{
+ return __acpi_get_mem_attribute(addr);
+}
+#endif /* CONFIG_ACPI_APEI */
+
+#ifdef CONFIG_ACPI_NUMA
+int arm64_acpi_numa_init(void);
+int acpi_numa_get_nid(unsigned int cpu);
+void acpi_map_cpus_to_nodes(void);
+#else
+static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
+static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
+static inline void acpi_map_cpus_to_nodes(void) { }
+#endif /* CONFIG_ACPI_NUMA */
+
+#define ACPI_TABLE_UPGRADE_MAX_PHYS MEMBLOCK_ALLOC_ACCESSIBLE
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644
index 000000000000..862416624852
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <linux/const.h>
+#include <vdso/bits.h>
+
+#include <asm/cpucaps.h>
+#include <asm/insn-def.h>
+
+/*
+ * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly
+ * macros below we must use we must use `(1 << ARM64_CB_SHIFT)`.
+ */
+#define ARM64_CB_SHIFT 15
+#define ARM64_CB_BIT BIT(ARM64_CB_SHIFT)
+
+#if ARM64_NCAPS >= ARM64_CB_BIT
+#error "cpucaps have overflown ARM64_CB_BIT"
+#endif
+
+#ifndef __ASSEMBLER__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(cpucap) \
+ " .word 661b - .\n" /* label */ \
+ " .word 663f - .\n" /* new instruction */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(cpucap, cb) \
+ " .word 661b - .\n" /* label */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directive fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ * .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(cpucap) \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "663:\n\t" \
+ newinstr "\n" \
+ "664:\n\t" \
+ ".org . - (664b-663b) + (662b-661b)\n\t" \
+ ".org . - (662b-661b) + (664b-663b)\n\t" \
+ ".previous\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY_CB(cpucap, cb) \
+ ".popsection\n" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len
+ .word \orig_offset - .
+ .word \alt_offset - .
+ .hword (\cpucap)
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+ .if \enable
+661: \insn1
+662: .pushsection .altinstructions, "a"
+ altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+ .popsection
+ .subsection 1
+663: \insn2
+664: .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .previous
+ .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ * sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ * alternative sequence it is defined in (branches into an
+ * alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+ .popsection
+661:
+.endm
+
+.macro alternative_if cap
+ .set .Lasm_alt_mode, 1
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+ .popsection
+ .subsection 1
+ .align 2 /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cap, cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0
+ .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+ .if .Lasm_alt_mode==0
+ .subsection 1
+ .else
+ .previous
+ .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .if .Lasm_alt_mode==0
+ .previous
+ .endif
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+ nops (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
+ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ * will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...) \
+ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+
+static __always_inline bool
+alternative_has_cap_likely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm goto(
+#ifdef BUILD_VDSO
+ ALTERNATIVE("b %l[l_no]", "nop", %[cpucap])
+#else
+ ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
+#endif
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_no);
+
+ return true;
+l_no:
+ return false;
+}
+
+static __always_inline bool
+alternative_has_cap_unlikely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm goto(
+ ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
new file mode 100644
index 000000000000..621aa8550174
--- /dev/null
+++ b/arch/arm64/include/asm/alternative.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_H
+#define __ASM_ALTERNATIVE_H
+
+#include <asm/alternative-macros.h>
+
+#ifndef __ASSEMBLER__
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+struct alt_instr {
+ s32 orig_offset; /* offset to original instruction */
+ s32 alt_offset; /* offset to replacement instruction */
+ u16 cpucap; /* cpucap bit set for replacement */
+ u8 orig_len; /* size of original instruction(s) */
+ u8 alt_len; /* size of new instruction(s), <= orig_len */
+};
+
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
+void __init apply_boot_alternatives(void);
+void __init apply_alternatives_all(void);
+bool alternative_is_applied(u16 cpucap);
+
+#ifdef CONFIG_MODULES
+int apply_alternatives_module(void *start, size_t length);
+#else
+static inline int apply_alternatives_module(void *start, size_t length)
+{
+ return 0;
+}
+#endif
+
+void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
new file mode 100644
index 000000000000..02e05d05851f
--- /dev/null
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ASM_APPLE_M1_PMU_h
+#define __ASM_APPLE_M1_PMU_h
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+/* Counters */
+#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0)
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0)
+#define PMCR0_IMODE GENMASK(10, 8)
+#define PMCR0_IMODE_OFF 0
+#define PMCR0_IMODE_PMI 1
+#define PMCR0_IMODE_AIC 2
+#define PMCR0_IMODE_HALT 3
+#define PMCR0_IMODE_FIQ 4
+#define PMCR0_IACT BIT(11)
+#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12)
+#define PMCR0_STOP_CNT_ON_PMI BIT(20)
+#define PMCR0_CNT_GLOB_L2C_EVT BIT(21)
+#define PMCR0_DEFER_PMI_TO_ERET BIT(22)
+#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30)
+#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32)
+#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
+
+#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2)
+#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
+#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
+#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
+#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48)
+
+#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0)
+#define PMESR0_EVT_CNT_2 GENMASK(7, 0)
+#define PMESR0_EVT_CNT_3 GENMASK(15, 8)
+#define PMESR0_EVT_CNT_4 GENMASK(23, 16)
+#define PMESR0_EVT_CNT_5 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0)
+#define PMESR1_EVT_CNT_6 GENMASK(7, 0)
+#define PMESR1_EVT_CNT_7 GENMASK(15, 8)
+#define PMESR1_EVT_CNT_8 GENMASK(23, 16)
+#define PMESR1_EVT_CNT_9 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0)
+#define PMSR_OVERFLOW GENMASK(9, 0)
+
+#endif /* __ASM_APPLE_M1_PMU_h */
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
new file mode 100644
index 000000000000..d20b03931a8d
--- /dev/null
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/arch_gicv3.h
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ */
+#ifndef __ASM_ARCH_GICV3_H
+#define __ASM_ARCH_GICV3_H
+
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLER__
+
+#include <linux/irqchip/arm-gic-common.h>
+#include <linux/stringify.h>
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+
+#define read_gicreg(r) read_sysreg_s(SYS_ ## r)
+#define write_gicreg(v, r) write_sysreg_s(v, SYS_ ## r)
+
+/*
+ * Low-level accessors
+ *
+ * These system registers are 32 bits, but we make sure that the compiler
+ * sets the GP register's most significant bits to 0 with an explicit cast.
+ */
+
+static __always_inline void gic_write_dir(u32 irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static inline u64 gic_read_iar_common(void)
+{
+ u64 irqstat;
+
+ irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+ dsb(sy);
+ return irqstat;
+}
+
+/*
+ * Cavium ThunderX erratum 23154
+ *
+ * The gicv3 of ThunderX requires a modified version for reading the
+ * IAR status to ensure data synchronization (access to icc_iar1_el1
+ * is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
+ * Common function used for both the workarounds since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ * erratum 23154 is not applicable.
+ */
+static inline u64 gic_read_iar_cavium_thunderx(void)
+{
+ u64 irqstat, apr;
+
+ apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+ nops(8);
+ irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+ nops(4);
+ mb();
+
+ /* Max priority groups implemented is only 32 */
+ if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+ return irqstat;
+
+ return 0x3ff;
+}
+
+static u64 __maybe_unused gic_read_iar(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_CAVIUM_23154))
+ return gic_read_iar_cavium_thunderx();
+ else
+ return gic_read_iar_common();
+}
+
+static inline void gic_write_ctlr(u32 val)
+{
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+static inline u32 gic_read_ctlr(void)
+{
+ return read_sysreg_s(SYS_ICC_CTLR_EL1);
+}
+
+static inline void gic_write_grpen1(u32 val)
+{
+ write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1);
+ isb();
+}
+
+static inline void gic_write_sgi1r(u64 val)
+{
+ write_sysreg_s(val, SYS_ICC_SGI1R_EL1);
+}
+
+static inline u32 gic_read_sre(void)
+{
+ return read_sysreg_s(SYS_ICC_SRE_EL1);
+}
+
+static inline void gic_write_sre(u32 val)
+{
+ write_sysreg_s(val, SYS_ICC_SRE_EL1);
+ isb();
+}
+
+static inline void gic_write_bpr1(u32 val)
+{
+ write_sysreg_s(val, SYS_ICC_BPR1_EL1);
+}
+
+static inline u32 gic_read_pmr(void)
+{
+ return read_sysreg_s(SYS_ICC_PMR_EL1);
+}
+
+static __always_inline void gic_write_pmr(u32 val)
+{
+ write_sysreg_s(val, SYS_ICC_PMR_EL1);
+}
+
+static inline u32 gic_read_rpr(void)
+{
+ return read_sysreg_s(SYS_ICC_RPR_EL1);
+}
+
+#define gic_read_typer(c) readq_relaxed(c)
+#define gic_write_irouter(v, c) writeq_relaxed(v, c)
+#define gic_read_lpir(c) readq_relaxed(c)
+#define gic_write_lpir(v, c) writeq_relaxed(v, c)
+
+#define gic_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
+
+#define gits_read_baser(c) readq_relaxed(c)
+#define gits_write_baser(v, c) writeq_relaxed(v, c)
+
+#define gits_read_cbaser(c) readq_relaxed(c)
+#define gits_write_cbaser(v, c) writeq_relaxed(v, c)
+
+#define gits_write_cwriter(v, c) writeq_relaxed(v, c)
+
+#define gicr_read_propbaser(c) readq_relaxed(c)
+#define gicr_write_propbaser(v, c) writeq_relaxed(v, c)
+
+#define gicr_write_pendbaser(v, c) writeq_relaxed(v, c)
+#define gicr_read_pendbaser(c) readq_relaxed(c)
+
+#define gicr_write_vpropbaser(v, c) writeq_relaxed(v, c)
+#define gicr_read_vpropbaser(c) readq_relaxed(c)
+
+#define gicr_write_vpendbaser(v, c) writeq_relaxed(v, c)
+#define gicr_read_vpendbaser(c) readq_relaxed(c)
+
+static inline bool gic_prio_masking_enabled(void)
+{
+ return system_uses_irq_prio_masking();
+}
+
+static inline void gic_pmr_mask_irqs(void)
+{
+ gic_write_pmr(GIC_PRIO_IRQOFF);
+}
+
+static inline void gic_arch_enable_irqs(void)
+{
+ asm volatile ("msr daifclr, #3" : : : "memory");
+}
+
+static inline bool gic_has_relaxed_pmr_sync(void)
+{
+ return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index d56ed11ba9a3..f5794d50f51d 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -1,40 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* arch/arm64/include/asm/arch_timer.h
*
* Copyright (C) 2012 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_ARCH_TIMER_H
#define __ASM_ARCH_TIMER_H
#include <asm/barrier.h>
+#include <asm/hwcap.h>
+#include <asm/sysreg.h>
+#include <linux/bug.h>
#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/percpu.h>
#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>
-static inline void arch_timer_reg_write(int access, int reg, u32 val)
+#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
+#define has_erratum_handler(h) \
+ ({ \
+ const struct arch_timer_erratum_workaround *__wa; \
+ __wa = __this_cpu_read(timer_unstable_counter_workaround); \
+ (__wa && __wa->h); \
+ })
+
+#define erratum_handler(h) \
+ ({ \
+ const struct arch_timer_erratum_workaround *__wa; \
+ __wa = __this_cpu_read(timer_unstable_counter_workaround); \
+ (__wa && __wa->h) ? ({ isb(); __wa->h;}) : arch_timer_##h; \
+ })
+
+#else
+#define has_erratum_handler(h) false
+#define erratum_handler(h) (arch_timer_##h)
+#endif
+
+enum arch_timer_erratum_match_type {
+ ate_match_dt,
+ ate_match_local_cap_id,
+ ate_match_acpi_oem_info,
+};
+
+struct clock_event_device;
+
+struct arch_timer_erratum_workaround {
+ enum arch_timer_erratum_match_type match_type;
+ const void *id;
+ const char *desc;
+ u64 (*read_cntpct_el0)(void);
+ u64 (*read_cntvct_el0)(void);
+ int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
+ int (*set_next_event_virt)(unsigned long, struct clock_event_device *);
+ bool disable_compat_vdso;
+};
+
+DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
+ timer_unstable_counter_workaround);
+
+static inline notrace u64 arch_timer_read_cntpct_el0(void)
+{
+ u64 cnt;
+
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
+
+ return cnt;
+}
+
+static inline notrace u64 arch_timer_read_cntvct_el0(void)
+{
+ u64 cnt;
+
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
+
+ return cnt;
+}
+
+#define arch_timer_reg_read_stable(reg) \
+ ({ \
+ erratum_handler(read_ ## reg)(); \
+ })
+
+/*
+ * These register accessors are marked inline so the compiler can
+ * nicely work out which register we want, and chuck away the rest of
+ * the code.
+ */
+static __always_inline
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("msr cntp_ctl_el0, %0" : : "r" (val));
+ write_sysreg(val, cntp_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntp_cval_el0);
break;
default:
BUILD_BUG();
@@ -42,10 +114,11 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("msr cntv_ctl_el0, %0" : : "r" (val));
+ write_sysreg(val, cntv_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntv_cval_el0);
break;
default:
BUILD_BUG();
@@ -53,71 +126,87 @@ static inline void arch_timer_reg_write(int access, int reg, u32 val)
} else {
BUILD_BUG();
}
-
- isb();
}
-static inline u32 arch_timer_reg_read(int access, int reg)
+static __always_inline
+u64 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
- u32 val;
-
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val));
- break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
- break;
+ return read_sysreg(cntp_ctl_el0);
default:
BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val));
- break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
- break;
+ return read_sysreg(cntv_ctl_el0);
default:
BUILD_BUG();
}
- } else {
- BUILD_BUG();
}
- return val;
+ BUILD_BUG();
+ unreachable();
}
static inline u32 arch_timer_get_cntfrq(void)
{
- u32 val;
- asm volatile("mrs %0, cntfrq_el0" : "=r" (val));
- return val;
+ return read_sysreg(cntfrq_el0);
+}
+
+static inline u32 arch_timer_get_cntkctl(void)
+{
+ return read_sysreg(cntkctl_el1);
+}
+
+static inline void arch_timer_set_cntkctl(u32 cntkctl)
+{
+ write_sysreg(cntkctl, cntkctl_el1);
+ isb();
}
-static inline void __cpuinit arch_counter_set_user_access(void)
+static __always_inline u64 __arch_counter_get_cntpct_stable(void)
{
- u32 cntkctl;
+ u64 cnt;
- /* Disable user access to the timers and the physical counter. */
- asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl));
- cntkctl &= ~((3 << 8) | (1 << 0));
+ cnt = arch_timer_reg_read_stable(cntpct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
+}
- /* Enable user access to the virtual counter and frequency. */
- cntkctl |= (1 << 1);
- asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl));
+static __always_inline u64 __arch_counter_get_cntpct(void)
+{
+ u64 cnt;
+
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
}
-static inline u64 arch_counter_get_cntvct(void)
+static __always_inline u64 __arch_counter_get_cntvct_stable(void)
{
- u64 cval;
+ u64 cnt;
- isb();
- asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
+ cnt = arch_timer_reg_read_stable(cntvct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
+}
- return cval;
+static __always_inline u64 __arch_counter_get_cntvct(void)
+{
+ u64 cnt;
+
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
}
static inline int arch_timer_arch_init(void)
@@ -125,4 +214,16 @@ static inline int arch_timer_arch_init(void)
return 0;
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ cpu_set_named_feature(EVTSTRM);
+#ifdef CONFIG_COMPAT
+ compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
+#endif
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return cpu_have_named_feature(EVTSTRM);
+}
#endif
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
new file mode 100644
index 000000000000..8babfbe31f95
--- /dev/null
+++ b/arch/arm64/include/asm/archrandom.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARCHRANDOM_H
+#define _ASM_ARCHRANDOM_H
+
+#include <linux/arm-smccc.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+#include <asm/cpufeature.h>
+
+#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
+
+extern bool smccc_trng_available;
+
+static inline bool __init smccc_probe_trng(void)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res);
+ if ((s32)res.a0 < 0)
+ return false;
+
+ return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION;
+}
+
+static inline bool __arm64_rndr(unsigned long *v)
+{
+ bool ok;
+
+ /*
+ * Reads of RNDR set PSTATE.NZCV to 0b0000 on success,
+ * and set PSTATE.NZCV to 0b0100 otherwise.
+ */
+ asm volatile(
+ __mrs_s("%0", SYS_RNDR_EL0) "\n"
+ " cset %w1, ne\n"
+ : "=r" (*v), "=r" (ok)
+ :
+ : "cc");
+
+ return ok;
+}
+
+static inline bool __arm64_rndrrs(unsigned long *v)
+{
+ bool ok;
+
+ /*
+ * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success,
+ * and set PSTATE.NZCV to 0b0100 otherwise.
+ */
+ asm volatile(
+ __mrs_s("%0", SYS_RNDRRS_EL0) "\n"
+ " cset %w1, ne\n"
+ : "=r" (*v), "=r" (ok)
+ :
+ : "cc");
+
+ return ok;
+}
+
+static __always_inline bool __cpu_has_rng(void)
+{
+ if (unlikely(!system_capabilities_finalized() && !preemptible()))
+ return this_cpu_has_cap(ARM64_HAS_RNG);
+ return alternative_has_cap_unlikely(ARM64_HAS_RNG);
+}
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+ /*
+ * Only support the generic interface after we have detected
+ * the system wide capability, avoiding complexity with the
+ * cpufeature code and with potential scheduling between CPUs
+ * with and without the feature.
+ */
+ if (max_longs && __cpu_has_rng() && __arm64_rndr(v))
+ return 1;
+ return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+ if (!max_longs)
+ return 0;
+
+ /*
+ * We prefer the SMCCC call, since its semantics (return actual
+ * hardware backed entropy) is closer to the idea behind this
+ * function here than what even the RNDRSS register provides
+ * (the output of a pseudo RNG freshly seeded by a TRNG).
+ */
+ if (smccc_trng_available) {
+ struct arm_smccc_res res;
+
+ max_longs = min_t(size_t, 3, max_longs);
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res);
+ if ((int)res.a0 >= 0) {
+ switch (max_longs) {
+ case 3:
+ *v++ = res.a1;
+ fallthrough;
+ case 2:
+ *v++ = res.a2;
+ fallthrough;
+ case 1:
+ *v++ = res.a3;
+ break;
+ }
+ return max_longs;
+ }
+ }
+
+ /*
+ * RNDRRS is not backed by an entropy source but by a DRBG that is
+ * reseeded after each invocation. This is not a 100% fit but good
+ * enough to implement this API if no other entropy source exists.
+ */
+ if (__cpu_has_rng() && __arm64_rndrrs(v))
+ return 1;
+
+ return 0;
+}
+
+static inline bool __init __early_cpu_has_rndr(void)
+{
+ /* Open code as we run prior to the first call to cpufeature. */
+ unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
+ return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
+}
+
+#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/arm-cci.h b/arch/arm64/include/asm/arm-cci.h
new file mode 100644
index 000000000000..6d4abbd0cc13
--- /dev/null
+++ b/arch/arm64/include/asm/arm-cci.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/arm-cci.h
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ */
+
+#ifndef __ASM_ARM_CCI_H
+#define __ASM_ARM_CCI_H
+
+static inline bool platform_has_secure_cci_access(void)
+{
+ return false;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h b/arch/arm64/include/asm/arm_dsu_pmu.h
new file mode 100644
index 000000000000..16cdedd5f2c5
--- /dev/null
+++ b/arch/arm64/include/asm/arm_dsu_pmu.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * ARM DynamIQ Shared Unit (DSU) PMU Low level register access routines.
+ *
+ * Copyright (C) ARM Limited, 2017.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+
+#define CLUSTERPMCR_EL1 sys_reg(3, 0, 15, 5, 0)
+#define CLUSTERPMCNTENSET_EL1 sys_reg(3, 0, 15, 5, 1)
+#define CLUSTERPMCNTENCLR_EL1 sys_reg(3, 0, 15, 5, 2)
+#define CLUSTERPMOVSSET_EL1 sys_reg(3, 0, 15, 5, 3)
+#define CLUSTERPMOVSCLR_EL1 sys_reg(3, 0, 15, 5, 4)
+#define CLUSTERPMSELR_EL1 sys_reg(3, 0, 15, 5, 5)
+#define CLUSTERPMINTENSET_EL1 sys_reg(3, 0, 15, 5, 6)
+#define CLUSTERPMINTENCLR_EL1 sys_reg(3, 0, 15, 5, 7)
+#define CLUSTERPMCCNTR_EL1 sys_reg(3, 0, 15, 6, 0)
+#define CLUSTERPMXEVTYPER_EL1 sys_reg(3, 0, 15, 6, 1)
+#define CLUSTERPMXEVCNTR_EL1 sys_reg(3, 0, 15, 6, 2)
+#define CLUSTERPMMDCR_EL1 sys_reg(3, 0, 15, 6, 3)
+#define CLUSTERPMCEID0_EL1 sys_reg(3, 0, 15, 6, 4)
+#define CLUSTERPMCEID1_EL1 sys_reg(3, 0, 15, 6, 5)
+
+static inline u32 __dsu_pmu_read_pmcr(void)
+{
+ return read_sysreg_s(CLUSTERPMCR_EL1);
+}
+
+static inline void __dsu_pmu_write_pmcr(u32 val)
+{
+ write_sysreg_s(val, CLUSTERPMCR_EL1);
+ isb();
+}
+
+static inline u32 __dsu_pmu_get_reset_overflow(void)
+{
+ u32 val = read_sysreg_s(CLUSTERPMOVSCLR_EL1);
+ /* Clear the bit */
+ write_sysreg_s(val, CLUSTERPMOVSCLR_EL1);
+ isb();
+ return val;
+}
+
+static inline void __dsu_pmu_select_counter(int counter)
+{
+ write_sysreg_s(counter, CLUSTERPMSELR_EL1);
+ isb();
+}
+
+static inline u64 __dsu_pmu_read_counter(int counter)
+{
+ __dsu_pmu_select_counter(counter);
+ return read_sysreg_s(CLUSTERPMXEVCNTR_EL1);
+}
+
+static inline void __dsu_pmu_write_counter(int counter, u64 val)
+{
+ __dsu_pmu_select_counter(counter);
+ write_sysreg_s(val, CLUSTERPMXEVCNTR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_set_event(int counter, u32 event)
+{
+ __dsu_pmu_select_counter(counter);
+ write_sysreg_s(event, CLUSTERPMXEVTYPER_EL1);
+ isb();
+}
+
+static inline u64 __dsu_pmu_read_pmccntr(void)
+{
+ return read_sysreg_s(CLUSTERPMCCNTR_EL1);
+}
+
+static inline void __dsu_pmu_write_pmccntr(u64 val)
+{
+ write_sysreg_s(val, CLUSTERPMCCNTR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_disable_counter(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMCNTENCLR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_enable_counter(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMCNTENSET_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_counter_interrupt_enable(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMINTENSET_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_counter_interrupt_disable(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1);
+ isb();
+}
+
+
+static inline u32 __dsu_pmu_read_pmceid(int n)
+{
+ switch (n) {
+ case 0:
+ return read_sysreg_s(CLUSTERPMCEID0_EL1);
+ case 1:
+ return read_sysreg_s(CLUSTERPMCEID1_EL1);
+ default:
+ BUILD_BUG();
+ return 0;
+ }
+}
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
new file mode 100644
index 000000000000..8a777dec8d88
--- /dev/null
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_PMUV3_H
+#define __ASM_PMUV3_H
+
+#include <asm/kvm_host.h>
+
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static inline unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static inline void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static inline void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+}
+
+#define RETURN_READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static inline unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN);
+ return 0;
+}
+
+static inline unsigned long read_pmmir(void)
+{
+ return read_cpuid(PMMIR_EL1);
+}
+
+static inline u32 read_pmuver(void)
+{
+ u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ return cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+}
+
+static inline bool pmuv3_has_icntr(void)
+{
+ u64 dfr1 = read_sysreg(id_aa64dfr1_el1);
+
+ return !!cpuid_feature_extract_unsigned_field(dfr1,
+ ID_AA64DFR1_EL1_PMICNTR_SHIFT);
+}
+
+static inline void write_pmcr(u64 val)
+{
+ write_sysreg(val, pmcr_el0);
+}
+
+static inline u64 read_pmcr(void)
+{
+ return read_sysreg(pmcr_el0);
+}
+
+static inline void write_pmselr(u32 val)
+{
+ write_sysreg(val, pmselr_el0);
+}
+
+static inline void write_pmccntr(u64 val)
+{
+ write_sysreg(val, pmccntr_el0);
+}
+
+static inline u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+static inline void write_pmicntr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICNTR_EL0);
+}
+
+static inline u64 read_pmicntr(void)
+{
+ return read_sysreg_s(SYS_PMICNTR_EL0);
+}
+
+static inline void write_pmcntenset(u64 val)
+{
+ write_sysreg(val, pmcntenset_el0);
+}
+
+static inline void write_pmcntenclr(u64 val)
+{
+ write_sysreg(val, pmcntenclr_el0);
+}
+
+static inline void write_pmintenset(u64 val)
+{
+ write_sysreg(val, pmintenset_el1);
+}
+
+static inline void write_pmintenclr(u64 val)
+{
+ write_sysreg(val, pmintenclr_el1);
+}
+
+static inline void write_pmccfiltr(u64 val)
+{
+ write_sysreg(val, pmccfiltr_el0);
+}
+
+static inline u64 read_pmccfiltr(void)
+{
+ return read_sysreg(pmccfiltr_el0);
+}
+
+static inline void write_pmicfiltr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICFILTR_EL0);
+}
+
+static inline u64 read_pmicfiltr(void)
+{
+ return read_sysreg_s(SYS_PMICFILTR_EL0);
+}
+
+static inline void write_pmovsclr(u64 val)
+{
+ write_sysreg(val, pmovsclr_el0);
+}
+
+static inline u64 read_pmovsclr(void)
+{
+ return read_sysreg(pmovsclr_el0);
+}
+
+static inline void write_pmuserenr(u32 val)
+{
+ write_sysreg(val, pmuserenr_el0);
+}
+
+static inline void write_pmuacr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMUACR_EL1);
+}
+
+static inline u64 read_pmceid0(void)
+{
+ return read_sysreg(pmceid0_el0);
+}
+
+static inline u64 read_pmceid1(void)
+{
+ return read_sysreg(pmceid1_el0);
+}
+
+static inline bool pmuv3_implemented(int pmuver)
+{
+ return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF ||
+ pmuver == ID_AA64DFR0_EL1_PMUVer_NI);
+}
+
+static inline bool is_pmuv3p4(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4;
+}
+
+static inline bool is_pmuv3p5(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;
+}
+
+static inline bool is_pmuv3p9(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
new file mode 100644
index 000000000000..a5f13801b784
--- /dev/null
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_BUG_H
+/*
+ * Copyright (C) 2017 ARM Limited
+ */
+#define __ASM_ASM_BUG_H
+
+#include <asm/brk-imm.h>
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#define __BUGVERBOSE_LOCATION(file, line) \
+ .pushsection .rodata.str,"aMS",@progbits,1; \
+ 14472: .string file; \
+ .popsection; \
+ \
+ .long 14472b - .; \
+ .short line;
+#else
+#define _BUGVERBOSE_LOCATION(file, line)
+#endif
+
+#ifdef CONFIG_GENERIC_BUG
+#define __BUG_ENTRY_START \
+ .pushsection __bug_table,"aw"; \
+ .align 2; \
+ 14470: .long 14471f - .; \
+
+#define __BUG_ENTRY_END \
+ .align 2; \
+ .popsection; \
+ 14471:
+
+#define __BUG_ENTRY(flags) \
+ __BUG_ENTRY_START \
+_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
+ .short flags; \
+ __BUG_ENTRY_END
+#else
+#define __BUG_ENTRY(flags)
+#endif
+
+#define ASM_BUG_FLAGS(flags) \
+ __BUG_ENTRY(flags) \
+ brk BUG_BRK_IMM
+
+#define ASM_BUG() ASM_BUG_FLAGS(0)
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#define __BUG_LOCATION_STRING(file, line) \
+ ".long " file "- .;" \
+ ".short " line ";"
+#else
+#define __BUG_LOCATION_STRING(file, line)
+#endif
+
+#define __BUG_ENTRY_STRING(file, line, flags) \
+ __stringify(__BUG_ENTRY_START) \
+ __BUG_LOCATION_STRING(file, line) \
+ ".short " flags ";" \
+ __stringify(__BUG_ENTRY_END)
+
+#define ARCH_WARN_ASM(file, line, flags, size) \
+ __BUG_ENTRY_STRING(file, line, flags) \
+ __stringify(brk BUG_BRK_IMM)
+
+#define ARCH_WARN_REACHABLE
+
+#endif /* __ASM_ASM_BUG_H */
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
new file mode 100644
index 000000000000..d67e2fdd1aee
--- /dev/null
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#include <linux/bits.h>
+#include <asm/gpr-num.h>
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_BPF 1
+#define EX_TYPE_UACCESS_ERR_ZERO 2
+#define EX_TYPE_KACCESS_ERR_ZERO 3
+#define EX_TYPE_UACCESS_CPY 4
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5
+
+/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR GENMASK(9, 5)
+
+/* Data fields for EX_TYPE_UACCESS_CPY */
+#define EX_DATA_UACCESS_WRITE BIT(0)
+
+#ifdef __ASSEMBLER__
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ .pushsection __ex_table, "a"; \
+ .align 2; \
+ .long ((insn) - .); \
+ .long ((fixup) - .); \
+ .short (type); \
+ .short (data); \
+ .popsection;
+
+#define EX_DATA_REG(reg, gpr) \
+ (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT)
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __ASM_EXTABLE_RAW(insn, fixup, \
+ EX_TYPE_UACCESS_ERR_ZERO, \
+ ( \
+ EX_DATA_REG(ERR, err) | \
+ EX_DATA_REG(ZERO, zero) \
+ ))
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+/*
+ * Create an exception table entry for uaccess `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
+ */
+ .macro _asm_extable_uaccess, insn, fixup
+ _ASM_EXTABLE_UACCESS(\insn, \fixup)
+ .endm
+
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+ .macro _cond_uaccess_extable, insn, fixup
+ .ifnc \fixup,
+ _asm_extable_uaccess \insn, \fixup
+ .endif
+ .endm
+
+ .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write)
+ .endm
+
+#else /* __ASSEMBLER__ */
+
+#include <linux/stringify.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ ".pushsection __ex_table, \"a\"\n" \
+ ".align 2\n" \
+ ".long ((" insn ") - .)\n" \
+ ".long ((" fixup ") - .)\n" \
+ ".short (" type ")\n" \
+ ".short (" data ")\n" \
+ ".popsection\n"
+
+#define EX_DATA_REG(reg, gpr) \
+ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_UACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_KACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_KACCESS(insn, fixup) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
+ "(" \
+ EX_DATA_REG(DATA, data) " | " \
+ EX_DATA_REG(ADDR, addr) \
+ ")")
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..ec1d9655f885
--- /dev/null
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PROTOTYPES_H
+#define __ASM_PROTOTYPES_H
+/*
+ * CONFIG_MODVERSIONS requires a C declaration to generate the appropriate CRC
+ * for each symbol. Since commit:
+ *
+ * 4efca4ed05cbdfd1 ("kbuild: modversions for EXPORT_SYMBOL() for asm")
+ *
+ * ... kbuild will automatically pick these up from <asm/asm-prototypes.h> and
+ * feed this to genksyms when building assembly files.
+ */
+#include <linux/arm-smccc.h>
+
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+long long __ashlti3(long long a, int b);
+long long __ashrti3(long long a, int b);
+long long __lshrti3(long long a, int b);
+
+/*
+ * This function uses a custom calling convention and cannot be called from C so
+ * this prototype is not entirely accurate.
+ */
+void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info);
+
+#endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
new file mode 100644
index 000000000000..9148f5a31968
--- /dev/null
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_UACCESS_H
+#define __ASM_ASM_UACCESS_H
+
+#include <asm/alternative-macros.h>
+#include <asm/asm-extable.h>
+#include <asm/assembler.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+/*
+ * User access enabling/disabling macros.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ .macro __uaccess_ttbr0_disable, tmp1
+ mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ bic \tmp1, \tmp1, #TTBR_ASID_MASK
+ sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
+ msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
+ add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
+ msr ttbr1_el1, \tmp1 // set reserved ASID
+ isb
+ .endm
+
+ .macro __uaccess_ttbr0_enable, tmp1, tmp2
+ get_current_task \tmp1
+ ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ mrs \tmp2, ttbr1_el1
+ extr \tmp2, \tmp2, \tmp1, #48
+ ror \tmp2, \tmp2, #16
+ msr ttbr1_el1, \tmp2 // set the active ASID
+ msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
+ isb
+ .endm
+
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp2 // avoid preemption
+ __uaccess_ttbr0_disable \tmp1
+ restore_irq \tmp2
+alternative_else_nop_endif
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp3 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1, \tmp2
+ restore_irq \tmp3
+alternative_else_nop_endif
+ .endm
+#else
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+ .endm
+#endif
+
+#define USER(l, x...) \
+9999: x; \
+ _asm_extable_uaccess 9999b, l
+
+#define USER_CPY(l, uaccess_is_write, x...) \
+9999: x; \
+ _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
+
+/*
+ * Generate the assembly for LDTR/STTR with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+ .macro user_ldp l, reg1, reg2, addr, post_inc
+8888: ldtr \reg1, [\addr];
+8889: ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b, \l;
+ _asm_extable_uaccess 8889b, \l;
+ .endm
+
+ .macro user_stp l, reg1, reg2, addr, post_inc
+8888: sttr \reg1, [\addr];
+8889: sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b,\l;
+ _asm_extable_uaccess 8889b,\l;
+ .endm
+
+ .macro user_ldst l, inst, reg, addr, post_inc
+8888: \inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b, \l;
+ .endm
+#endif
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
new file mode 100644
index 000000000000..13ecc79854ee
--- /dev/null
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_POINTER_AUTH_H
+#define __ASM_ASM_POINTER_AUTH_H
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ mov \tmp1, #THREAD_KEYS_KERNEL
+ add \tmp1, \tsk, \tmp1
+ ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA]
+ msr_s SYS_APIAKEYLO_EL1, \tmp2
+ msr_s SYS_APIAKEYHI_EL1, \tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+alternative_else_nop_endif
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+ isb
+alternative_else_nop_endif
+ .endm
+
+#else /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+ .endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * thread.keys_user.ap* as offset exceeds the #imm offset range
+ * so use the base value of ldp as thread.keys_user and offset as
+ * thread.keys_user.ap*.
+ */
+ .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ mov \tmp1, #THREAD_KEYS_USER
+ add \tmp1, \tsk, \tmp1
+ ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
+ msr_s SYS_APIAKEYLO_EL1, \tmp2
+ msr_s SYS_APIAKEYHI_EL1, \tmp3
+ .endm
+
+ .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+ mrs \tmp1, id_aa64isar1_el1
+ ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8
+ mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1
+ ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4
+ orr \tmp1, \tmp1, \tmp2
+ cbz \tmp1, .Lno_addr_auth\@
+ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ mrs \tmp2, sctlr_el1
+ orr \tmp2, \tmp2, \tmp1
+ msr sctlr_el1, \tmp2
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+ isb
+.Lno_addr_auth\@:
+ .endm
+
+ .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .Lno_addr_auth\@
+alternative_else_nop_endif
+ __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3
+.Lno_addr_auth\@:
+ .endm
+
+#else /* !CONFIG_ARM64_PTR_AUTH */
+
+ .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ .endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#endif /* __ASM_ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 5aceb83b3f5c..f0ca7196f6fa 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,117 +1,869 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#error "Only include this from assembly code"
#endif
+#ifndef __ASM_ASSEMBLER_H
+#define __ASM_ASSEMBLER_H
+
+#include <linux/export.h>
+
+#include <asm/alternative.h>
+#include <asm/asm-bug.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
+#include <asm/page.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+
+ /*
+ * Provide a wxN alias for each wN register so what we can paste a xN
+ * reference after a 'w' to obtain the 32-bit version.
+ */
+ .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ wx\n .req w\n
+ .endr
+
+ .macro disable_daif
+ msr daifset, #0xf
+ .endm
/*
- * Stack pushing/popping (register pairs only). Equivalent to store decrement
- * before, load increment after.
+ * Save/restore interrupts.
*/
- .macro push, xreg1, xreg2
- stp \xreg1, \xreg2, [sp, #-16]!
+ .macro save_and_disable_daif, flags
+ mrs \flags, daif
+ msr daifset, #0xf
+ .endm
+
+ .macro save_and_disable_irq, flags
+ mrs \flags, daif
+ msr daifset, #3
.endm
- .macro pop, xreg1, xreg2
- ldp \xreg1, \xreg2, [sp], #16
+ .macro restore_irq, flags
+ msr daif, \flags
+ .endm
+
+ .macro disable_step_tsk, flgs, tmp
+ tbz \flgs, #TIF_SINGLESTEP, 9990f
+ mrs \tmp, mdscr_el1
+ bic \tmp, \tmp, #MDSCR_EL1_SS
+ msr mdscr_el1, \tmp
+ isb // Take effect before a subsequent clear of DAIF.D
+9990:
+ .endm
+
+ /* call with daif masked */
+ .macro enable_step_tsk, flgs, tmp
+ tbz \flgs, #TIF_SINGLESTEP, 9990f
+ mrs \tmp, mdscr_el1
+ orr \tmp, \tmp, #MDSCR_EL1_SS
+ msr mdscr_el1, \tmp
+9990:
.endm
/*
- * Enable and disable interrupts.
+ * RAS Error Synchronization barrier
*/
- .macro disable_irq
- msr daifset, #2
+ .macro esb
+#ifdef CONFIG_ARM64_RAS_EXTN
+ hint #16
+#else
+ nop
+#endif
.endm
- .macro enable_irq
- msr daifclr, #2
+/*
+ * Value prediction barrier
+ */
+ .macro csdb
+ hint #20
.endm
/*
- * Save/disable and restore interrupts.
+ * Clear Branch History instruction
*/
- .macro save_and_disable_irqs, olddaif
- mrs \olddaif, daif
- disable_irq
+ .macro clearbhb
+ hint #22
.endm
- .macro restore_irqs, olddaif
- msr daif, \olddaif
+/*
+ * Speculation barrier
+ */
+ .macro sb
+alternative_if_not ARM64_HAS_SB
+ dsb nsh
+ isb
+alternative_else
+ SB_BARRIER_INSN
+ nop
+alternative_endif
.endm
/*
- * Enable and disable debug exceptions.
+ * NOP sequence
*/
- .macro disable_dbg
- msr daifset, #8
+ .macro nops, num
+ .rept \num
+ nop
+ .endr
.endm
- .macro enable_dbg
- msr daifclr, #8
+/*
+ * Register aliases.
+ */
+lr .req x30 // link register
+
+/*
+ * Vector entry
+ */
+ .macro ventry label
+ .align 7
+ b \label
.endm
- .macro disable_step, tmp
- mrs \tmp, mdscr_el1
- bic \tmp, \tmp, #1
- msr mdscr_el1, \tmp
+/*
+ * Select code when configured for BE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_BE(code...) code
+#else
+#define CPU_BE(code...)
+#endif
+
+/*
+ * Select code when configured for LE.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...) code
+#endif
+
+/*
+ * Define a macro that constructs a 64-bit value by concatenating two
+ * 32-bit registers. Note that on big endian systems the order of the
+ * registers is swapped.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ .macro regs_to_64, rd, lbits, hbits
+#else
+ .macro regs_to_64, rd, hbits, lbits
+#endif
+ orr \rd, \lbits, \hbits, lsl #32
.endm
- .macro enable_step, tmp
- mrs \tmp, mdscr_el1
- orr \tmp, \tmp, #1
- msr mdscr_el1, \tmp
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+ /*
+ * @dst: destination register (64 bit wide)
+ * @sym: name of the symbol
+ */
+ .macro adr_l, dst, sym
+ adrp \dst, \sym
+ add \dst, \dst, :lo12:\sym
.endm
- .macro enable_dbg_if_not_stepping, tmp
- mrs \tmp, mdscr_el1
- tbnz \tmp, #0, 9990f
- enable_dbg
-9990:
+ /*
+ * @dst: destination register (32 or 64 bit wide)
+ * @sym: name of the symbol
+ * @tmp: optional 64-bit scratch register to be used if <dst> is a
+ * 32-bit wide register, in which case it cannot be used to hold
+ * the address
+ */
+ .macro ldr_l, dst, sym, tmp=
+ .ifb \tmp
+ adrp \dst, \sym
+ ldr \dst, [\dst, :lo12:\sym]
+ .else
+ adrp \tmp, \sym
+ ldr \dst, [\tmp, :lo12:\sym]
+ .endif
+ .endm
+
+ /*
+ * @src: source register (32 or 64 bit wide)
+ * @sym: name of the symbol
+ * @tmp: mandatory 64-bit scratch register to calculate the address
+ * while <src> needs to be preserved.
+ */
+ .macro str_l, src, sym, tmp
+ adrp \tmp, \sym
+ str \src, [\tmp, :lo12:\sym]
+ .endm
+
+ /*
+ * @dst: destination register
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+ .macro get_this_cpu_offset, dst
+ mrs \dst, tpidr_el2
+ .endm
+#else
+ .macro get_this_cpu_offset, dst
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ mrs \dst, tpidr_el1
+alternative_else
+ mrs \dst, tpidr_el2
+alternative_endif
+ .endm
+
+ .macro set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ msr tpidr_el1, \src
+alternative_else
+ msr tpidr_el2, \src
+alternative_endif
+ .endm
+#endif
+
+ /*
+ * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
+ * @sym: The name of the per-cpu variable
+ * @tmp: scratch register
+ */
+ .macro adr_this_cpu, dst, sym, tmp
+ adrp \tmp, \sym
+ add \dst, \tmp, #:lo12:\sym
+ get_this_cpu_offset \tmp
+ add \dst, \dst, \tmp
+ .endm
+
+ /*
+ * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
+ * @sym: The name of the per-cpu variable
+ * @tmp: scratch register
+ */
+ .macro ldr_this_cpu dst, sym, tmp
+ adr_l \dst, \sym
+ get_this_cpu_offset \tmp
+ ldr \dst, [\dst, \tmp]
.endm
/*
- * SMP data memory barrier
+ * read_ctr - read CTR_EL0. If the system has mismatched register fields,
+ * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
*/
- .macro smp_dmb, opt
-#ifdef CONFIG_SMP
- dmb \opt
+ .macro read_ctr, reg
+#ifndef __KVM_NVHE_HYPERVISOR__
+alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
+ mrs \reg, ctr_el0 // read CTR
+ nop
+alternative_else
+ ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
+alternative_endif
+#else
+alternative_if_not ARM64_KVM_PROTECTED_MODE
+ ASM_BUG()
+alternative_else_nop_endif
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0
+ movz \reg, #0
+ movk \reg, #0, lsl #16
+ movk \reg, #0, lsl #32
+ movk \reg, #0, lsl #48
+alternative_cb_end
#endif
.endm
-#define USER(l, x...) \
-9999: x; \
- .section __ex_table,"a"; \
- .align 3; \
- .quad 9999b,l; \
- .previous
/*
- * Register aliases.
+ * raw_dcache_line_size - get the minimum D-cache line size on this CPU
+ * from the CTR register.
*/
-lr .req x30 // link register
+ .macro raw_dcache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
/*
- * Vector entry
+ * dcache_line_size - get the safe D-cache line size across all CPUs
*/
- .macro ventry label
- .align 7
- b \label
+ .macro dcache_line_size, reg, tmp
+ read_ctr \tmp
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * raw_icache_line_size - get the minimum I-cache line size on this CPU
+ * from the CTR register.
+ */
+ .macro raw_icache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ and \tmp, \tmp, #0xf // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * icache_line_size - get the safe I-cache line size across all CPUs
+ */
+ .macro icache_line_size, reg, tmp
+ read_ctr \tmp
+ and \tmp, \tmp, #0xf // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+ .macro tcr_set_t0sz, valreg, t0sz
+ bfi \valreg, \t0sz, #TCR_EL1_T0SZ_SHIFT, #TCR_EL1_T0SZ_WIDTH
+ .endm
+
+/*
+ * tcr_set_t1sz - update TCR.T1SZ
+ */
+ .macro tcr_set_t1sz, valreg, t1sz
+ bfi \valreg, \t1sz, #TCR_EL1_T1SZ_SHIFT, #TCR_EL1_T1SZ_WIDTH
+ .endm
+
+/*
+ * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
+ * ID_AA64MMFR0_EL1.PARange value
+ *
+ * tcr: register with the TCR_ELx value to be updated
+ * pos: IPS or PS bitfield position
+ * tmp{0,1}: temporary registers
+ */
+ .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
+ mrs \tmp0, ID_AA64MMFR0_EL1
+ // Narrow PARange to fit the PS field in TCR_ELx
+ ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
+#ifdef CONFIG_ARM64_LPA2
+alternative_if_not ARM64_HAS_VA52
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_48
+alternative_else_nop_endif
+#endif
+ cmp \tmp0, \tmp1
+ csel \tmp0, \tmp1, \tmp0, hi
+ bfi \tcr, \tmp0, \pos, #3
+ .endm
+
+ .macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \addr
+alternative_else
+ dc civac, \addr
+alternative_endif
+ .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end) with dcache line size explicitly provided.
+ *
+ * op: operation passed to dc instruction
+ * domain: domain used in dsb instruction
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * linesz: dcache line size
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp
+ */
+ .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
+ sub \tmp, \linesz, #1
+ bic \start, \start, \tmp
+.Ldcache_op\@:
+ .ifc \op, cvau
+ __dcache_op_workaround_clean_cache \op, \start
+ .else
+ .ifc \op, cvac
+ __dcache_op_workaround_clean_cache \op, \start
+ .else
+ .ifc \op, cvap
+ sys 3, c7, c12, 1, \start // dc cvap
+ .else
+ .ifc \op, cvadp
+ sys 3, c7, c13, 1, \start // dc cvadp
+ .else
+ dc \op, \start
+ .endif
+ .endif
+ .endif
+ .endif
+ add \start, \start, \linesz
+ cmp \start, \end
+ b.lo .Ldcache_op\@
+ dsb \domain
+
+ _cond_uaccess_extable .Ldcache_op\@, \fixup
+ .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end)
+ *
+ * op: operation passed to dc instruction
+ * domain: domain used in dsb instruction
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp1, tmp2
+ */
+ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
+ dcache_line_size \tmp1, \tmp2
+ dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
+ .endm
+
+/*
+ * Macro to perform an instruction cache maintenance for the interval
+ * [start, end)
+ *
+ * start, end: virtual addresses describing the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: tmp1, tmp2
+ */
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
+ icache_line_size \tmp1, \tmp2
+ sub \tmp2, \tmp1, #1
+ bic \tmp2, \start, \tmp2
+.Licache_op\@:
+ ic ivau, \tmp2 // invalidate I line PoU
+ add \tmp2, \tmp2, \tmp1
+ cmp \tmp2, \end
+ b.lo .Licache_op\@
+ dsb ish
+ isb
+
+ _cond_uaccess_extable .Licache_op\@, \fixup
+ .endm
+
+/*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ * pgtbl preserved
+ * tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+ .macro load_ttbr1, pgtbl, tmp1, tmp2
+ phys_to_ttbr \tmp1, \pgtbl
+ offset_ttbr1 \tmp1, \tmp2
+ msr ttbr1_el1, \tmp1
+ isb
+ .endm
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the tlb, switch the ttbr to a zero page when we invalidate the old
+ * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
+ * Even switching to our copied tables will cause a changed output address at
+ * each stage of the walk.
+ */
+ .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
+ phys_to_ttbr \tmp, \zero_page
+ msr ttbr1_el1, \tmp
+ isb
+ tlbi vmalle1
+ dsb nsh
+ load_ttbr1 \page_table, \tmp, \tmp2
+ .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+ .macro reset_pmuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64dfr0_el1
+ ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq 9000f // Skip if no PMU present or IMP_DEF
+ msr pmuserenr_el0, xzr // Disable PMU access from EL0
+9000:
+ .endm
+
+/*
+ * reset_amuserenr_el0 - reset AMUSERENR_EL0 if AMUv1 present
+ */
+ .macro reset_amuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1
+ ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
+ cbz \tmpreg, .Lskip_\@ // Skip if no AMU present
+ msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0
+.Lskip_\@:
+ .endm
+/*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+ .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+9998: ldp \t1, \t2, [\src]
+ ldp \t3, \t4, [\src, #16]
+ ldp \t5, \t6, [\src, #32]
+ ldp \t7, \t8, [\src, #48]
+ add \src, \src, #64
+ stnp \t1, \t2, [\dest]
+ stnp \t3, \t4, [\dest, #16]
+ stnp \t5, \t6, [\dest, #32]
+ stnp \t7, \t8, [\dest, #48]
+ add \dest, \dest, #64
+ tst \src, #(PAGE_SIZE - 1)
+ b.ne 9998b
+ .endm
+
+/*
+ * Annotate a function as being unsuitable for kprobes.
+ */
+#ifdef CONFIG_KPROBES
+#define NOKPROBE(x) \
+ .pushsection "_kprobe_blacklist", "aw"; \
+ .quad x; \
+ .popsection;
+#else
+#define NOKPROBE(x)
+#endif
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#define EXPORT_SYMBOL_NOKASAN(name)
+#else
+#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
+#endif
+
+ /*
+ * Emit a 64-bit absolute little endian symbol reference in a way that
+ * ensures that it will be resolved at build time, even when building a
+ * PIE binary. This requires cooperation from the linker script, which
+ * must emit the lo32/hi32 halves individually.
+ */
+ .macro le64sym, sym
+ .long \sym\()_lo32
+ .long \sym\()_hi32
+ .endm
+
+ /*
+ * mov_q - move an immediate constant into a 64-bit register using
+ * between 2 and 4 movz/movk instructions (depending on the
+ * magnitude and sign of the operand)
+ */
+ .macro mov_q, reg, val
+ .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
+ movz \reg, :abs_g1_s:\val
+ .else
+ .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
+ movz \reg, :abs_g2_s:\val
+ .else
+ movz \reg, :abs_g3:\val
+ movk \reg, :abs_g2_nc:\val
+ .endif
+ movk \reg, :abs_g1_nc:\val
+ .endif
+ movk \reg, :abs_g0_nc:\val
+ .endm
+
+/*
+ * Return the current task_struct.
+ */
+ .macro get_current_task, rd
+ mrs \rd, sp_el0
+ .endm
+
+/*
+ * If the kernel is built for 52-bit virtual addressing but the hardware only
+ * supports 48 bits, we cannot program the pgdir address into TTBR1 directly,
+ * but we have to add an offset so that the TTBR1 address corresponds with the
+ * pgdir entry that covers the lowest 48-bit addressable VA.
+ *
+ * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
+ * additional paging level, and on LPA2/16k pages, we would end up with a root
+ * level table with only 2 entries, which is suboptimal in terms of TLB
+ * utilization, so there we fall back to 47 bits of translation if LPA2 is not
+ * supported.
+ *
+ * orr is used as it can cover the immediate value (and is idempotent).
+ * ttbr: Value of ttbr to set, modified.
+ */
+ .macro offset_ttbr1, ttbr, tmp
+#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
+ mrs \tmp, tcr_el1
+ and \tmp, \tmp, #TCR_EL1_T1SZ_MASK
+ cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
+ orr \tmp, \ttbr, #TTBR1_BADDR_4852_OFFSET
+ csel \ttbr, \tmp, \ttbr, eq
+#endif
+ .endm
+
+/*
+ * Arrange a physical address in a TTBR register, taking care of 52-bit
+ * addresses.
+ *
+ * phys: physical address, preserved
+ * ttbr: returns the TTBR value
+ */
+ .macro phys_to_ttbr, ttbr, phys
+#ifdef CONFIG_ARM64_PA_BITS_52
+ orr \ttbr, \phys, \phys, lsr #46
+ and \ttbr, \ttbr, #TTBR_BADDR_MASK_52
+#else
+ mov \ttbr, \phys
+#endif
+ .endm
+
+ .macro phys_to_pte, pte, phys
+#ifdef CONFIG_ARM64_PA_BITS_52
+ orr \pte, \phys, \phys, lsr #PTE_ADDR_HIGH_SHIFT
+ and \pte, \pte, #PHYS_TO_PTE_ADDR_MASK
+#else
+ mov \pte, \phys
+#endif
+ .endm
+
+/*
+ * tcr_clear_errata_bits - Clear TCR bits that trigger an errata on this CPU.
+ */
+ .macro tcr_clear_errata_bits, tcr, tmp1, tmp2
+#ifdef CONFIG_FUJITSU_ERRATUM_010001
+ mrs \tmp1, midr_el1
+
+ mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
+ and \tmp1, \tmp1, \tmp2
+ mov_q \tmp2, MIDR_FUJITSU_ERRATUM_010001
+ cmp \tmp1, \tmp2
+ b.ne 10f
+
+ mov_q \tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
+ bic \tcr, \tcr, \tmp2
+10:
+#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
+ .endm
+
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+ .macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+ isb
+#endif
+ .endm
+
+ /*
+ * frame_push - Push @regcount callee saved registers to the stack,
+ * starting at x19, as well as x29/x30, and set x29 to
+ * the new value of sp. Add @extra bytes of stack space
+ * for locals.
+ */
+ .macro frame_push, regcount:req, extra
+ __frame st, \regcount, \extra
+ .endm
+
+ /*
+ * frame_pop - Pop the callee saved registers from the stack that were
+ * pushed in the most recent call to frame_push, as well
+ * as x29/x30 and any extra stack space that may have been
+ * allocated.
+ */
+ .macro frame_pop
+ __frame ld
+ .endm
+
+ .macro __frame_regs, reg1, reg2, op, num
+ .if .Lframe_regcount == \num
+ \op\()r \reg1, [sp, #(\num + 1) * 8]
+ .elseif .Lframe_regcount > \num
+ \op\()p \reg1, \reg2, [sp, #(\num + 1) * 8]
+ .endif
+ .endm
+
+ .macro __frame, op, regcount, extra=0
+ .ifc \op, st
+ .if (\regcount) < 0 || (\regcount) > 10
+ .error "regcount should be in the range [0 ... 10]"
+ .endif
+ .if ((\extra) % 16) != 0
+ .error "extra should be a multiple of 16 bytes"
+ .endif
+ .ifdef .Lframe_regcount
+ .if .Lframe_regcount != -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ .endif
+ .set .Lframe_regcount, \regcount
+ .set .Lframe_extra, \extra
+ .set .Lframe_local_offset, ((\regcount + 3) / 2) * 16
+ stp x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
+ mov x29, sp
+ .endif
+
+ __frame_regs x19, x20, \op, 1
+ __frame_regs x21, x22, \op, 3
+ __frame_regs x23, x24, \op, 5
+ __frame_regs x25, x26, \op, 7
+ __frame_regs x27, x28, \op, 9
+
+ .ifc \op, ld
+ .if .Lframe_regcount == -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ ldp x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
+ .set .Lframe_regcount, -1
+ .endif
+ .endm
+
+/*
+ * Set SCTLR_ELx to the @reg value, and invalidate the local icache
+ * in the process. This is called when setting the MMU on.
+ */
+.macro set_sctlr, sreg, reg
+ msr \sreg, \reg
+ isb
+ /*
+ * Invalidate the local I-cache so that any instructions fetched
+ * speculatively from the PoC are discarded, since they may have
+ * been dynamically patched at the PoU.
+ */
+ ic iallu
+ dsb nsh
+ isb
+.endm
+
+.macro set_sctlr_el1, reg
+ set_sctlr sctlr_el1, \reg
+.endm
+
+.macro set_sctlr_el2, reg
+ set_sctlr sctlr_el2, \reg
+.endm
+
+ /*
+ * Check whether asm code should yield as soon as it is able. This is
+ * the case if we are currently running in task context, and the
+ * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
+ * is stored negated in the top word of the thread_info::preempt_count
+ * field)
+ */
+ .macro cond_yield, lbl:req, tmp:req, tmp2
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+ get_current_task \tmp
+ ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
+ /*
+ * If we are serving a softirq, there is no point in yielding: the
+ * softirq will not be preempted no matter what we do, so we should
+ * run to completion as quickly as we can. The preempt_count field will
+ * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
+ * catch this case too.
+ */
+ cbz \tmp, \lbl
+#endif
+ .endm
+
+/*
+ * Branch Target Identifier (BTI)
+ */
+ .macro bti, targets
+ .equ .L__bti_targets_c, 34
+ .equ .L__bti_targets_j, 36
+ .equ .L__bti_targets_jc,38
+ hint #.L__bti_targets_\targets
+ .endm
+
+/*
+ * This macro emits a program property note section identifying
+ * architecture features which require special handling, mainly for
+ * use in assembly files included in the VDSO.
+ */
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+
+#ifdef CONFIG_ARM64_BTI_KERNEL
+#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \
+ ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_PAC))
+#endif
+
+#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+ /*
+ * This is described with an array of char in the Linux API
+ * spec but the text and all other usage (including binutils,
+ * clang and GCC) treat this as a 32 bit value so no swizzling
+ * is required for big endian.
+ */
+ .long \feat
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+#else
+.macro emit_aarch64_feature_1_and, feat=0
+.endm
+
+#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
+
+ .macro __mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter
+ mov \tmp, #32 // Patched to correct the immediate
+alternative_cb_end
+.Lspectre_bhb_loop\@:
+ b . + 4
+ subs \tmp, \tmp, #1
+ b.ne .Lspectre_bhb_loop\@
+ sb
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable
+ b .L_spectre_bhb_loop_done\@ // Patched to NOP
+alternative_cb_end
+ __mitigate_spectre_bhb_loop \tmp
+.L_spectre_bhb_loop_done\@:
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ /* Save/restores x0-x3 to the stack */
+ .macro __mitigate_spectre_bhb_fw
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ stp x0, x1, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
+alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
+ nop // Patched to SMC/HVC #0
+alternative_cb_end
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_clear_insn
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb
+ /* Patched to NOP when not supported */
+ clearbhb
+ isb
+alternative_cb_end
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
.endm
+#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 836364468571..400d279e0f8d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -1,21 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/atomic.h
*
* Copyright (C) 1996 Russell King.
* Copyright (C) 2002 Deep Blue Solutions Ltd.
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_ATOMIC_H
#define __ASM_ATOMIC_H
@@ -25,281 +14,185 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
+#include <asm/lse.h>
-#define ATOMIC_INIT(i) { (i) }
-
-#ifdef __KERNEL__
-
-/*
- * On ARM, ordinary assignment (str instruction) doesn't clear the local
- * strex/ldrex monitor on some implementations. The reason we can use it for
- * atomic_set() is the clrex or dummy strex done on every exception return.
- */
-#define atomic_read(v) (*(volatile int *)&(v)->counter)
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/*
- * AArch64 UP and SMP safe atomic ops. We use load exclusive and
- * store exclusive to ensure that these are atomic. We may loop
- * to ensure that the update happens.
- */
-static inline void atomic_add(int i, atomic_t *v)
-{
- unsigned long tmp;
- int result;
-
- asm volatile("// atomic_add\n"
-"1: ldxr %w0, %2\n"
-" add %w0, %w0, %w3\n"
-" stxr %w1, %w0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
-}
-
-static inline int atomic_add_return(int i, atomic_t *v)
-{
- unsigned long tmp;
- int result;
-
- asm volatile("// atomic_add_return\n"
-"1: ldaxr %w0, %2\n"
-" add %w0, %w0, %w3\n"
-" stlxr %w1, %w0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc", "memory");
-
- return result;
+#define ATOMIC_OP(op) \
+static __always_inline void arch_##op(int i, atomic_t *v) \
+{ \
+ __lse_ll_sc_body(op, i, v); \
}
-static inline void atomic_sub(int i, atomic_t *v)
-{
- unsigned long tmp;
- int result;
-
- asm volatile("// atomic_sub\n"
-"1: ldxr %w0, %2\n"
-" sub %w0, %w0, %w3\n"
-" stxr %w1, %w0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
-}
+ATOMIC_OP(atomic_andnot)
+ATOMIC_OP(atomic_or)
+ATOMIC_OP(atomic_xor)
+ATOMIC_OP(atomic_add)
+ATOMIC_OP(atomic_and)
+ATOMIC_OP(atomic_sub)
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
- unsigned long tmp;
- int result;
-
- asm volatile("// atomic_sub_return\n"
-"1: ldaxr %w0, %2\n"
-" sub %w0, %w0, %w3\n"
-" stlxr %w1, %w0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc", "memory");
-
- return result;
-}
+#undef ATOMIC_OP
-static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
-{
- unsigned long tmp;
- int oldval;
-
- asm volatile("// atomic_cmpxchg\n"
-"1: ldaxr %w1, %2\n"
-" cmp %w1, %w3\n"
-" b.ne 2f\n"
-" stlxr %w0, %w4, %2\n"
-" cbnz %w0, 1b\n"
-"2:"
- : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter)
- : "Ir" (old), "r" (new)
- : "cc", "memory");
-
- return oldval;
+#define ATOMIC_FETCH_OP(name, op) \
+static __always_inline int arch_##op##name(int i, atomic_t *v) \
+{ \
+ return __lse_ll_sc_body(op##name, i, v); \
}
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
-{
- unsigned long tmp, tmp2;
-
- asm volatile("// atomic_clear_mask\n"
-"1: ldxr %0, %2\n"
-" bic %0, %0, %3\n"
-" stxr %w1, %0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (*addr)
- : "Ir" (mask)
- : "cc");
+#define ATOMIC_FETCH_OPS(op) \
+ ATOMIC_FETCH_OP(_relaxed, op) \
+ ATOMIC_FETCH_OP(_acquire, op) \
+ ATOMIC_FETCH_OP(_release, op) \
+ ATOMIC_FETCH_OP( , op)
+
+ATOMIC_FETCH_OPS(atomic_fetch_andnot)
+ATOMIC_FETCH_OPS(atomic_fetch_or)
+ATOMIC_FETCH_OPS(atomic_fetch_xor)
+ATOMIC_FETCH_OPS(atomic_fetch_add)
+ATOMIC_FETCH_OPS(atomic_fetch_and)
+ATOMIC_FETCH_OPS(atomic_fetch_sub)
+ATOMIC_FETCH_OPS(atomic_add_return)
+ATOMIC_FETCH_OPS(atomic_sub_return)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
+
+#define ATOMIC64_OP(op) \
+static __always_inline void arch_##op(long i, atomic64_t *v) \
+{ \
+ __lse_ll_sc_body(op, i, v); \
}
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+ATOMIC64_OP(atomic64_andnot)
+ATOMIC64_OP(atomic64_or)
+ATOMIC64_OP(atomic64_xor)
+ATOMIC64_OP(atomic64_add)
+ATOMIC64_OP(atomic64_and)
+ATOMIC64_OP(atomic64_sub)
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c, old;
+#undef ATOMIC64_OP
- c = atomic_read(v);
- while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
- c = old;
- return c;
+#define ATOMIC64_FETCH_OP(name, op) \
+static __always_inline long arch_##op##name(long i, atomic64_t *v) \
+{ \
+ return __lse_ll_sc_body(op##name, i, v); \
}
-#define atomic_inc(v) atomic_add(1, v)
-#define atomic_dec(v) atomic_sub(1, v)
-
-#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0)
-#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-
-#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
-
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__after_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_inc() smp_mb()
-
-/*
- * 64-bit atomic operations.
- */
-#define ATOMIC64_INIT(i) { (i) }
-
-#define atomic64_read(v) (*(volatile long long *)&(v)->counter)
-#define atomic64_set(v,i) (((v)->counter) = (i))
-
-static inline void atomic64_add(u64 i, atomic64_t *v)
+#define ATOMIC64_FETCH_OPS(op) \
+ ATOMIC64_FETCH_OP(_relaxed, op) \
+ ATOMIC64_FETCH_OP(_acquire, op) \
+ ATOMIC64_FETCH_OP(_release, op) \
+ ATOMIC64_FETCH_OP( , op)
+
+ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
+ATOMIC64_FETCH_OPS(atomic64_fetch_or)
+ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
+ATOMIC64_FETCH_OPS(atomic64_fetch_add)
+ATOMIC64_FETCH_OPS(atomic64_fetch_and)
+ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
+ATOMIC64_FETCH_OPS(atomic64_add_return)
+ATOMIC64_FETCH_OPS(atomic64_sub_return)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
+
+static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long result;
- unsigned long tmp;
-
- asm volatile("// atomic64_add\n"
-"1: ldxr %0, %2\n"
-" add %0, %0, %3\n"
-" stxr %w1, %0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
+ return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
- long result;
- unsigned long tmp;
-
- asm volatile("// atomic64_add_return\n"
-"1: ldaxr %0, %2\n"
-" add %0, %0, %3\n"
-" stlxr %w1, %0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc", "memory");
-
- return result;
-}
+#define arch_atomic_read(v) __READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))
-static inline void atomic64_sub(u64 i, atomic64_t *v)
-{
- long result;
- unsigned long tmp;
-
- asm volatile("// atomic64_sub\n"
-"1: ldxr %0, %2\n"
-" sub %0, %0, %3\n"
-" stxr %w1, %0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc");
-}
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
+#define arch_atomic_add_return_release arch_atomic_add_return_release
+#define arch_atomic_add_return arch_atomic_add_return
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
- long result;
- unsigned long tmp;
-
- asm volatile("// atomic64_sub_return\n"
-"1: ldaxr %0, %2\n"
-" sub %0, %0, %3\n"
-" stlxr %w1, %0, %2\n"
-" cbnz %w1, 1b"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- : "Ir" (i)
- : "cc", "memory");
-
- return result;
-}
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return_acquire
+#define arch_atomic_sub_return_release arch_atomic_sub_return_release
+#define arch_atomic_sub_return arch_atomic_sub_return
-static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
-{
- long oldval;
- unsigned long res;
-
- asm volatile("// atomic64_cmpxchg\n"
-"1: ldaxr %1, %2\n"
-" cmp %1, %3\n"
-" b.ne 2f\n"
-" stlxr %w0, %4, %2\n"
-" cbnz %w0, 1b\n"
-"2:"
- : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter)
- : "Ir" (old), "r" (new)
- : "cc", "memory");
-
- return oldval;
-}
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add_acquire
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add_release
+#define arch_atomic_fetch_add arch_atomic_fetch_add
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub_acquire
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub_release
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
- long result;
- unsigned long tmp;
-
- asm volatile("// atomic64_dec_if_positive\n"
-"1: ldaxr %0, %2\n"
-" subs %0, %0, #1\n"
-" b.mi 2f\n"
-" stlxr %w1, %0, %2\n"
-" cbnz %w1, 1b\n"
-"2:"
- : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
- :
- : "cc", "memory");
-
- return result;
-}
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and_acquire
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and_release
+#define arch_atomic_fetch_and arch_atomic_fetch_and
-static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
- long c, old;
+#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_andnot_acquire arch_atomic_fetch_andnot_acquire
+#define arch_atomic_fetch_andnot_release arch_atomic_fetch_andnot_release
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
- c = atomic64_read(v);
- while (c != u && (old = atomic64_cmpxchg((v), c, c + a)) != c)
- c = old;
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or_acquire
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or_release
+#define arch_atomic_fetch_or arch_atomic_fetch_or
- return c != u;
-}
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor_acquire
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
+#define arch_atomic_andnot arch_atomic_andnot
-#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
-#define atomic64_inc(v) atomic64_add(1LL, (v))
-#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
-#define atomic64_dec(v) atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v))
-#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
-
-#endif
-#endif
+/*
+ * 64-bit arch_atomic operations.
+ */
+#define ATOMIC64_INIT ATOMIC_INIT
+#define arch_atomic64_read arch_atomic_read
+#define arch_atomic64_set arch_atomic_set
+
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return_acquire
+#define arch_atomic64_add_return_release arch_atomic64_add_return_release
+#define arch_atomic64_add_return arch_atomic64_add_return
+
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return_acquire
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return_release
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add_acquire
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add_release
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub_acquire
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub_release
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and_acquire
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and_release
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+
+#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_andnot_acquire arch_atomic64_fetch_andnot_acquire
+#define arch_atomic64_fetch_andnot_release arch_atomic64_fetch_andnot_release
+#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
+
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or_acquire
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or_release
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor_acquire
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+
+#define arch_atomic64_andnot arch_atomic64_andnot
+
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+#endif /* __ASM_ATOMIC_H */
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
new file mode 100644
index 000000000000..89d2ba272359
--- /dev/null
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Based on arch/arm/include/asm/atomic.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2002 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_ATOMIC_LL_SC_H
+#define __ASM_ATOMIC_LL_SC_H
+
+#include <linux/stringify.h>
+
+#ifndef CONFIG_CC_HAS_K_CONSTRAINT
+#define K
+#endif
+
+/*
+ * AArch64 UP and SMP safe atomic ops. We use load exclusive and
+ * store exclusive to ensure that these are atomic. We may loop
+ * to ensure that the update happens.
+ */
+
+#define ATOMIC_OP(op, asm_op, constraint) \
+static __always_inline void \
+__ll_sc_atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ asm volatile("// atomic_" #op "\n" \
+ " prfm pstl1strm, %2\n" \
+ "1: ldxr %w0, %2\n" \
+ " " #asm_op " %w0, %w0, %w3\n" \
+ " stxr %w1, %w0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i)); \
+}
+
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
+static __always_inline int \
+__ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ asm volatile("// atomic_" #op "_return" #name "\n" \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr %w0, %2\n" \
+ " " #asm_op " %w0, %w0, %w3\n" \
+ " st" #rel "xr %w1, %w0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+ \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
+static __always_inline int \
+__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int val, result; \
+ \
+ asm volatile("// atomic_fetch_" #op #name "\n" \
+ " prfm pstl1strm, %3\n" \
+ "1: ld" #acq "xr %w0, %3\n" \
+ " " #asm_op " %w1, %w0, %w4\n" \
+ " st" #rel "xr %w2, %w1, %3\n" \
+ " cbnz %w2, 1b\n" \
+ " " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
+
+ATOMIC_OPS(add, add, I)
+ATOMIC_OPS(sub, sub, J)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
+
+ATOMIC_OPS(and, and, K)
+ATOMIC_OPS(or, orr, K)
+ATOMIC_OPS(xor, eor, K)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
+ATOMIC_OPS(andnot, bic, )
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define ATOMIC64_OP(op, asm_op, constraint) \
+static __always_inline void \
+__ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ s64 result; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_" #op "\n" \
+ " prfm pstl1strm, %2\n" \
+ "1: ldxr %0, %2\n" \
+ " " #asm_op " %0, %0, %3\n" \
+ " stxr %w1, %0, %2\n" \
+ " cbnz %w1, 1b" \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i)); \
+}
+
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
+static __always_inline long \
+__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
+{ \
+ s64 result; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_" #op "_return" #name "\n" \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr %0, %2\n" \
+ " " #asm_op " %0, %0, %3\n" \
+ " st" #rel "xr %w1, %0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+ \
+ return result; \
+}
+
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
+static __always_inline long \
+__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
+{ \
+ s64 result, val; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_fetch_" #op #name "\n" \
+ " prfm pstl1strm, %3\n" \
+ "1: ld" #acq "xr %0, %3\n" \
+ " " #asm_op " %1, %0, %4\n" \
+ " st" #rel "xr %w2, %1, %3\n" \
+ " cbnz %w2, 1b\n" \
+ " " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : __stringify(constraint) "r" (i) \
+ : cl); \
+ \
+ return result; \
+}
+
+#define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
+
+ATOMIC64_OPS(add, add, I)
+ATOMIC64_OPS(sub, sub, J)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
+
+ATOMIC64_OPS(and, and, L)
+ATOMIC64_OPS(or, orr, L)
+ATOMIC64_OPS(xor, eor, L)
+/*
+ * GAS converts the mysterious and undocumented BIC (immediate) alias to
+ * an AND (immediate) instruction with the immediate inverted. We don't
+ * have a constraint for this, so fall back to register.
+ */
+ATOMIC64_OPS(andnot, bic, )
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static __always_inline s64
+__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
+{
+ s64 result;
+ unsigned long tmp;
+
+ asm volatile("// atomic64_dec_if_positive\n"
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " subs %0, %0, #1\n"
+ " b.lt 2f\n"
+ " stlxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ " dmb ish\n"
+ "2:"
+ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
+ :
+ : "cc", "memory");
+
+ return result;
+}
+
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
+static __always_inline u##sz \
+__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
+ unsigned long old, \
+ u##sz new) \
+{ \
+ unsigned long tmp; \
+ u##sz oldval; \
+ \
+ /* \
+ * Sub-word sizes require explicit casting so that the compare \
+ * part of the cmpxchg doesn't end up interpreting non-zero \
+ * upper bits of the register containing "old". \
+ */ \
+ if (sz < 32) \
+ old = (u##sz)old; \
+ \
+ asm volatile( \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
+ " cbnz %" #w "[tmp], 2f\n" \
+ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
+ " " #mb "\n" \
+ "2:" \
+ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
+ [v] "+Q" (*(u##sz *)ptr) \
+ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \
+ : cl); \
+ \
+ return oldval; \
+}
+
+/*
+ * Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
+ * handle the 'K' constraint for the value 4294967295 - thus we use no
+ * constraint for 32 bit operations.
+ */
+__CMPXCHG_CASE(w, b, , 8, , , , , K)
+__CMPXCHG_CASE(w, h, , 16, , , , , K)
+__CMPXCHG_CASE(w, , , 32, , , , , K)
+__CMPXCHG_CASE( , , , 64, , , , , L)
+__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory", K)
+__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory", K)
+__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory", K)
+__CMPXCHG_CASE( , , acq_, 64, , a, , "memory", L)
+__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory", K)
+__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory", K)
+__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory", K)
+__CMPXCHG_CASE( , , rel_, 64, , , l, "memory", L)
+__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory", K)
+__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory", K)
+__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory", K)
+__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
+
+#undef __CMPXCHG_CASE
+
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __CMPXCHG128(name, mb, rel, cl...) \
+static __always_inline u128 \
+__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
+{ \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ unsigned int tmp; \
+ \
+ asm volatile("// __cmpxchg128" #name "\n" \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ldxp %[rl], %[rh], %[v]\n" \
+ " cmp %[rl], %[ol]\n" \
+ " ccmp %[rh], %[oh], 0, eq\n" \
+ " b.ne 2f\n" \
+ " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
+ " " #mb "\n" \
+ "2:" \
+ : [v] "+Q" (*(u128 *)ptr), \
+ [rl] "=&r" (r.low), [rh] "=&r" (r.high), \
+ [tmp] "=&r" (tmp) \
+ : [ol] "r" (o.low), [oh] "r" (o.high), \
+ [nl] "r" (n.low), [nh] "r" (n.high) \
+ : "cc", ##cl); \
+ \
+ return r.full; \
+}
+
+__CMPXCHG128( , , )
+__CMPXCHG128(_mb, dmb ish, l, "memory")
+
+#undef __CMPXCHG128
+
+#undef K
+
+#endif /* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
new file mode 100644
index 000000000000..afad1849c4cf
--- /dev/null
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Based on arch/arm/include/asm/atomic.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2002 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_ATOMIC_LSE_H
+#define __ASM_ATOMIC_LSE_H
+
+#define ATOMIC_OP(op, asm_op) \
+static __always_inline void \
+__lse_atomic_##op(int i, atomic_t *v) \
+{ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " " #asm_op " %w[i], %[v]\n" \
+ : [v] "+Q" (v->counter) \
+ : [i] "r" (i)); \
+}
+
+ATOMIC_OP(andnot, stclr)
+ATOMIC_OP(or, stset)
+ATOMIC_OP(xor, steor)
+ATOMIC_OP(add, stadd)
+
+static __always_inline void __lse_atomic_sub(int i, atomic_t *v)
+{
+ __lse_atomic_add(-i, v);
+}
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
+static __always_inline int \
+__lse_atomic_fetch_##op##name(int i, atomic_t *v) \
+{ \
+ int old; \
+ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " " #asm_op #mb " %w[i], %w[old], %[v]" \
+ : [v] "+Q" (v->counter), \
+ [old] "=r" (old) \
+ : [i] "r" (i) \
+ : cl); \
+ \
+ return old; \
+}
+
+#define ATOMIC_FETCH_OPS(op, asm_op) \
+ ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP( , al, op, asm_op, "memory")
+
+ATOMIC_FETCH_OPS(andnot, ldclr)
+ATOMIC_FETCH_OPS(or, ldset)
+ATOMIC_FETCH_OPS(xor, ldeor)
+ATOMIC_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
+
+#define ATOMIC_FETCH_OP_SUB(name) \
+static __always_inline int \
+__lse_atomic_fetch_sub##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_add##name(-i, v); \
+}
+
+ATOMIC_FETCH_OP_SUB(_relaxed)
+ATOMIC_FETCH_OP_SUB(_acquire)
+ATOMIC_FETCH_OP_SUB(_release)
+ATOMIC_FETCH_OP_SUB( )
+
+#undef ATOMIC_FETCH_OP_SUB
+
+#define ATOMIC_OP_ADD_SUB_RETURN(name) \
+static __always_inline int \
+__lse_atomic_add_return##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_add##name(i, v) + i; \
+} \
+ \
+static __always_inline int \
+__lse_atomic_sub_return##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_sub(i, v) - i; \
+}
+
+ATOMIC_OP_ADD_SUB_RETURN(_relaxed)
+ATOMIC_OP_ADD_SUB_RETURN(_acquire)
+ATOMIC_OP_ADD_SUB_RETURN(_release)
+ATOMIC_OP_ADD_SUB_RETURN( )
+
+#undef ATOMIC_OP_ADD_SUB_RETURN
+
+static __always_inline void __lse_atomic_and(int i, atomic_t *v)
+{
+ return __lse_atomic_andnot(~i, v);
+}
+
+#define ATOMIC_FETCH_OP_AND(name) \
+static __always_inline int \
+__lse_atomic_fetch_and##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_andnot##name(~i, v); \
+}
+
+ATOMIC_FETCH_OP_AND(_relaxed)
+ATOMIC_FETCH_OP_AND(_acquire)
+ATOMIC_FETCH_OP_AND(_release)
+ATOMIC_FETCH_OP_AND( )
+
+#undef ATOMIC_FETCH_OP_AND
+
+#define ATOMIC64_OP(op, asm_op) \
+static __always_inline void \
+__lse_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " " #asm_op " %[i], %[v]\n" \
+ : [v] "+Q" (v->counter) \
+ : [i] "r" (i)); \
+}
+
+ATOMIC64_OP(andnot, stclr)
+ATOMIC64_OP(or, stset)
+ATOMIC64_OP(xor, steor)
+ATOMIC64_OP(add, stadd)
+
+static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+{
+ __lse_atomic64_add(-i, v);
+}
+
+#undef ATOMIC64_OP
+
+#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
+static __always_inline long \
+__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
+{ \
+ s64 old; \
+ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " " #asm_op #mb " %[i], %[old], %[v]" \
+ : [v] "+Q" (v->counter), \
+ [old] "=r" (old) \
+ : [i] "r" (i) \
+ : cl); \
+ \
+ return old; \
+}
+
+#define ATOMIC64_FETCH_OPS(op, asm_op) \
+ ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP( , al, op, asm_op, "memory")
+
+ATOMIC64_FETCH_OPS(andnot, ldclr)
+ATOMIC64_FETCH_OPS(or, ldset)
+ATOMIC64_FETCH_OPS(xor, ldeor)
+ATOMIC64_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
+
+#define ATOMIC64_FETCH_OP_SUB(name) \
+static __always_inline long \
+__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_add##name(-i, v); \
+}
+
+ATOMIC64_FETCH_OP_SUB(_relaxed)
+ATOMIC64_FETCH_OP_SUB(_acquire)
+ATOMIC64_FETCH_OP_SUB(_release)
+ATOMIC64_FETCH_OP_SUB( )
+
+#undef ATOMIC64_FETCH_OP_SUB
+
+#define ATOMIC64_OP_ADD_SUB_RETURN(name) \
+static __always_inline long \
+__lse_atomic64_add_return##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_add##name(i, v) + i; \
+} \
+ \
+static __always_inline long \
+__lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_sub##name(i, v) - i; \
+}
+
+ATOMIC64_OP_ADD_SUB_RETURN(_relaxed)
+ATOMIC64_OP_ADD_SUB_RETURN(_acquire)
+ATOMIC64_OP_ADD_SUB_RETURN(_release)
+ATOMIC64_OP_ADD_SUB_RETURN( )
+
+#undef ATOMIC64_OP_ADD_SUB_RETURN
+
+static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v)
+{
+ return __lse_atomic64_andnot(~i, v);
+}
+
+#define ATOMIC64_FETCH_OP_AND(name) \
+static __always_inline long \
+__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_andnot##name(~i, v); \
+}
+
+ATOMIC64_FETCH_OP_AND(_relaxed)
+ATOMIC64_FETCH_OP_AND(_acquire)
+ATOMIC64_FETCH_OP_AND(_release)
+ATOMIC64_FETCH_OP_AND( )
+
+#undef ATOMIC64_FETCH_OP_AND
+
+static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long tmp;
+
+ asm volatile(
+ __LSE_PREAMBLE
+ "1: ldr %x[tmp], %[v]\n"
+ " subs %[ret], %x[tmp], #1\n"
+ " b.lt 2f\n"
+ " casal %x[tmp], %[ret], %[v]\n"
+ " sub %x[tmp], %x[tmp], #1\n"
+ " sub %x[tmp], %x[tmp], %[ret]\n"
+ " cbnz %x[tmp], 1b\n"
+ "2:"
+ : [ret] "+&r" (v), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)
+ :
+ : "cc", "memory");
+
+ return (long)v;
+}
+
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
+static __always_inline u##sz \
+__lse__cmpxchg_case_##name##sz(volatile void *ptr, \
+ u##sz old, \
+ u##sz new) \
+{ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " cas" #mb #sfx " %" #w "[old], %" #w "[new], %[v]\n" \
+ : [v] "+Q" (*(u##sz *)ptr), \
+ [old] "+r" (old) \
+ : [new] "rZ" (new) \
+ : cl); \
+ \
+ return old; \
+}
+
+__CMPXCHG_CASE(w, b, , 8, )
+__CMPXCHG_CASE(w, h, , 16, )
+__CMPXCHG_CASE(w, , , 32, )
+__CMPXCHG_CASE(x, , , 64, )
+__CMPXCHG_CASE(w, b, acq_, 8, a, "memory")
+__CMPXCHG_CASE(w, h, acq_, 16, a, "memory")
+__CMPXCHG_CASE(w, , acq_, 32, a, "memory")
+__CMPXCHG_CASE(x, , acq_, 64, a, "memory")
+__CMPXCHG_CASE(w, b, rel_, 8, l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16, l, "memory")
+__CMPXCHG_CASE(w, , rel_, 32, l, "memory")
+__CMPXCHG_CASE(x, , rel_, 64, l, "memory")
+__CMPXCHG_CASE(w, b, mb_, 8, al, "memory")
+__CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
+__CMPXCHG_CASE(w, , mb_, 32, al, "memory")
+__CMPXCHG_CASE(x, , mb_, 64, al, "memory")
+
+#undef __CMPXCHG_CASE
+
+#define __CMPXCHG128(name, mb, cl...) \
+static __always_inline u128 \
+__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
+{ \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ register unsigned long x0 asm ("x0") = o.low; \
+ register unsigned long x1 asm ("x1") = o.high; \
+ register unsigned long x2 asm ("x2") = n.low; \
+ register unsigned long x3 asm ("x3") = n.high; \
+ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
+ \
+ asm volatile( \
+ __LSE_PREAMBLE \
+ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
+ : [old1] "+&r" (x0), [old2] "+&r" (x1), \
+ [v] "+Q" (*(u128 *)ptr) \
+ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
+ [oldval1] "r" (o.low), [oldval2] "r" (o.high) \
+ : cl); \
+ \
+ r.low = x0; r.high = x1; \
+ \
+ return r.full; \
+}
+
+__CMPXCHG128( , )
+__CMPXCHG128(_mb, al, "memory")
+
+#undef __CMPXCHG128
+
+#endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index d4a63338a53c..9495c4441a46 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -1,52 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/barrier.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+#include <linux/kasan-checks.h>
+
+#include <asm/alternative-macros.h>
+
+#define __nops(n) ".rept " #n "\nnop\n.endr\n"
+#define nops(n) asm volatile(__nops(n))
#define sev() asm volatile("sev" : : : "memory")
#define wfe() asm volatile("wfe" : : : "memory")
+#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \
+ : : "r" (val) : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
+#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \
+ : : "r" (val) : "memory")
#define isb() asm volatile("isb" : : : "memory")
-#define dsb() asm volatile("dsb sy" : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
-#define mb() dsb()
-#define rmb() asm volatile("dsb ld" : : : "memory")
-#define wmb() asm volatile("dsb st" : : : "memory")
+#define psb_csync() asm volatile("hint #17" : : : "memory")
+#define __tsb_csync() asm volatile("hint #18" : : : "memory")
+#define csdb() asm volatile("hint #20" : : : "memory")
-#ifndef CONFIG_SMP
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
+/*
+ * Data Gathering Hint:
+ * This instruction prevents merging memory accesses with Normal-NC or
+ * Device-GRE attributes before the hint instruction with any memory accesses
+ * appearing after the hint instruction.
+ */
+#define dgh() asm volatile("hint #6" : : : "memory")
+
+#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
+ SB_BARRIER_INSN"nop\n", \
+ ARM64_HAS_SB))
+
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#define pmr_sync() \
+ do { \
+ asm volatile( \
+ ALTERNATIVE_CB("dsb sy", \
+ ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \
+ alt_cb_patch_nops) \
+ ); \
+ } while(0)
#else
-#define smp_mb() asm volatile("dmb ish" : : : "memory")
-#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
+#define pmr_sync() do {} while (0)
#endif
-#define read_barrier_depends() do { } while(0)
-#define smp_read_barrier_depends() do { } while(0)
+#define __mb() dsb(sy)
+#define __rmb() dsb(ld)
+#define __wmb() dsb(st)
+
+#define __dma_mb() dmb(osh)
+#define __dma_rmb() dmb(oshld)
+#define __dma_wmb() dmb(oshst)
+
+#define io_stop_wc() dgh()
+
+#define tsb_csync() \
+ do { \
+ /* \
+ * CPUs affected by Arm Erratum 2054223 or 2067961 needs \
+ * another TSB to ensure the trace is flushed. The barriers \
+ * don't have to be strictly back to back, as long as the \
+ * CPU is in trace prohibited state. \
+ */ \
+ if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \
+ __tsb_csync(); \
+ __tsb_csync(); \
+ } while (0)
+
+/*
+ * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
+ * and 0 otherwise.
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long idx,
+ unsigned long sz)
+{
+ unsigned long mask;
+
+ asm volatile(
+ " cmp %1, %2\n"
+ " sbc %0, xzr, xzr\n"
+ : "=r" (mask)
+ : "r" (idx), "Ir" (sz)
+ : "cc");
+
+ csdb();
+ return mask;
+}
+
+/*
+ * Ensure that reads of the counter are treated the same as memory reads
+ * for the purposes of ordering by subsequent memory barriers.
+ *
+ * This insanity brought to you by speculative system register reads,
+ * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ *
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
+ */
+#define arch_counter_enforce_ordering(val) do { \
+ u64 tmp, _val = (val); \
+ \
+ asm volatile( \
+ " eor %0, %1, %1\n" \
+ " add %0, sp, %0\n" \
+ " ldr xzr, [%0]" \
+ : "=r" (tmp) : "r" (_val)); \
+} while (0)
+
+#define __smp_mb() dmb(ish)
+#define __smp_rmb() dmb(ishld)
+#define __smp_wmb() dmb(ishst)
+
+#define __smp_store_release(p, v) \
+do { \
+ typeof(p) __p = (p); \
+ union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u = \
+ { .__val = (__force __unqual_scalar_typeof(*p)) (v) }; \
+ compiletime_assert_atomic_type(*p); \
+ kasan_check_write(__p, sizeof(*p)); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("stlrb %w1, %0" \
+ : "=Q" (*__p) \
+ : "rZ" (*(__u8 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("stlrh %w1, %0" \
+ : "=Q" (*__p) \
+ : "rZ" (*(__u16 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("stlr %w1, %0" \
+ : "=Q" (*__p) \
+ : "rZ" (*(__u32 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("stlr %x1, %0" \
+ : "=Q" (*__p) \
+ : "rZ" (*(__u64 *)__u.__c) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+#define __smp_load_acquire(p) \
+({ \
+ union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u; \
+ typeof(p) __p = (p); \
+ compiletime_assert_atomic_type(*p); \
+ kasan_check_read(__p, sizeof(*p)); \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("ldarb %w0, %1" \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*__p) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("ldarh %w0, %1" \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*__p) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*__p) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("ldar %0, %1" \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*__p) : "memory"); \
+ break; \
+ } \
+ (typeof(*p))__u.__val; \
+})
+
+#define smp_cond_load_relaxed(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ (typeof(*ptr))VAL; \
+})
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
-#define nop() asm volatile("nop");
+#include <asm-generic/barrier.h>
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_BARRIER_H */
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index aa5b59d6ba43..9b3c787132d2 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -1,47 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_BITOPS_H
#define __ASM_BITOPS_H
#include <linux/compiler.h>
-#include <asm/barrier.h>
-
-/*
- * clear_bit may not imply a memory barrier
- */
-#ifndef smp_mb__before_clear_bit
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() smp_mb()
-#endif
-
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif
-/*
- * Little endian assembly atomic bitops.
- */
-extern void set_bit(int nr, volatile unsigned long *p);
-extern void clear_bit(int nr, volatile unsigned long *p);
-extern void change_bit(int nr, volatile unsigned long *p);
-extern int test_and_set_bit(int nr, volatile unsigned long *p);
-extern int test_and_clear_bit(int nr, volatile unsigned long *p);
-extern int test_and_change_bit(int nr, volatile unsigned long *p);
-
#include <asm-generic/bitops/builtin-__ffs.h>
#include <asm-generic/bitops/builtin-ffs.h>
#include <asm-generic/bitops/builtin-__fls.h>
@@ -49,19 +18,14 @@ extern int test_and_change_bit(int nr, volatile unsigned long *p);
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/le.h>
-
-/*
- * Ext2 is defined to use little-endian byte ordering.
- */
-#define ext2_set_bit_atomic(lock, nr, p) test_and_set_bit_le(nr, p)
-#define ext2_clear_bit_atomic(lock, nr, p) test_and_clear_bit_le(nr, p)
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
#endif /* __ASM_BITOPS_H */
diff --git a/arch/arm64/include/asm/bitrev.h b/arch/arm64/include/asm/bitrev.h
new file mode 100644
index 000000000000..6faf9fba8c65
--- /dev/null
+++ b/arch/arm64/include/asm/bitrev.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+ __asm__ ("rbit %w0, %w1" : "=r" (x) : "r" (x));
+ return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+ return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+ return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
new file mode 100644
index 000000000000..3e7943fd17a4
--- /dev/null
+++ b/arch/arm64/include/asm/boot.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_BOOT_H
+#define __ASM_BOOT_H
+
+#include <linux/sizes.h>
+
+/*
+ * arm64 requires the DTB to be 8 byte aligned and
+ * not exceed 2MB in size.
+ */
+#define MIN_FDT_ALIGN 8
+#define MAX_FDT_SIZE SZ_2M
+
+/*
+ * arm64 requires the kernel image to placed at a 2 MB aligned base address
+ */
+#define MIN_KIMG_ALIGN SZ_2M
+
+#endif
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644
index 000000000000..beb42c62b6ac
--- /dev/null
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * 0x004: for installing kprobes
+ * 0x005: for installing uprobes
+ * 0x006: for kprobe software single-step
+ * 0x007: for kretprobe return
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff)
+ * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8)
+ * 0x8xxx: Control-Flow Integrity traps
+ */
+#define KPROBES_BRK_IMM 0x004
+#define UPROBES_BRK_IMM 0x005
+#define KPROBES_BRK_SS_IMM 0x006
+#define KRETPROBES_BRK_IMM 0x007
+#define FAULT_BRK_IMM 0x100
+#define KGDB_DYN_DBG_BRK_IMM 0x400
+#define KGDB_COMPILED_DBG_BRK_IMM 0x401
+#define BUG_BRK_IMM 0x800
+#define KASAN_BRK_IMM 0x900
+#define KASAN_BRK_MASK 0x0ff
+#define UBSAN_BRK_IMM 0x5500
+#define UBSAN_BRK_MASK 0x00ff
+
+#define CFI_BRK_IMM_TARGET GENMASK(4, 0)
+#define CFI_BRK_IMM_TYPE GENMASK(9, 5)
+#define CFI_BRK_IMM_BASE 0x8000
+#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE)
+
+#endif
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
new file mode 100644
index 000000000000..bceeaec21fb9
--- /dev/null
+++ b/arch/arm64/include/asm/bug.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 ARM Limited
+ * Author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef _ARCH_ARM64_ASM_BUG_H
+#define _ARCH_ARM64_ASM_BUG_H
+
+#include <linux/stringify.h>
+
+#include <asm/asm-bug.h>
+
+#define __BUG_FLAGS(flags) \
+ asm volatile (__stringify(ASM_BUG_FLAGS(flags)));
+
+#define BUG() do { \
+ __BUG_FLAGS(0); \
+ unreachable(); \
+} while (0)
+
+#define __WARN_FLAGS(cond_str, flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))
+
+#define HAVE_ARCH_BUG
+
+#include <asm-generic/bug.h>
+
+#endif /* ! _ARCH_ARM64_ASM_BUG_H */
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 390308a67f0d..dd2c8586a725 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -1,24 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H
-#define L1_CACHE_SHIFT 6
+#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+#define CLIDR_LOUU_SHIFT 27
+#define CLIDR_LOC_SHIFT 24
+#define CLIDR_LOUIS_SHIFT 21
+
+#define CLIDR_LOUU(clidr) (((clidr) >> CLIDR_LOUU_SHIFT) & 0x7)
+#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
+#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
+
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
+#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
+
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
@@ -26,7 +32,109 @@
* cache before the transfer is done, causing old data to be seen by
* the CPU.
*/
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#define ARCH_SLAB_MINALIGN 8
+#define ARCH_DMA_MINALIGN (128)
+#define ARCH_KMALLOC_MINALIGN (8)
+
+#if !defined(__ASSEMBLER__) && !defined(BUILD_VDSO)
+
+#include <linux/bitops.h>
+#include <linux/kasan-enabled.h>
+
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_KASAN_SW_TAGS
+#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
+#elif defined(CONFIG_KASAN_HW_TAGS)
+static inline unsigned int arch_slab_minalign(void)
+{
+ return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
+ __alignof__(unsigned long long);
+}
+#define arch_slab_minalign() arch_slab_minalign()
+#endif
+
+#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
+
+#define ICACHEF_ALIASING 0
+extern unsigned long __icache_flags;
+
+/*
+ * Whilst the D-side always behaves as PIPT on AArch64, aliasing is
+ * permitted in the I-cache.
+ */
+static inline int icache_is_aliasing(void)
+{
+ return test_bit(ICACHEF_ALIASING, &__icache_flags);
+}
+
+static inline u32 cache_type_cwg(void)
+{
+ return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
+}
+
+#define __read_mostly __section(".data..read_mostly")
+
+static inline int cache_line_size_of_cpu(void)
+{
+ u32 cwg = cache_type_cwg();
+
+ return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
+}
+
+int cache_line_size(void);
+
+#define dma_get_cache_alignment cache_line_size
+
+/* Compress a u64 MPIDR value into 32 bits. */
+static inline u64 arch_compact_of_hwid(u64 id)
+{
+ u64 aff3 = MPIDR_AFFINITY_LEVEL(id, 3);
+
+ /*
+ * These bits are expected to be RES0. If not, return a value with
+ * the upper 32 bits set to force the caller to give up on 32 bit
+ * cache ids.
+ */
+ if (FIELD_GET(GENMASK_ULL(63, 40), id))
+ return id;
+
+ return (aff3 << 24) | FIELD_GET(GENMASK_ULL(23, 0), id);
+}
+#define arch_compact_of_hwid arch_compact_of_hwid
+
+/*
+ * Read the effective value of CTR_EL0.
+ *
+ * According to ARM ARM for ARMv8-A (ARM DDI 0487C.a),
+ * section D10.2.33 "CTR_EL0, Cache Type Register" :
+ *
+ * CTR_EL0.IDC reports the data cache clean requirements for
+ * instruction to data coherence.
+ *
+ * 0 - dcache clean to PoU is required unless :
+ * (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0)
+ * 1 - dcache clean to PoU is not required for i-to-d coherence.
+ *
+ * This routine provides the CTR_EL0 with the IDC field updated to the
+ * effective state.
+ */
+static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
+{
+ u32 ctr = read_cpuid_cachetype();
+
+ if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
+ u64 clidr = read_sysreg(clidr_el1);
+
+ if (CLIDR_LOC(clidr) == 0 ||
+ (CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
+ ctr |= BIT(CTR_EL0_IDC_SHIFT);
+ }
+
+ return ctr;
+}
+
+#endif /* !defined(__ASSEMBLER__) && !defined(BUILD_VDSO) */
#endif
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index fea9ee327206..28ab96e808ef 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -1,24 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/cacheflush.h
*
* Copyright (C) 1999-2002 Russell King.
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_CACHEFLUSH_H
#define __ASM_CACHEFLUSH_H
+#include <linux/kgdb.h>
#include <linux/mm.h>
/*
@@ -36,53 +26,83 @@
* Start addresses are inclusive and end addresses are exclusive; start
* addresses should be rounded down, end addresses up.
*
- * See Documentation/cachetlb.txt for more information. Please note that
+ * See Documentation/core-api/cachetlb.rst for more information. Please note that
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- * VIPT or ASID-tagged VIVT I-cache.
+ * VIPT I-cache.
+ *
+ * All functions below apply to the interval [start, end)
+ * - start - virtual start address (inclusive)
+ * - end - virtual end address (exclusive)
+ *
+ * caches_clean_inval_pou(start, end)
+ *
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
+ *
+ * caches_clean_inval_user_pou(start, end)
+ *
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
+ * Use only if the region might access user memory.
+ *
+ * icache_inval_pou(start, end)
*
- * flush_cache_all()
+ * Invalidate I-cache region to the Point of Unification.
*
- * Unconditionally clean and invalidate the entire cache.
+ * dcache_clean_inval_poc(start, end)
*
- * flush_cache_mm(mm)
+ * Clean and invalidate D-cache region to the Point of Coherency.
*
- * Clean and invalidate all user space cache entries
- * before a change of page tables.
+ * dcache_inval_poc(start, end)
*
- * flush_icache_range(start, end)
+ * Invalidate D-cache region to the Point of Coherency.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * dcache_clean_poc(start, end)
*
- * __flush_cache_user_range(start, end)
+ * Clean D-cache region to the Point of Coherency.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * dcache_clean_pop(start, end)
*
- * __flush_dcache_area(kaddr, size)
+ * Clean D-cache region to the Point of Persistence.
*
- * Ensure that the data held in page is written back.
- * - kaddr - page address
- * - size - region size
+ * dcache_clean_pou(start, end)
+ *
+ * Clean D-cache region to the Point of Unification.
*/
-extern void flush_cache_all(void);
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
-extern void flush_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __flush_cache_user_range(unsigned long start, unsigned long end);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
-static inline void flush_cache_mm(struct mm_struct *mm)
+static inline void flush_icache_range(unsigned long start, unsigned long end)
{
-}
+ caches_clean_inval_pou(start, end);
-static inline void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long pfn)
-{
+ /*
+ * IPI all online CPUs so that they undergo a context synchronization
+ * event and are forced to refetch the new instructions.
+ */
+
+ /*
+ * KGDB performs cache maintenance with interrupts disabled, so we
+ * will deadlock trying to IPI the secondary CPUs. In theory, we can
+ * set CACHE_FLUSH_IS_SAFE to 0 to avoid this known issue, but that
+ * just means that KGDB will elide the maintenance altogether! As it
+ * turns out, KGDB uses IPIs to round-up the secondary CPUs during
+ * the patching operation, so we don't need extra IPIs here anyway.
+ * In which case, add a KGDB-specific bodge and return early.
+ */
+ if (in_dbg_master())
+ return;
+
+ kick_all_cpus_sync();
}
+#define flush_icache_range flush_icache_range
/*
* Copy user data from/to a page which is mapped into a different
@@ -91,18 +111,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
*/
extern void copy_to_user_page(struct vm_area_struct *, struct page *,
unsigned long, void *, const void *, unsigned long);
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- } while (0)
-
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define copy_to_user_page copy_to_user_page
/*
- * flush_dcache_page is used when the kernel has written to the page
+ * flush_dcache_folio is used when the kernel has written to the page
* cache page at virtual address page->virtual.
*
- * If this page isn't mapped (ie, page_mapping == NULL), or it might
+ * If this page isn't mapped (ie, folio_mapping == NULL), or it might
* have userspace mappings, then we _must_ always clean + invalidate
* the dcache entries associated with the kernel mapping.
*
@@ -112,41 +127,18 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
+#define flush_dcache_folio flush_dcache_folio
-static inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
{
- asm("ic ialluis");
-}
-
-#define flush_dcache_mmap_lock(mapping) \
- spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
- spin_unlock_irq(&(mapping)->tree_lock)
-
-/*
- * We don't appear to need to do anything here. In fact, if we did, we'd
- * duplicate cache flushing elsewhere performed by flush_dcache_page().
- */
-#define flush_icache_page(vma,page) do { } while (0)
+ if (alternative_has_cap_unlikely(ARM64_HAS_CACHE_DIC))
+ return;
-/*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT
- * caches, since the direct-mappings of these pages may contain cached
- * data, we need to do a full cache flush to ensure that writebacks
- * don't corrupt data placed into these pages via the new mappings.
- */
-static inline void flush_cache_vmap(unsigned long start, unsigned long end)
-{
- /*
- * set_pte_at() called from vmap_pte_range() does not
- * have a DSB after cleaning the cache line.
- */
- dsb();
+ asm("ic ialluis");
+ dsb(ish);
}
-static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-}
+#include <asm-generic/cacheflush.h>
-#endif
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
deleted file mode 100644
index 85f5f511352a..000000000000
--- a/arch/arm64/include/asm/cachetype.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CACHETYPE_H
-#define __ASM_CACHETYPE_H
-
-#include <asm/cputype.h>
-
-#define CTR_L1IP_SHIFT 14
-#define CTR_L1IP_MASK 3
-
-#define ICACHE_POLICY_RESERVED 0
-#define ICACHE_POLICY_AIVIVT 1
-#define ICACHE_POLICY_VIPT 2
-#define ICACHE_POLICY_PIPT 3
-
-static inline u32 icache_policy(void)
-{
- return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK;
-}
-
-/*
- * Whilst the D-side always behaves as PIPT on AArch64, aliasing is
- * permitted in the I-cache.
- */
-static inline int icache_is_aliasing(void)
-{
- return icache_policy() != ICACHE_POLICY_PIPT;
-}
-
-static inline int icache_is_aivivt(void)
-{
- return icache_policy() == ICACHE_POLICY_AIVIVT;
-}
-
-#endif /* __ASM_CACHETYPE_H */
diff --git a/arch/arm64/include/asm/cfi.h b/arch/arm64/include/asm/cfi.h
new file mode 100644
index 000000000000..ab90f0351b7a
--- /dev/null
+++ b/arch/arm64/include/asm/cfi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_CFI_H
+#define _ASM_ARM64_CFI_H
+
+#define __bpfcall
+
+#endif /* _ASM_ARM64_CFI_H */
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
new file mode 100644
index 000000000000..dc52b733675d
--- /dev/null
+++ b/arch/arm64/include/asm/checksum.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ */
+#ifndef __ASM_CHECKSUM_H
+#define __ASM_CHECKSUM_H
+
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+ sum += (sum >> 16) | (sum << 16);
+ return ~(__force __sum16)(sum >> 16);
+}
+#define csum_fold csum_fold
+
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ __uint128_t tmp;
+ u64 sum;
+ int n = ihl; /* we want it signed */
+
+ tmp = *(const __uint128_t *)iph;
+ iph += 16;
+ n -= 4;
+ tmp += ((tmp >> 64) | (tmp << 64));
+ sum = tmp >> 64;
+ do {
+ sum += *(const u32 *)iph;
+ iph += 4;
+ } while (--n > 0);
+
+ sum += ((sum >> 32) | (sum << 32));
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+#include <asm-generic/checksum.h>
+
+#endif /* __ASM_CHECKSUM_H */
diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h
new file mode 100644
index 000000000000..482185566b0c
--- /dev/null
+++ b/arch/arm64/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 8a8ce0e73a38..d7a540736741 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -1,176 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/cmpxchg.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_CMPXCHG_H
#define __ASM_CMPXCHG_H
-#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
#include <asm/barrier.h>
+#include <asm/lse.h>
+
+/*
+ * We need separate acquire parameters for ll/sc and lse, since the full
+ * barrier case is generated as release+dmb for the former and
+ * acquire+release for the latter.
+ */
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
+static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{ \
+ u##sz ret; \
+ unsigned long tmp; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "0, %2\n" \
+ " st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb, \
+ /* LSE atomics */ \
+ " swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n" \
+ __nops(3) \
+ " " #nop_lse) \
+ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr) \
+ : "r" (x) \
+ : cl); \
+ \
+ return ret; \
+}
+
+__XCHG_CASE(w, b, , 8, , , , , , )
+__XCHG_CASE(w, h, , 16, , , , , , )
+__XCHG_CASE(w, , , 32, , , , , , )
+__XCHG_CASE( , , , 64, , , , , , )
+__XCHG_CASE(w, b, acq_, 8, , , a, a, , "memory")
+__XCHG_CASE(w, h, acq_, 16, , , a, a, , "memory")
+__XCHG_CASE(w, , acq_, 32, , , a, a, , "memory")
+__XCHG_CASE( , , acq_, 64, , , a, a, , "memory")
+__XCHG_CASE(w, b, rel_, 8, , , , , l, "memory")
+__XCHG_CASE(w, h, rel_, 16, , , , , l, "memory")
+__XCHG_CASE(w, , rel_, 32, , , , , l, "memory")
+__XCHG_CASE( , , rel_, 64, , , , , l, "memory")
+__XCHG_CASE(w, b, mb_, 8, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, h, mb_, 16, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, , mb_, 32, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
-{
- unsigned long ret, tmp;
-
- switch (size) {
- case 1:
- asm volatile("// __xchg1\n"
- "1: ldaxrb %w0, %2\n"
- " stlxrb %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
- : "r" (x)
- : "cc", "memory");
- break;
- case 2:
- asm volatile("// __xchg2\n"
- "1: ldaxrh %w0, %2\n"
- " stlxrh %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
- : "r" (x)
- : "cc", "memory");
- break;
- case 4:
- asm volatile("// __xchg4\n"
- "1: ldaxr %w0, %2\n"
- " stlxr %w1, %w3, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
- : "r" (x)
- : "cc", "memory");
- break;
- case 8:
- asm volatile("// __xchg8\n"
- "1: ldaxr %0, %2\n"
- " stlxr %w1, %3, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
- : "r" (x)
- : "cc", "memory");
- break;
- default:
- BUILD_BUG();
- }
-
- return ret;
+#undef __XCHG_CASE
+
+#define __XCHG_GEN(sfx) \
+static __always_inline unsigned long \
+__arch_xchg##sfx(unsigned long x, volatile void *ptr, int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __xchg_case##sfx##_8(x, ptr); \
+ case 2: \
+ return __xchg_case##sfx##_16(x, ptr); \
+ case 4: \
+ return __xchg_case##sfx##_32(x, ptr); \
+ case 8: \
+ return __xchg_case##sfx##_64(x, ptr); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__XCHG_GEN()
+__XCHG_GEN(_acq)
+__XCHG_GEN(_rel)
+__XCHG_GEN(_mb)
+
+#undef __XCHG_GEN
+
+#define __xchg_wrapper(sfx, ptr, x) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
+
+/* xchg */
+#define arch_xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__)
+#define arch_xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__)
+#define arch_xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
+#define arch_xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
+
+#define __CMPXCHG_CASE(name, sz) \
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
+ u##sz old, \
+ u##sz new) \
+{ \
+ return __lse_ll_sc_body(_cmpxchg_case_##name##sz, \
+ ptr, old, new); \
}
-#define xchg(ptr,x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long oldval = 0, res;
-
- switch (size) {
- case 1:
- do {
- asm volatile("// __cmpxchg1\n"
- " ldxrb %w1, %2\n"
- " mov %w0, #0\n"
- " cmp %w1, %w3\n"
- " b.ne 1f\n"
- " stxrb %w0, %w4, %2\n"
- "1:\n"
- : "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
- : "Ir" (old), "r" (new)
- : "cc");
- } while (res);
- break;
-
- case 2:
- do {
- asm volatile("// __cmpxchg2\n"
- " ldxrh %w1, %2\n"
- " mov %w0, #0\n"
- " cmp %w1, %w3\n"
- " b.ne 1f\n"
- " stxrh %w0, %w4, %2\n"
- "1:\n"
- : "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
- : "Ir" (old), "r" (new)
- : "cc");
- } while (res);
- break;
-
- case 4:
- do {
- asm volatile("// __cmpxchg4\n"
- " ldxr %w1, %2\n"
- " mov %w0, #0\n"
- " cmp %w1, %w3\n"
- " b.ne 1f\n"
- " stxr %w0, %w4, %2\n"
- "1:\n"
- : "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
- : "Ir" (old), "r" (new)
- : "cc");
- } while (res);
- break;
-
- case 8:
- do {
- asm volatile("// __cmpxchg8\n"
- " ldxr %1, %2\n"
- " mov %w0, #0\n"
- " cmp %1, %3\n"
- " b.ne 1f\n"
- " stxr %w0, %4, %2\n"
- "1:\n"
- : "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
- : "Ir" (old), "r" (new)
- : "cc");
- } while (res);
- break;
-
- default:
- BUILD_BUG();
- }
-
- return oldval;
+__CMPXCHG_CASE( , 8)
+__CMPXCHG_CASE( , 16)
+__CMPXCHG_CASE( , 32)
+__CMPXCHG_CASE( , 64)
+__CMPXCHG_CASE(acq_, 8)
+__CMPXCHG_CASE(acq_, 16)
+__CMPXCHG_CASE(acq_, 32)
+__CMPXCHG_CASE(acq_, 64)
+__CMPXCHG_CASE(rel_, 8)
+__CMPXCHG_CASE(rel_, 16)
+__CMPXCHG_CASE(rel_, 32)
+__CMPXCHG_CASE(rel_, 64)
+__CMPXCHG_CASE(mb_, 8)
+__CMPXCHG_CASE(mb_, 16)
+__CMPXCHG_CASE(mb_, 32)
+__CMPXCHG_CASE(mb_, 64)
+
+#undef __CMPXCHG_CASE
+
+#define __CMPXCHG128(name) \
+static inline u128 __cmpxchg128##name(volatile u128 *ptr, \
+ u128 old, u128 new) \
+{ \
+ return __lse_ll_sc_body(_cmpxchg128##name, \
+ ptr, old, new); \
+}
+
+__CMPXCHG128( )
+__CMPXCHG128(_mb)
+
+#undef __CMPXCHG128
+
+#define __CMPXCHG_GEN(sfx) \
+static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
+ unsigned long old, \
+ unsigned long new, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpxchg_case##sfx##_8(ptr, old, new); \
+ case 2: \
+ return __cmpxchg_case##sfx##_16(ptr, old, new); \
+ case 4: \
+ return __cmpxchg_case##sfx##_32(ptr, old, new); \
+ case 8: \
+ return __cmpxchg_case##sfx##_64(ptr, old, new); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+__CMPXCHG_GEN(_rel)
+__CMPXCHG_GEN(_mb)
+
+#undef __CMPXCHG_GEN
+
+#define __cmpxchg_wrapper(sfx, ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg##sfx((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
+
+/* cmpxchg */
+#define arch_cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__)
+#define arch_cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
+#define arch_cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__)
+#define arch_cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__)
+#define arch_cmpxchg_local arch_cmpxchg_relaxed
+
+/* cmpxchg64 */
+#define arch_cmpxchg64_relaxed arch_cmpxchg_relaxed
+#define arch_cmpxchg64_acquire arch_cmpxchg_acquire
+#define arch_cmpxchg64_release arch_cmpxchg_release
+#define arch_cmpxchg64 arch_cmpxchg
+#define arch_cmpxchg64_local arch_cmpxchg_local
+
+/* cmpxchg128 */
+#define system_has_cmpxchg128() 1
+
+#define arch_cmpxchg128(ptr, o, n) \
+({ \
+ __cmpxchg128_mb((ptr), (o), (n)); \
+})
+
+#define arch_cmpxchg128_local(ptr, o, n) \
+({ \
+ __cmpxchg128((ptr), (o), (n)); \
+})
+
+#define __CMPWAIT_CASE(w, sfx, sz) \
+static inline void __cmpwait_case_##sz(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile( \
+ " sevl\n" \
+ " wfe\n" \
+ " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " cbnz %" #w "[tmp], 1f\n" \
+ " wfe\n" \
+ "1:" \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \
+ : [val] "r" (val)); \
}
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- unsigned long ret;
+__CMPWAIT_CASE(w, b, 8);
+__CMPWAIT_CASE(w, h, 16);
+__CMPWAIT_CASE(w, , 32);
+__CMPWAIT_CASE( , , 64);
- smp_mb();
- ret = __cmpxchg(ptr, old, new, size);
- smp_mb();
+#undef __CMPWAIT_CASE
- return ret;
+#define __CMPWAIT_GEN(sfx) \
+static __always_inline void __cmpwait##sfx(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpwait_case##sfx##_8(ptr, (u8)val); \
+ case 2: \
+ return __cmpwait_case##sfx##_16(ptr, (u16)val); \
+ case 4: \
+ return __cmpwait_case##sfx##_32(ptr, val); \
+ case 8: \
+ return __cmpwait_case##sfx##_64(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
}
-#define cmpxchg(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+__CMPWAIT_GEN()
-#define cmpxchg_local(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+#undef __CMPWAIT_GEN
-#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
-#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 899af807ef0f..ae904a1ad529 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -1,21 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_COMPAT_H
#define __ASM_COMPAT_H
-#ifdef __KERNEL__
+
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
+#define __compat_uid_t __compat_uid_t
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_statfs compat_statfs
+
+#include <asm-generic/compat.h>
+
#ifdef CONFIG_COMPAT
/*
@@ -23,95 +26,48 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/ptrace.h>
+#include <linux/sched/task_stack.h>
-#define COMPAT_USER_HZ 100
+#ifdef __AARCH64EB__
+#define COMPAT_UTS_MACHINE "armv8b\0\0"
+#else
#define COMPAT_UTS_MACHINE "armv8l\0\0"
+#endif
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_time_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
-typedef u32 __compat_uid_t;
-typedef u32 __compat_gid_t;
typedef u16 __compat_uid16_t;
typedef u16 __compat_gid16_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
-typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef s32 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
-typedef u32 compat_caddr_t;
-typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s16 compat_short_t;
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
-typedef s64 compat_s64;
-typedef u16 compat_ushort_t;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
-typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
-
-struct compat_timespec {
- compat_time_t tv_sec;
- s32 tv_nsec;
-};
-
-struct compat_timeval {
- compat_time_t tv_sec;
- s32 tv_usec;
-};
struct compat_stat {
+#ifdef __AARCH64EB__
+ short st_dev;
+ short __pad1;
+#else
compat_dev_t st_dev;
+#endif
compat_ino_t st_ino;
compat_mode_t st_mode;
compat_ushort_t st_nlink;
__compat_uid16_t st_uid;
__compat_gid16_t st_gid;
+#ifdef __AARCH64EB__
+ short st_rdev;
+ short __pad2;
+#else
compat_dev_t st_rdev;
+#endif
compat_off_t st_size;
compat_off_t st_blksize;
compat_off_t st_blocks;
- compat_time_t st_atime;
+ old_time32_t st_atime;
compat_ulong_t st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
compat_ulong_t st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
compat_ulong_t st_ctime_nsec;
compat_ulong_t __unused4[2];
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-};
-
struct compat_statfs {
int f_type;
int f_bsize;
@@ -127,157 +83,8 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
-typedef union compat_sigval {
- compat_int_t sival_int;
- compat_uptr_t sival_ptr;
-} compat_sigval_t;
-
-typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- /* The padding is the same size as AArch64. */
- int _pad[128/sizeof(int) - 3];
-
- /* kill() */
- struct {
- compat_pid_t _pid; /* sender's pid */
- __compat_uid32_t _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- compat_pid_t _pid; /* sender's pid */
- __compat_uid32_t _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- compat_pid_t _pid; /* which child */
- __compat_uid32_t _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- struct {
- compat_uptr_t _addr; /* faulting insn/memory ref. */
- short _addr_lsb; /* LSB of the reported address */
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-} compat_siginfo_t;
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
-
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
-
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
- return (void __user *)compat_user_stack_pointer() - len;
-}
-
-struct compat_ipc64_perm {
- compat_key_t key;
- __compat_uid32_t uid;
- __compat_gid32_t gid;
- __compat_uid32_t cuid;
- __compat_gid32_t cgid;
- unsigned short mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- compat_ulong_t unused1;
- compat_ulong_t unused2;
-};
-
-struct compat_semid64_ds {
- struct compat_ipc64_perm sem_perm;
- compat_time_t sem_otime;
- compat_ulong_t __unused1;
- compat_time_t sem_ctime;
- compat_ulong_t __unused2;
- compat_ulong_t sem_nsems;
- compat_ulong_t __unused3;
- compat_ulong_t __unused4;
-};
-
-struct compat_msqid64_ds {
- struct compat_ipc64_perm msg_perm;
- compat_time_t msg_stime;
- compat_ulong_t __unused1;
- compat_time_t msg_rtime;
- compat_ulong_t __unused2;
- compat_time_t msg_ctime;
- compat_ulong_t __unused3;
- compat_ulong_t msg_cbytes;
- compat_ulong_t msg_qnum;
- compat_ulong_t msg_qbytes;
- compat_pid_t msg_lspid;
- compat_pid_t msg_lrpid;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
-struct compat_shmid64_ds {
- struct compat_ipc64_perm shm_perm;
- compat_size_t shm_segsz;
- compat_time_t shm_atime;
- compat_ulong_t __unused1;
- compat_time_t shm_dtime;
- compat_ulong_t __unused2;
- compat_time_t shm_ctime;
- compat_ulong_t __unused3;
- compat_pid_t shm_cpid;
- compat_pid_t shm_lpid;
- compat_ulong_t shm_nattch;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
+#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
+#define COMPAT_MINSIGSTKSZ 2048
static inline int is_compat_task(void)
{
@@ -289,12 +96,9 @@ static inline int is_compat_thread(struct thread_info *thread)
return test_ti_thread_flag(thread, TIF_32BIT);
}
-#else /* !CONFIG_COMPAT */
+long compat_arm_syscall(struct pt_regs *regs, int scno);
-static inline int is_compat_task(void)
-{
- return 0;
-}
+#else /* !CONFIG_COMPAT */
static inline int is_compat_thread(struct thread_info *thread)
{
@@ -302,5 +106,4 @@ static inline int is_compat_thread(struct thread_info *thread)
}
#endif /* CONFIG_COMPAT */
-#endif /* __KERNEL__ */
#endif /* __ASM_COMPAT_H */
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
index ee35fd0f2236..9bbd7b7097ff 100644
--- a/arch/arm64/include/asm/compiler.h
+++ b/arch/arm64/include/asm/compiler.h
@@ -1,30 +1,40 @@
-/*
- * Based on arch/arm/include/asm/compiler.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_COMPILER_H
#define __ASM_COMPILER_H
-/*
- * This is used to ensure the compiler did actually allocate the register we
- * asked it for some inline assembly sequences. Apparently we can't trust the
- * compiler from one version to another so a bit of paranoia won't hurt. This
- * string is meant to be concatenated with the inline asm string and will
- * cause compilation to stop on mismatch. (for details, see gcc PR 15089)
- */
-#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
+#ifdef ARM64_ASM_ARCH
+#define ARM64_ASM_PREAMBLE ".arch " ARM64_ASM_ARCH "\n"
+#else
+#define ARM64_ASM_PREAMBLE
+#endif
-#endif /* __ASM_COMPILER_H */
+#define xpaclri(ptr) \
+({ \
+ register unsigned long __xpaclri_ptr asm("x30") = (ptr); \
+ \
+ asm( \
+ ARM64_ASM_PREAMBLE \
+ " hint #7\n" \
+ : "+r" (__xpaclri_ptr)); \
+ \
+ __xpaclri_ptr; \
+})
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_strip_kernel_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_kernel_insn_pac(ptr) (ptr)
+#endif
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define ptrauth_strip_user_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_user_insn_pac(ptr) (ptr)
+#endif
+
+#if !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC)
+#define __builtin_return_address(val) \
+ (void *)(ptrauth_strip_kernel_insn_pac((unsigned long)__builtin_return_address(val)))
+#endif
+
+#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
new file mode 100644
index 000000000000..71493b760b83
--- /dev/null
+++ b/arch/arm64/include/asm/cpu.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2014 ARM Ltd.
+ */
+#ifndef __ASM_CPU_H
+#define __ASM_CPU_H
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+
+/*
+ * Records attributes of an individual CPU.
+ */
+struct cpuinfo_32bit {
+ u32 reg_id_dfr0;
+ u32 reg_id_dfr1;
+ u32 reg_id_isar0;
+ u32 reg_id_isar1;
+ u32 reg_id_isar2;
+ u32 reg_id_isar3;
+ u32 reg_id_isar4;
+ u32 reg_id_isar5;
+ u32 reg_id_isar6;
+ u32 reg_id_mmfr0;
+ u32 reg_id_mmfr1;
+ u32 reg_id_mmfr2;
+ u32 reg_id_mmfr3;
+ u32 reg_id_mmfr4;
+ u32 reg_id_mmfr5;
+ u32 reg_id_pfr0;
+ u32 reg_id_pfr1;
+ u32 reg_id_pfr2;
+
+ u32 reg_mvfr0;
+ u32 reg_mvfr1;
+ u32 reg_mvfr2;
+};
+
+struct cpuinfo_arm64 {
+ struct kobject kobj;
+ u64 reg_ctr;
+ u64 reg_cntfrq;
+ u64 reg_dczid;
+ u64 reg_midr;
+ u64 reg_revidr;
+ u64 reg_aidr;
+ u64 reg_gmid;
+ u64 reg_smidr;
+ u64 reg_mpamidr;
+
+ u64 reg_id_aa64dfr0;
+ u64 reg_id_aa64dfr1;
+ u64 reg_id_aa64isar0;
+ u64 reg_id_aa64isar1;
+ u64 reg_id_aa64isar2;
+ u64 reg_id_aa64isar3;
+ u64 reg_id_aa64mmfr0;
+ u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
+ u64 reg_id_aa64mmfr3;
+ u64 reg_id_aa64mmfr4;
+ u64 reg_id_aa64pfr0;
+ u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64pfr2;
+ u64 reg_id_aa64zfr0;
+ u64 reg_id_aa64smfr0;
+ u64 reg_id_aa64fpfr0;
+
+ struct cpuinfo_32bit aarch32;
+};
+
+DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
+
+void cpuinfo_store_cpu(void);
+void __init cpuinfo_store_boot_cpu(void);
+
+void __init init_cpu_features(struct cpuinfo_arm64 *info);
+void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+ struct cpuinfo_arm64 *boot);
+
+#endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
new file mode 100644
index 000000000000..a444c8915e88
--- /dev/null
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ */
+#ifndef __ASM_CPU_OPS_H
+#define __ASM_CPU_OPS_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+
+/**
+ * struct cpu_operations - Callback operations for hotplugging CPUs.
+ *
+ * @name: Name of the property as appears in a devicetree cpu node's
+ * enable-method property. On systems booting with ACPI, @name
+ * identifies the struct cpu_operations entry corresponding to
+ * the boot protocol specified in the ACPI MADT table.
+ * @cpu_init: Reads any data necessary for a specific enable-method for a
+ * proposed logical id.
+ * @cpu_prepare: Early one-time preparation step for a cpu. If there is a
+ * mechanism for doing so, tests whether it is possible to boot
+ * the given CPU.
+ * @cpu_boot: Boots a cpu into the kernel.
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary
+ * synchronisation. Called from the cpu being booted.
+ * @cpu_can_disable: Determines whether a CPU can be disabled based on
+ * mechanism-specific information.
+ * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific
+ * reason, which will cause the hot unplug to be aborted. Called
+ * from the cpu to be killed.
+ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
+ * cpu being killed.
+ * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
+ */
+struct cpu_operations {
+ const char *name;
+ int (*cpu_init)(unsigned int);
+ int (*cpu_prepare)(unsigned int);
+ int (*cpu_boot)(unsigned int);
+ void (*cpu_postboot)(void);
+#ifdef CONFIG_HOTPLUG_CPU
+ bool (*cpu_can_disable)(unsigned int cpu);
+ int (*cpu_disable)(unsigned int cpu);
+ void (*cpu_die)(unsigned int cpu);
+ int (*cpu_kill)(unsigned int cpu);
+#endif
+};
+
+int __init init_cpu_ops(int cpu);
+extern const struct cpu_operations *get_cpu_ops(int cpu);
+
+static inline void __init init_bootcpu_ops(void)
+{
+ init_cpu_ops(0);
+}
+
+#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
new file mode 100644
index 000000000000..2c8029472ad4
--- /dev/null
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_CPUCAPS_H
+#define __ASM_CPUCAPS_H
+
+#include <asm/cpucap-defs.h>
+
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+/*
+ * Check whether a cpucap is possible at compiletime.
+ */
+static __always_inline bool
+cpucap_is_possible(const unsigned int cap)
+{
+ compiletime_assert(__builtin_constant_p(cap),
+ "cap must be a constant");
+ compiletime_assert(cap < ARM64_NCAPS,
+ "cap must be < ARM64_NCAPS");
+
+ switch (cap) {
+ case ARM64_HAS_PAN:
+ return IS_ENABLED(CONFIG_ARM64_PAN);
+ case ARM64_HAS_EPAN:
+ return IS_ENABLED(CONFIG_ARM64_EPAN);
+ case ARM64_SVE:
+ return IS_ENABLED(CONFIG_ARM64_SVE);
+ case ARM64_SME:
+ case ARM64_SME2:
+ case ARM64_SME_FA64:
+ return IS_ENABLED(CONFIG_ARM64_SME);
+ case ARM64_HAS_CNP:
+ return IS_ENABLED(CONFIG_ARM64_CNP);
+ case ARM64_HAS_ADDRESS_AUTH:
+ case ARM64_HAS_GENERIC_AUTH:
+ return IS_ENABLED(CONFIG_ARM64_PTR_AUTH);
+ case ARM64_HAS_GIC_PRIO_MASKING:
+ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI);
+ case ARM64_MTE:
+ return IS_ENABLED(CONFIG_ARM64_MTE);
+ case ARM64_BTI:
+ return IS_ENABLED(CONFIG_ARM64_BTI);
+ case ARM64_HAS_TLB_RANGE:
+ return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
+ case ARM64_HAS_S1POE:
+ return IS_ENABLED(CONFIG_ARM64_POE);
+ case ARM64_HAS_GCS:
+ return IS_ENABLED(CONFIG_ARM64_GCS);
+ case ARM64_HAFT:
+ return IS_ENABLED(CONFIG_ARM64_HAFT);
+ case ARM64_UNMAP_KERNEL_AT_EL0:
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
+ case ARM64_WORKAROUND_843419:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419);
+ case ARM64_WORKAROUND_1742098:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098);
+ case ARM64_WORKAROUND_2645198:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198);
+ case ARM64_WORKAROUND_2658417:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417);
+ case ARM64_WORKAROUND_CAVIUM_23154:
+ return IS_ENABLED(CONFIG_CAVIUM_ERRATUM_23154);
+ case ARM64_WORKAROUND_NVIDIA_CARMEL_CNP:
+ return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM);
+ case ARM64_WORKAROUND_REPEAT_TLBI:
+ return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
+ case ARM64_WORKAROUND_SPECULATIVE_SSBS:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+ case ARM64_MPAM:
+ /*
+ * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
+ */
+ return true;
+ case ARM64_HAS_PMUV3:
+ return IS_ENABLED(CONFIG_HW_PERF_EVENTS);
+ }
+
+ return true;
+}
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000000..4de51f8d92cb
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,1083 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/alternative-macros.h>
+#include <asm/cpucaps.h>
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/sysreg.h>
+
+#define MAX_CPU_FEATURES 192
+#define cpu_feature(x) KERNEL_HWCAP_ ## x
+
+#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
+#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+#define ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF 8
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bug.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+/*
+ * CPU feature register tracking
+ *
+ * The safe value of a CPUID feature field is dependent on the implications
+ * of the values assigned to it by the architecture. Based on the relationship
+ * between the values, the features are classified into 3 types - LOWER_SAFE,
+ * HIGHER_SAFE and EXACT.
+ *
+ * The lowest value of all the CPUs is chosen for LOWER_SAFE and highest
+ * for HIGHER_SAFE. It is expected that all CPUs have the same value for
+ * a field when EXACT is specified, failing which, the safe value specified
+ * in the table is chosen.
+ */
+
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE, /* Bigger value is safe */
+ FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
+};
+
+#define FTR_STRICT true /* SANITY check strict matching required */
+#define FTR_NONSTRICT false /* SANITY check ignored */
+
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
+#define FTR_VISIBLE true /* Feature visible to the user space */
+#define FTR_HIDDEN false /* Feature is hidden from the user */
+
+#define FTR_VISIBLE_IF_IS_ENABLED(config) \
+ (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
+struct arm64_ftr_bits {
+ bool sign; /* Value is signed ? */
+ bool visible;
+ bool strict; /* CPU Sanity check: strict matching required ? */
+ enum ftr_type type;
+ u8 shift;
+ u8 width;
+ s64 safe_val; /* safe value for FTR_EXACT features */
+};
+
+/*
+ * Describe the early feature override to the core override code:
+ *
+ * @val Values that are to be merged into the final
+ * sanitised value of the register. Only the bitfields
+ * set to 1 in @mask are valid
+ * @mask Mask of the features that are overridden by @val
+ *
+ * A @mask field set to full-1 indicates that the corresponding field
+ * in @val is a valid override.
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-0 denotes that this field has no override
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-1 denotes that this field has an invalid override.
+ */
+struct arm64_ftr_override {
+ u64 val;
+ u64 mask;
+};
+
+/*
+ * @arm64_ftr_reg - Feature register
+ * @strict_mask Bits which should match across all CPUs for sanity.
+ * @sys_val Safe value across the CPUs (system view)
+ */
+struct arm64_ftr_reg {
+ const char *name;
+ u64 strict_mask;
+ u64 user_mask;
+ u64 sys_val;
+ u64 user_val;
+ struct arm64_ftr_override *override;
+ const struct arm64_ftr_bits *ftr_bits;
+};
+
+extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
+
+/*
+ * CPU capabilities:
+ *
+ * We use arm64_cpu_capabilities to represent system features, errata work
+ * arounds (both used internally by kernel and tracked in system_cpucaps) and
+ * ELF HWCAPs (which are exposed to user).
+ *
+ * To support systems with heterogeneous CPUs, we need to make sure that we
+ * detect the capabilities correctly on the system and take appropriate
+ * measures to ensure there are no incompatibilities.
+ *
+ * This comment tries to explain how we treat the capabilities.
+ * Each capability has the following list of attributes :
+ *
+ * 1) Scope of Detection : The system detects a given capability by
+ * performing some checks at runtime. This could be, e.g, checking the
+ * value of a field in CPU ID feature register or checking the cpu
+ * model. The capability provides a call back ( @matches() ) to
+ * perform the check. Scope defines how the checks should be performed.
+ * There are three cases:
+ *
+ * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one
+ * matches. This implies, we have to run the check on all the
+ * booting CPUs, until the system decides that state of the
+ * capability is finalised. (See section 2 below)
+ * Or
+ * b) SCOPE_SYSTEM: check all the CPUs and "detect" if all the CPUs
+ * matches. This implies, we run the check only once, when the
+ * system decides to finalise the state of the capability. If the
+ * capability relies on a field in one of the CPU ID feature
+ * registers, we use the sanitised value of the register from the
+ * CPU feature infrastructure to make the decision.
+ * Or
+ * c) SCOPE_BOOT_CPU: Check only on the primary boot CPU to detect the
+ * feature. This category is for features that are "finalised"
+ * (or used) by the kernel very early even before the SMP cpus
+ * are brought up.
+ *
+ * The process of detection is usually denoted by "update" capability
+ * state in the code.
+ *
+ * 2) Finalise the state : The kernel should finalise the state of a
+ * capability at some point during its execution and take necessary
+ * actions if any. Usually, this is done, after all the boot-time
+ * enabled CPUs are brought up by the kernel, so that it can make
+ * better decision based on the available set of CPUs. However, there
+ * are some special cases, where the action is taken during the early
+ * boot by the primary boot CPU. (e.g, running the kernel at EL2 with
+ * Virtualisation Host Extensions). The kernel usually disallows any
+ * changes to the state of a capability once it finalises the capability
+ * and takes any action, as it may be impossible to execute the actions
+ * safely. A CPU brought up after a capability is "finalised" is
+ * referred to as "Late CPU" w.r.t the capability. e.g, all secondary
+ * CPUs are treated "late CPUs" for capabilities determined by the boot
+ * CPU.
+ *
+ * At the moment there are two passes of finalising the capabilities.
+ * a) Boot CPU scope capabilities - Finalised by primary boot CPU via
+ * setup_boot_cpu_capabilities().
+ * b) Everything except (a) - Run via setup_system_capabilities().
+ *
+ * 3) Verification: When a CPU is brought online (e.g, by user or by the
+ * kernel), the kernel should make sure that it is safe to use the CPU,
+ * by verifying that the CPU is compliant with the state of the
+ * capabilities finalised already. This happens via :
+ *
+ * secondary_start_kernel()-> check_local_cpu_capabilities()
+ *
+ * As explained in (2) above, capabilities could be finalised at
+ * different points in the execution. Each newly booted CPU is verified
+ * against the capabilities that have been finalised by the time it
+ * boots.
+ *
+ * a) SCOPE_BOOT_CPU : All CPUs are verified against the capability
+ * except for the primary boot CPU.
+ *
+ * b) SCOPE_LOCAL_CPU, SCOPE_SYSTEM: All CPUs hotplugged on by the
+ * user after the kernel boot are verified against the capability.
+ *
+ * If there is a conflict, the kernel takes an action, based on the
+ * severity (e.g, a CPU could be prevented from booting or cause a
+ * kernel panic). The CPU is allowed to "affect" the state of the
+ * capability, if it has not been finalised already. See section 5
+ * for more details on conflicts.
+ *
+ * 4) Action: As mentioned in (2), the kernel can take an action for each
+ * detected capability, on all CPUs on the system. Appropriate actions
+ * include, turning on an architectural feature, modifying the control
+ * registers (e.g, SCTLR, TCR etc.) or patching the kernel via
+ * alternatives. The kernel patching is batched and performed at later
+ * point. The actions are always initiated only after the capability
+ * is finalised. This is usually denoted by "enabling" the capability.
+ * The actions are initiated as follows :
+ * a) Action is triggered on all online CPUs, after the capability is
+ * finalised, invoked within the stop_machine() context from
+ * enable_cpu_capabilitie().
+ *
+ * b) Any late CPU, brought up after (1), the action is triggered via:
+ *
+ * check_local_cpu_capabilities() -> verify_local_cpu_capabilities()
+ *
+ * 5) Conflicts: Based on the state of the capability on a late CPU vs.
+ * the system state, we could have the following combinations :
+ *
+ * x-----------------------------x
+ * | Type | System | Late CPU |
+ * |-----------------------------|
+ * | a | y | n |
+ * |-----------------------------|
+ * | b | n | y |
+ * x-----------------------------x
+ *
+ * Two separate flag bits are defined to indicate whether each kind of
+ * conflict can be allowed:
+ * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - Case(a) is allowed
+ * ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - Case(b) is allowed
+ *
+ * Case (a) is not permitted for a capability that the system requires
+ * all CPUs to have in order for the capability to be enabled. This is
+ * typical for capabilities that represent enhanced functionality.
+ *
+ * Case (b) is not permitted for a capability that must be enabled
+ * during boot if any CPU in the system requires it in order to run
+ * safely. This is typical for erratum work arounds that cannot be
+ * enabled after the corresponding capability is finalised.
+ *
+ * In some non-typical cases either both (a) and (b), or neither,
+ * should be permitted. This can be described by including neither
+ * or both flags in the capability's type field.
+ *
+ * In case of a conflict, the CPU is prevented from booting. If the
+ * ARM64_CPUCAP_PANIC_ON_CONFLICT flag is specified for the capability,
+ * then a kernel panic is triggered.
+ */
+
+
+/*
+ * Decide how the capability is detected.
+ * On any local CPU vs System wide vs the primary boot CPU
+ */
+#define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0))
+#define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1))
+/*
+ * The capability is detected on the Boot CPU and is used by kernel
+ * during early boot. i.e, the capability should be "detected" and
+ * "enabled" as early as possibly on all booting CPUs.
+ */
+#define ARM64_CPUCAP_SCOPE_BOOT_CPU ((u16)BIT(2))
+#define ARM64_CPUCAP_SCOPE_MASK \
+ (ARM64_CPUCAP_SCOPE_SYSTEM | \
+ ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_SCOPE_BOOT_CPU)
+
+#define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM
+#define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU
+#define SCOPE_BOOT_CPU ARM64_CPUCAP_SCOPE_BOOT_CPU
+#define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK
+
+/*
+ * Is it permitted for a late CPU to have this capability when system
+ * hasn't already enabled it ?
+ */
+#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU ((u16)BIT(4))
+/* Is it safe for a late CPU to miss this capability when system has it */
+#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5))
+/* Panic when a conflict is detected */
+#define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6))
+/*
+ * When paired with SCOPE_LOCAL_CPU, all early CPUs must satisfy the
+ * condition. This is different from SCOPE_SYSTEM where the check is performed
+ * only once at the end of the SMP boot on the sanitised ID registers.
+ * SCOPE_SYSTEM is not suitable for cases where the capability depends on
+ * properties local to a CPU like MIDR_EL1.
+ */
+#define ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS ((u16)BIT(7))
+
+/*
+ * CPU errata workarounds that need to be enabled at boot time if one or
+ * more CPUs in the system requires it. When one of these capabilities
+ * has been enabled, it is safe to allow any CPU to boot that doesn't
+ * require the workaround. However, it is not safe if a "late" CPU
+ * requires a workaround and the system hasn't enabled it already.
+ */
+#define ARM64_CPUCAP_LOCAL_CPU_ERRATUM \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time based on system-wide value of a
+ * feature. It is safe for a late CPU to have this feature even though
+ * the system hasn't enabled it, although the feature will not be used
+ * by Linux in this case. If the system has enabled this feature already,
+ * then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_SYSTEM_FEATURE \
+ (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time based on feature of one or more CPUs.
+ * All possible conflicts for a late CPU are ignored.
+ * NOTE: this means that a late CPU with the feature will *not* cause the
+ * capability to be advertised by cpus_have_*cap()!
+ */
+#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \
+ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time and present on all early CPUs. Late CPUs
+ * are permitted to have the feature even if it hasn't been enabled, although
+ * the feature will not be used by Linux in this case. If all early CPUs have
+ * the feature, then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU | \
+ ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS)
+
+/*
+ * CPU feature detected at boot time, on one or more CPUs. A late CPU
+ * is not allowed to have the capability when the system doesn't have it.
+ * It is Ok for a late CPU to miss the feature.
+ */
+#define ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
+
+/*
+ * CPU feature used early in the boot based on the boot CPU. All secondary
+ * CPUs must match the state of the capability as detected by the boot CPU. In
+ * case of a conflict, a kernel panic is triggered.
+ */
+#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_BOOT_CPU | ARM64_CPUCAP_PANIC_ON_CONFLICT)
+
+/*
+ * CPU feature used early in the boot based on the boot CPU. It is safe for a
+ * late CPU to have this feature even though the boot CPU hasn't enabled it,
+ * although the feature will not be used by Linux in this case. If the boot CPU
+ * has enabled this feature already, then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_BOOT_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_BOOT_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+
+struct arm64_cpu_capabilities {
+ const char *desc;
+ u16 capability;
+ u16 type;
+ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
+ /*
+ * Take the appropriate actions to configure this capability
+ * for this CPU. If the capability is detected by the kernel
+ * this will be called on all the CPUs in the system,
+ * including the hotplugged CPUs, regardless of whether the
+ * capability is available on that specific CPU. This is
+ * useful for some capabilities (e.g, working around CPU
+ * errata), where all the CPUs must take some action (e.g,
+ * changing system control/configuration). Thus, if an action
+ * is required only if the CPU has the capability, then the
+ * routine must check it before taking any action.
+ */
+ void (*cpu_enable)(const struct arm64_cpu_capabilities *cap);
+ union {
+ struct { /* To be used for erratum handling only */
+ struct midr_range midr_range;
+ const struct arm64_midr_revidr {
+ u32 midr_rv; /* revision/variant */
+ u32 revidr_mask;
+ } * const fixed_revs;
+ };
+
+ const struct midr_range *midr_range_list;
+ struct { /* Feature register checking */
+ u32 sys_reg;
+ u8 field_pos;
+ u8 field_width;
+ u8 min_field_value;
+ u8 max_field_value;
+ u8 hwcap_type;
+ bool sign;
+ unsigned long hwcap;
+ };
+ };
+
+ /*
+ * An optional list of "matches/cpu_enable" pair for the same
+ * "capability" of the same "type" as described by the parent.
+ * Only matches(), cpu_enable() and fields relevant to these
+ * methods are significant in the list. The cpu_enable is
+ * invoked only if the corresponding entry "matches()".
+ * However, if a cpu_enable() method is associated
+ * with multiple matches(), care should be taken that either
+ * the match criteria are mutually exclusive, or that the
+ * method is robust against being called multiple times.
+ */
+ const struct arm64_cpu_capabilities *match_list;
+ const struct cpumask *cpus;
+};
+
+static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
+{
+ return cap->type & ARM64_CPUCAP_SCOPE_MASK;
+}
+
+static inline bool cpucap_match_all_early_cpus(const struct arm64_cpu_capabilities *cap)
+{
+ return cap->type & ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS;
+}
+
+/*
+ * Generic helper for handling capabilities with multiple (match,enable) pairs
+ * of call backs, sharing the same capability bit.
+ * Iterate over each entry to see if at least one matches.
+ */
+static inline bool
+cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ for (caps = entry->match_list; caps->matches; caps++)
+ if (caps->matches(caps, scope))
+ return true;
+
+ return false;
+}
+
+static __always_inline bool is_vhe_hyp_code(void)
+{
+ /* Only defined for code run in VHE hyp context */
+ return __is_defined(__KVM_VHE_HYPERVISOR__);
+}
+
+static __always_inline bool is_nvhe_hyp_code(void)
+{
+ /* Only defined for code run in NVHE hyp context */
+ return __is_defined(__KVM_NVHE_HYPERVISOR__);
+}
+
+static __always_inline bool is_hyp_code(void)
+{
+ return is_vhe_hyp_code() || is_nvhe_hyp_code();
+}
+
+extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
+
+extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
+
+#define for_each_available_cap(cap) \
+ for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS)
+
+bool this_cpu_has_cap(unsigned int cap);
+void cpu_set_feature(unsigned int num);
+bool cpu_have_feature(unsigned int num);
+unsigned long cpu_get_elf_hwcap(void);
+unsigned long cpu_get_elf_hwcap2(void);
+unsigned long cpu_get_elf_hwcap3(void);
+
+#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
+#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
+
+static __always_inline bool boot_capabilities_finalized(void)
+{
+ return alternative_has_cap_likely(ARM64_ALWAYS_BOOT);
+}
+
+static __always_inline bool system_capabilities_finalized(void)
+{
+ return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
+}
+
+/*
+ * Test for a capability with a runtime check.
+ *
+ * Before the capability is detected, this returns false.
+ */
+static __always_inline bool cpus_have_cap(unsigned int num)
+{
+ if (__builtin_constant_p(num) && !cpucap_is_possible(num))
+ return false;
+ if (num >= ARM64_NCAPS)
+ return false;
+ return arch_test_bit(num, system_cpucaps);
+}
+
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before boot capabilities are finalized, this will BUG().
+ * After boot capabilities are finalized, this is patched to avoid a runtime
+ * check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool cpus_have_final_boot_cap(int num)
+{
+ if (boot_capabilities_finalized())
+ return alternative_has_cap_unlikely(num);
+ else
+ BUG();
+}
+
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before system capabilities are finalized, this will BUG().
+ * After system capabilities are finalized, this is patched to avoid a runtime
+ * check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool cpus_have_final_cap(int num)
+{
+ if (system_capabilities_finalized())
+ return alternative_has_cap_unlikely(num);
+ else
+ BUG();
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
+{
+ return (s64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_signed_field(u64 features, int field)
+{
+ return cpuid_feature_extract_signed_field_width(features, field, 4);
+}
+
+static __always_inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
+{
+ return (u64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static __always_inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field(u64 features, int field)
+{
+ return cpuid_feature_extract_unsigned_field_width(features, field, 4);
+}
+
+static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
+{
+ return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
+}
+
+static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
+{
+ return (reg->user_val | (reg->sys_val & reg->user_mask));
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign)
+{
+ if (WARN_ON_ONCE(!width))
+ width = 4;
+ return (sign) ?
+ cpuid_feature_extract_signed_field_width(features, field, width) :
+ cpuid_feature_extract_unsigned_field_width(features, field, width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field, bool sign)
+{
+ return cpuid_feature_extract_field_width(features, field, 4, sign);
+}
+
+static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
+{
+ return (s64)cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width, ftrp->sign);
+}
+
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
+{
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1;
+}
+
+static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
+
+ return val == ID_AA64PFR0_EL1_EL1_AARCH32;
+}
+
+static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
+
+ return val == ID_AA64PFR0_EL1_EL0_AARCH32;
+}
+
+static inline bool id_aa64pfr0_sve(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT);
+
+ return val > 0;
+}
+
+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT);
+
+ return val > 0;
+}
+
+static inline bool id_aa64pfr0_mpam(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
+
+ return val > 0;
+}
+
+static inline bool id_aa64pfr1_mte(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
+
+ return val >= ID_AA64PFR1_EL1_MTE_MTE2;
+}
+
+void __init setup_boot_cpu_features(void);
+void __init setup_system_features(void);
+void __init setup_user_features(void);
+
+void check_local_cpu_capabilities(void);
+
+u64 read_sanitised_ftr_reg(u32 id);
+u64 __read_sysreg_by_encoding(u32 sys_id);
+
+static inline bool cpu_supports_mixed_endian_el0(void)
+{
+ return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+
+static inline bool supports_csv2p3(int scope)
+{
+ u64 pfr0;
+ u8 csv2_val;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+ else
+ pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_EL1_CSV2_SHIFT);
+ return csv2_val == 3;
+}
+
+static inline bool supports_clearbhb(int scope)
+{
+ u64 isar2;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
+ else
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ return cpuid_feature_extract_unsigned_field(isar2,
+ ID_AA64ISAR2_EL1_CLRBHB_SHIFT);
+}
+
+const struct cpumask *system_32bit_el0_cpumask(void);
+const struct cpumask *fallback_32bit_el0_cpumask(void);
+DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
+static inline bool system_supports_32bit_el0(void)
+{
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ return static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+ id_aa64pfr0_32bit_el0(pfr0);
+}
+
+static inline bool system_supports_4kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN4_SHIFT);
+
+ return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX);
+}
+
+static inline bool system_supports_64kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN64_SHIFT);
+
+ return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX);
+}
+
+static inline bool system_supports_16kb_granule(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN16_SHIFT);
+
+ return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX);
+}
+
+static inline bool system_supports_mixed_endian_el0(void)
+{
+ return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1));
+}
+
+static inline bool system_supports_mixed_endian(void)
+{
+ u64 mmfr0;
+ u32 val;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ val = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_BIGEND_SHIFT);
+
+ return val == 0x1;
+}
+
+static __always_inline bool system_supports_fpsimd(void)
+{
+ return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
+}
+
+static inline bool system_uses_hw_pan(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_PAN);
+}
+
+static inline bool system_uses_ttbr0_pan(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
+ !system_uses_hw_pan();
+}
+
+static __always_inline bool system_supports_sve(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SVE);
+}
+
+static __always_inline bool system_supports_sme(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME);
+}
+
+static __always_inline bool system_supports_sme2(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME2);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+ return system_supports_sme();
+}
+
+static __always_inline bool system_supports_fpmr(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_FPMR);
+}
+
+static __always_inline bool system_supports_cnp(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_CNP);
+}
+
+static inline bool system_supports_address_auth(void)
+{
+ return cpus_have_final_boot_cap(ARM64_HAS_ADDRESS_AUTH);
+}
+
+static inline bool system_supports_generic_auth(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_GENERIC_AUTH);
+}
+
+static inline bool system_has_full_ptr_auth(void)
+{
+ return system_supports_address_auth() && system_supports_generic_auth();
+}
+
+static __always_inline bool system_uses_irq_prio_masking(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+}
+
+static inline bool system_supports_mte(void)
+{
+ return alternative_has_cap_unlikely(ARM64_MTE);
+}
+
+static inline bool system_has_prio_mask_debugging(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
+ system_uses_irq_prio_masking();
+}
+
+static inline bool system_supports_bti(void)
+{
+ return cpus_have_final_cap(ARM64_BTI);
+}
+
+static inline bool system_supports_bti_kernel(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
+ cpus_have_final_boot_cap(ARM64_BTI);
+}
+
+static inline bool system_supports_tlb_range(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
+}
+
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
+static inline bool system_supports_poe(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+}
+
+static inline bool system_supports_gcs(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_GCS);
+}
+
+static inline bool system_supports_haft(void)
+{
+ return cpus_have_final_cap(ARM64_HAFT);
+}
+
+static __always_inline bool system_supports_mpam(void)
+{
+ return alternative_has_cap_unlikely(ARM64_MPAM);
+}
+
+static __always_inline bool system_supports_mpam_hcr(void)
+{
+ return alternative_has_cap_unlikely(ARM64_MPAM_HCR);
+}
+
+static inline bool system_supports_pmuv3(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_PMUV3);
+}
+
+bool cpu_supports_bbml2_noabort(void);
+
+static inline bool system_supports_bbml2_noabort(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);
+}
+
+int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
+
+static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
+{
+ switch (parange) {
+ case ID_AA64MMFR0_EL1_PARANGE_32: return 32;
+ case ID_AA64MMFR0_EL1_PARANGE_36: return 36;
+ case ID_AA64MMFR0_EL1_PARANGE_40: return 40;
+ case ID_AA64MMFR0_EL1_PARANGE_42: return 42;
+ case ID_AA64MMFR0_EL1_PARANGE_44: return 44;
+ case ID_AA64MMFR0_EL1_PARANGE_48: return 48;
+ case ID_AA64MMFR0_EL1_PARANGE_52: return 52;
+ /*
+ * A future PE could use a value unknown to the kernel.
+ * However, by the "D10.1.4 Principles of the ID scheme
+ * for fields in ID registers", ARM DDI 0487C.a, any new
+ * value is guaranteed to be higher than what we know already.
+ * As a safe limit, we return the limit supported by the kernel.
+ */
+ default: return CONFIG_ARM64_PA_BITS;
+ }
+}
+
+/* Check whether hardware update of the Access flag is supported */
+static inline bool cpu_has_hw_af(void)
+{
+ u64 mmfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
+ return false;
+
+ /*
+ * Use cached version to avoid emulated msr operation on KVM
+ * guests.
+ */
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
+}
+
+static inline bool cpu_has_pan(void)
+{
+ u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_PAN_SHIFT);
+}
+
+#ifdef CONFIG_ARM64_AMU_EXTN
+/* Check whether the cpu supports the Activity Monitors Unit (AMU) */
+extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+ return false;
+}
+#endif
+
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
+static inline unsigned int get_vmid_bits(u64 mmfr1)
+{
+ int vmid_bits;
+
+ vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_VMIDBits_SHIFT);
+ if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16)
+ return 16;
+
+ /*
+ * Return the default here even if any reserved
+ * value is fetched from the system register.
+ */
+ return 8;
+}
+
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+
+extern struct arm64_ftr_override id_aa64mmfr0_override;
+extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64mmfr2_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
+extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
+extern struct arm64_ftr_override id_aa64isar1_override;
+extern struct arm64_ftr_override id_aa64isar2_override;
+
+extern struct arm64_ftr_override arm64_sw_feature_override;
+
+static inline
+u64 arm64_apply_feature_override(u64 val, int feat, int width,
+ const struct arm64_ftr_override *override)
+{
+ u64 oval = override->val;
+
+ /*
+ * When it encounters an invalid override (e.g., an override that
+ * cannot be honoured due to a missing CPU feature), the early idreg
+ * override code will set the mask to 0x0 and the value to non-zero for
+ * the field in question. In order to determine whether the override is
+ * valid or not for the field we are interested in, we first need to
+ * disregard bits belonging to other fields.
+ */
+ oval &= GENMASK_ULL(feat + width - 1, feat);
+
+ /*
+ * The override is valid if all value bits are accounted for in the
+ * mask. If so, replace the masked bits with the override value.
+ */
+ if (oval == (oval & override->mask)) {
+ val &= ~override->mask;
+ val |= oval;
+ }
+
+ /* Extract the field from the updated value */
+ return cpuid_feature_extract_unsigned_field(val, feat);
+}
+
+static inline bool arm64_test_sw_feature_override(int feat)
+{
+ /*
+ * Software features are pseudo CPU features that have no underlying
+ * CPUID system register value to apply the override to.
+ */
+ return arm64_apply_feature_override(0, feat, 4,
+ &arm64_sw_feature_override);
+}
+
+static inline bool kaslr_disabled_cmdline(void)
+{
+ return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_NOKASLR);
+}
+
+u32 get_kvm_ipa_limit(void);
+void dump_cpu_features(void);
+
+static inline bool cpu_has_bti(void)
+{
+ if (!IS_ENABLED(CONFIG_ARM64_BTI))
+ return false;
+
+ return arm64_apply_feature_override(read_cpuid(ID_AA64PFR1_EL1),
+ ID_AA64PFR1_EL1_BT_SHIFT, 4,
+ &id_aa64pfr1_override);
+}
+
+static inline bool cpu_has_pac(void)
+{
+ u64 isar1, isar2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
+
+ isar1 = read_cpuid(ID_AA64ISAR1_EL1);
+ isar2 = read_cpuid(ID_AA64ISAR2_EL1);
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_APA_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ if (arm64_apply_feature_override(isar1, ID_AA64ISAR1_EL1_API_SHIFT, 4,
+ &id_aa64isar1_override))
+ return true;
+
+ return arm64_apply_feature_override(isar2, ID_AA64ISAR2_EL1_APA3_SHIFT, 4,
+ &id_aa64isar2_override);
+}
+
+static inline bool cpu_has_lva(void)
+{
+ u64 mmfr2;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ mmfr2 &= ~id_aa64mmfr2_override.mask;
+ mmfr2 |= id_aa64mmfr2_override.val;
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_VARange_SHIFT);
+}
+
+static inline bool cpu_has_lpa2(void)
+{
+#ifdef CONFIG_ARM64_LPA2
+ u64 mmfr0;
+ int feat;
+
+ mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ mmfr0 &= ~id_aa64mmfr0_override.mask;
+ mmfr0 |= id_aa64mmfr0_override.val;
+ feat = cpuid_feature_extract_signed_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+
+ return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
+#else
+ return false;
+#endif
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
new file mode 100644
index 000000000000..2047713e097d
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CPUIDLE_H
+#define __ASM_CPUIDLE_H
+
+#include <asm/proc-fns.h>
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#include <asm/arch_gicv3.h>
+
+struct arm_cpuidle_irq_context {
+ unsigned long pmr;
+ unsigned long daif_bits;
+};
+
+#define arm_cpuidle_save_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ c->daif_bits = read_sysreg(daif); \
+ write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \
+ daif); \
+ c->pmr = gic_read_pmr(); \
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \
+ } \
+ } while (0)
+
+#define arm_cpuidle_restore_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ gic_write_pmr(c->pmr); \
+ write_sysreg(c->daif_bits, daif); \
+ } \
+ } while (0)
+#else
+struct arm_cpuidle_irq_context { };
+
+#define arm_cpuidle_save_irq_context(c) (void)c
+#define arm_cpuidle_restore_irq_context(c) (void)c
+#endif
+#endif
diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h
deleted file mode 100644
index e3bd983d3661..000000000000
--- a/arch/arm64/include/asm/cputable.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * arch/arm64/include/asm/cputable.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CPUTABLE_H
-#define __ASM_CPUTABLE_H
-
-struct cpu_info {
- unsigned int cpu_id_val;
- unsigned int cpu_id_mask;
- const char *cpu_name;
- unsigned long (*cpu_setup)(void);
-};
-
-extern struct cpu_info *lookup_processor_type(unsigned int);
-
-#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fe138e0b828..08860d482e60 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -1,51 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_CPUTYPE_H
#define __ASM_CPUTYPE_H
-#define ID_MIDR_EL1 "midr_el1"
-#define ID_MPIDR_EL1 "mpidr_el1"
-#define ID_CTR_EL0 "ctr_el0"
+#define INVALID_HWID ULONG_MAX
-#define ID_AA64PFR0_EL1 "id_aa64pfr0_el1"
-#define ID_AA64DFR0_EL1 "id_aa64dfr0_el1"
-#define ID_AA64AFR0_EL1 "id_aa64afr0_el1"
-#define ID_AA64ISAR0_EL1 "id_aa64isar0_el1"
-#define ID_AA64MMFR0_EL1 "id_aa64mmfr0_el1"
+#define MPIDR_UP_BITMASK (0x1 << 30)
+#define MPIDR_MT_BITMASK (0x1 << 24)
+#define MPIDR_HWID_BITMASK UL(0xff00ffffff)
-#define INVALID_HWID ULONG_MAX
+#define MPIDR_LEVEL_BITS_SHIFT 3
+#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT)
+#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+
+#define MPIDR_LEVEL_SHIFT(level) \
+ (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT)
+
+#define MPIDR_AFFINITY_LEVEL(mpidr, level) \
+ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
+
+#define MIDR_REVISION_MASK 0xf
+#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK)
+#define MIDR_PARTNUM_SHIFT 4
+#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT)
+#define MIDR_PARTNUM(midr) \
+ (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT 16
+#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_ARCHITECTURE(midr) \
+ (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_VARIANT_SHIFT 20
+#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
+#define MIDR_VARIANT(midr) \
+ (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
+#define MIDR_IMPLEMENTOR_SHIFT 24
+#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR(midr) \
+ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
+
+#define MIDR_CPU_MODEL(imp, partnum) \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ (0xf << MIDR_ARCHITECTURE_SHIFT) | \
+ ((partnum) << MIDR_PARTNUM_SHIFT))
+
+#define MIDR_CPU_VAR_REV(var, rev) \
+ (((var) << MIDR_VARIANT_SHIFT) | (rev))
-#define MPIDR_HWID_BITMASK 0xff00ffffff
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+ MIDR_ARCHITECTURE_MASK)
-#define read_cpuid(reg) ({ \
- u64 __val; \
- asm("mrs %0, " reg : "=r" (__val)); \
- __val; \
-})
+#define ARM_CPU_IMP_ARM 0x41
+#define ARM_CPU_IMP_APM 0x50
+#define ARM_CPU_IMP_CAVIUM 0x43
+#define ARM_CPU_IMP_BRCM 0x42
+#define ARM_CPU_IMP_QCOM 0x51
+#define ARM_CPU_IMP_NVIDIA 0x4E
+#define ARM_CPU_IMP_FUJITSU 0x46
+#define ARM_CPU_IMP_HISI 0x48
+#define ARM_CPU_IMP_APPLE 0x61
+#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
-#define ARM_CPU_IMP_ARM 0x41
-#define ARM_CPU_IMP_APM 0x50
+#define ARM_CPU_PART_AEM_V8 0xD0F
+#define ARM_CPU_PART_FOUNDATION 0xD00
+#define ARM_CPU_PART_CORTEX_A57 0xD07
+#define ARM_CPU_PART_CORTEX_A72 0xD08
+#define ARM_CPU_PART_CORTEX_A53 0xD03
+#define ARM_CPU_PART_CORTEX_A73 0xD09
+#define ARM_CPU_PART_CORTEX_A75 0xD0A
+#define ARM_CPU_PART_CORTEX_A35 0xD04
+#define ARM_CPU_PART_CORTEX_A55 0xD05
+#define ARM_CPU_PART_CORTEX_A76 0xD0B
+#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
+#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
+#define ARM_CPU_PART_NEOVERSE_V1 0xD40
+#define ARM_CPU_PART_CORTEX_A78 0xD41
+#define ARM_CPU_PART_CORTEX_A78AE 0xD42
+#define ARM_CPU_PART_CORTEX_X1 0xD44
+#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_A520 0xD80
+#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_A715 0xD4D
+#define ARM_CPU_PART_CORTEX_X2 0xD48
+#define ARM_CPU_PART_NEOVERSE_N2 0xD49
+#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
+#define ARM_CPU_PART_CORTEX_X3 0xD4E
+#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
+#define ARM_CPU_PART_CORTEX_A720 0xD81
+#define ARM_CPU_PART_CORTEX_X4 0xD82
+#define ARM_CPU_PART_NEOVERSE_V3AE 0xD83
+#define ARM_CPU_PART_NEOVERSE_V3 0xD84
+#define ARM_CPU_PART_CORTEX_X925 0xD85
+#define ARM_CPU_PART_CORTEX_A725 0xD87
+#define ARM_CPU_PART_CORTEX_A720AE 0xD89
+#define ARM_CPU_PART_NEOVERSE_N3 0xD8E
-#define ARM_CPU_PART_AEM_V8 0xD0F0
-#define ARM_CPU_PART_FOUNDATION 0xD000
-#define ARM_CPU_PART_CORTEX_A57 0xD070
+#define APM_CPU_PART_XGENE 0x000
+#define APM_CPU_VAR_POTENZA 0x00
-#define APM_CPU_PART_POTENZA 0x0000
+#define CAVIUM_CPU_PART_THUNDERX 0x0A1
+#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
+#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
+#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
+/* OcteonTx2 series */
+#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1
+#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2
+#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3
+#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4
+#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5
+#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6
-#ifndef __ASSEMBLY__
+#define BRCM_CPU_PART_BRAHMA_B53 0x100
+#define BRCM_CPU_PART_VULCAN 0x516
+
+#define QCOM_CPU_PART_FALKOR_V1 0x800
+#define QCOM_CPU_PART_FALKOR 0xC00
+#define QCOM_CPU_PART_KRYO 0x200
+#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
+#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
+#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
+#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
+#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
+
+#define NVIDIA_CPU_PART_DENVER 0x003
+#define NVIDIA_CPU_PART_CARMEL 0x004
+#define NVIDIA_CPU_PART_OLYMPUS 0x010
+
+#define FUJITSU_CPU_PART_A64FX 0x001
+
+#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
+#define HISI_CPU_PART_HIP12 0xD06
+
+#define APPLE_CPU_PART_M1_ICESTORM 0x022
+#define APPLE_CPU_PART_M1_FIRESTORM 0x023
+#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
+#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
+#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
+#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
+
+#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define AMPERE_CPU_PART_AMPERE1A 0xAC4
+
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
+#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
+#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
+#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
+#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
+#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
+#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
+#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
+#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
+#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
+#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
+#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
+#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
+#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
+#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
+#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3)
+#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
+#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720)
+#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
+#define MIDR_NEOVERSE_V3AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3AE)
+#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
+#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
+#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
+#define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE)
+#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
+#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX)
+#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX)
+#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX)
+#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN)
+#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM)
+#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO)
+#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
+#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
+#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
+#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
+#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
+#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
+#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
+#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
+#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
+#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
+#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
+#define MIDR_NVIDIA_OLYMPUS MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_OLYMPUS)
+#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
+#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
+#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
+#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
+#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
+#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
+#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
+#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
+#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
+
+/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
+#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
+#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0))
+#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_EL1_NFD1 | TCR_EL1_NFD0)
+
+#ifndef __ASSEMBLER__
+
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
/*
* The CPU ID never changes at run time, so we might as well tell the
@@ -54,29 +262,72 @@
*/
static inline u32 __attribute_const__ read_cpuid_id(void)
{
- return read_cpuid(ID_MIDR_EL1);
+ return read_cpuid(MIDR_EL1);
}
+/*
+ * Represent a range of MIDR values for a given CPU model and a
+ * range of variant/revision values.
+ *
+ * @model - CPU model as defined by MIDR_CPU_MODEL
+ * @rv_min - Minimum value for the revision/variant as defined by
+ * MIDR_CPU_VAR_REV
+ * @rv_max - Maximum value for the variant/revision for the range.
+ */
+struct midr_range {
+ u32 model;
+ u32 rv_min;
+ u32 rv_max;
+};
+
+#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \
+ { \
+ .model = m, \
+ .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \
+ .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \
+ }
+
+#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max)
+#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r)
+#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
+
+static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min,
+ u32 rv_max)
+{
+ u32 _model = midr & MIDR_CPU_MODEL_MASK;
+ u32 rv = midr & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK);
+
+ return _model == model && rv >= rv_min && rv <= rv_max;
+}
+
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+bool is_midr_in_range_list(struct midr_range const *ranges);
+
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
- return read_cpuid(ID_MPIDR_EL1);
+ return read_cpuid(MPIDR_EL1);
}
static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
{
- return (read_cpuid_id() & 0xFF000000) >> 24;
+ return MIDR_IMPLEMENTOR(read_cpuid_id());
}
static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
{
- return (read_cpuid_id() & 0xFFF0);
+ return MIDR_PARTNUM(read_cpuid_id());
}
static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
- return read_cpuid(ID_CTR_EL0);
+ return read_cpuid(CTR_EL0);
}
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/crash_reserve.h b/arch/arm64/include/asm/crash_reserve.h
new file mode 100644
index 000000000000..4afe027a4e7b
--- /dev/null
+++ b/arch/arm64/include/asm/crash_reserve.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ARM64_CRASH_RESERVE_H
+#define _ARM64_CRASH_RESERVE_H
+
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+#endif
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
new file mode 100644
index 000000000000..c92912eaf186
--- /dev/null
+++ b/arch/arm64/include/asm/current.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+
+#ifndef __ASSEMBLER__
+
+struct task_struct;
+
+/*
+ * We don't use read_sysreg() as we want the compiler to cache the value where
+ * possible.
+ */
+static __always_inline struct task_struct *get_current(void)
+{
+ unsigned long sp_el0;
+
+ asm ("mrs %0, sp_el0" : "=r" (sp_el0));
+
+ return (struct task_struct *)sp_el0;
+}
+
+#define current get_current()
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_CURRENT_H */
+
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
new file mode 100644
index 000000000000..5fca48009043
--- /dev/null
+++ b/arch/arm64/include/asm/daifflags.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ */
+#ifndef __ASM_DAIFFLAGS_H
+#define __ASM_DAIFFLAGS_H
+
+#include <linux/irqflags.h>
+
+#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/ptrace.h>
+
+#define DAIF_PROCCTX 0
+#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT)
+#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+#define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
+
+/* mask/save/unmask/restore all exceptions, including interrupts. */
+static inline void local_daif_mask(void)
+{
+ WARN_ON(system_has_prio_mask_debugging() &&
+ (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
+ GIC_PRIO_PSR_I_SET)));
+
+ asm volatile(
+ "msr daifset, #0xf // local_daif_mask\n"
+ :
+ :
+ : "memory");
+
+ /* Don't really care for a dsb here, we don't intend to enable IRQs */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ trace_hardirqs_off();
+}
+
+static inline unsigned long local_daif_save_flags(void)
+{
+ unsigned long flags;
+
+ flags = read_sysreg(daif);
+
+ if (system_uses_irq_prio_masking()) {
+ /* If IRQs are masked with PMR, reflect it in the flags */
+ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
+ flags |= PSR_I_BIT | PSR_F_BIT;
+ }
+
+ return flags;
+}
+
+static inline unsigned long local_daif_save(void)
+{
+ unsigned long flags;
+
+ flags = local_daif_save_flags();
+
+ local_daif_mask();
+
+ return flags;
+}
+
+static inline void local_daif_restore(unsigned long flags)
+{
+ bool irq_disabled = flags & PSR_I_BIT;
+
+ WARN_ON(system_has_prio_mask_debugging() &&
+ (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT));
+
+ if (!irq_disabled) {
+ trace_hardirqs_on();
+
+ if (system_uses_irq_prio_masking()) {
+ gic_write_pmr(GIC_PRIO_IRQON);
+ pmr_sync();
+ }
+ } else if (system_uses_irq_prio_masking()) {
+ u64 pmr;
+
+ if (!(flags & PSR_A_BIT)) {
+ /*
+ * If interrupts are disabled but we can take
+ * asynchronous errors, we can take NMIs
+ */
+ flags &= ~(PSR_I_BIT | PSR_F_BIT);
+ pmr = GIC_PRIO_IRQOFF;
+ } else {
+ pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
+ }
+
+ /*
+ * There has been concern that the write to daif
+ * might be reordered before this write to PMR.
+ * From the ARM ARM DDI 0487D.a, section D1.7.1
+ * "Accessing PSTATE fields":
+ * Writes to the PSTATE fields have side-effects on
+ * various aspects of the PE operation. All of these
+ * side-effects are guaranteed:
+ * - Not to be visible to earlier instructions in
+ * the execution stream.
+ * - To be visible to later instructions in the
+ * execution stream
+ *
+ * Also, writes to PMR are self-synchronizing, so no
+ * interrupts with a lower priority than PMR is signaled
+ * to the PE after the write.
+ *
+ * So we don't need additional synchronization here.
+ */
+ gic_write_pmr(pmr);
+ }
+
+ write_sysreg(flags, daif);
+
+ if (irq_disabled)
+ trace_hardirqs_off();
+}
+
+/*
+ * Called by synchronous exception handlers to restore the DAIF bits that were
+ * modified by taking an exception.
+ */
+static inline void local_daif_inherit(struct pt_regs *regs)
+{
+ unsigned long flags = regs->pstate & DAIF_MASK;
+
+ if (!regs_irqs_disabled(regs))
+ trace_hardirqs_on();
+
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(regs->pmr);
+
+ /*
+ * We can't use local_daif_restore(regs->pstate) here as
+ * system_has_prio_mask_debugging() won't restore the I bit if it can
+ * use the pmr instead.
+ */
+ write_sysreg(flags, daif);
+}
+#endif
diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h
new file mode 100644
index 000000000000..ebd9fb4720c0
--- /dev/null
+++ b/arch/arm64/include/asm/dcc.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2014-2015 The Linux Foundation. All rights reserved.
+ *
+ * A call to __dcc_getchar() or __dcc_putchar() is typically followed by
+ * a call to __dcc_getstatus(). We want to make sure that the CPU does
+ * not speculative read the DCC status before executing the read or write
+ * instruction. That's what the ISBs are for.
+ *
+ * The 'volatile' ensures that the compiler does not cache the status bits,
+ * and instead reads the DCC register every time.
+ */
+#ifndef __ASM_DCC_H
+#define __ASM_DCC_H
+
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+static inline u32 __dcc_getstatus(void)
+{
+ return read_sysreg(mdccsr_el0);
+}
+
+static inline char __dcc_getchar(void)
+{
+ char c = read_sysreg(dbgdtrrx_el0);
+ isb();
+
+ return c;
+}
+
+static inline void __dcc_putchar(char c)
+{
+ /*
+ * The typecast is to make absolutely certain that 'c' is
+ * zero-extended.
+ */
+ write_sysreg((unsigned char)c, dbgdtrtx_el0);
+ isb();
+}
+
+#endif
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index ef8235c68c09..8d5f92418838 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -1,22 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_DEBUG_MONITORS_H
#define __ASM_DEBUG_MONITORS_H
-#ifdef __KERNEL__
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <asm/brk-imm.h>
+#include <asm/esr.h>
+#include <asm/insn.h>
+#include <asm/ptrace.h>
+
+/* Low-level stepping controls. */
+#define DBG_SPSR_SS (1 << 21)
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
@@ -26,10 +23,21 @@
#define DBG_ESR_EVT_HWWP 0x2
#define DBG_ESR_EVT_BRK 0x6
-enum debug_el {
- DBG_ACTIVE_EL0 = 0,
- DBG_ACTIVE_EL1,
-};
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
+
+#define AARCH64_BREAK_KGDB_DYN_DBG \
+ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
+
+#define CACHE_FLUSH_IS_SAFE 1
+
+/* kprobes BRK opcodes with ESR encoding */
+#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
+#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))
+/* uprobes BRK opcodes with ESR encoding */
+#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
@@ -40,58 +48,45 @@ enum debug_el {
#define AARCH32_BREAK_THUMB2_LO 0xf7f0
#define AARCH32_BREAK_THUMB2_HI 0xa000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
-#define local_dbg_save(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "mrs %0, daif // local_dbg_save\n" \
- "msr daifset, #8" \
- : "=r" (flags) : : "memory"); \
- } while (0)
-
-#define local_dbg_restore(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "msr daif, %0 // local_dbg_restore\n" \
- : : "r" (flags) : "memory"); \
- } while (0)
-
#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */
+#define DBG_HOOK_HANDLED 0
+#define DBG_HOOK_ERROR 1
+
u8 debug_monitors_arch(void);
-void enable_debug_monitors(enum debug_el el);
-void disable_debug_monitors(enum debug_el el);
+enum dbg_active_el {
+ DBG_ACTIVE_EL0 = 0,
+ DBG_ACTIVE_EL1,
+};
+
+void enable_debug_monitors(enum dbg_active_el el);
+void disable_debug_monitors(enum dbg_active_el el);
void user_rewind_single_step(struct task_struct *task);
void user_fastforward_single_step(struct task_struct *task);
+void user_regs_reset_single_step(struct user_pt_regs *regs,
+ struct task_struct *task);
void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
int kernel_active_single_step(void);
+void kernel_rewind_single_step(struct pt_regs *regs);
+void kernel_fastforward_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
-int reinstall_suspended_bps(struct pt_regs *regs);
+bool try_step_suspended_breakpoints(struct pt_regs *regs);
#else
-static inline int reinstall_suspended_bps(struct pt_regs *regs)
+static inline bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
- return -ENODEV;
+ return false;
}
#endif
-#ifdef CONFIG_COMPAT
-int aarch32_break_handler(struct pt_regs *regs);
-#else
-static int aarch32_break_handler(struct pt_regs *regs)
-{
- return -EFAULT;
-}
-#endif
+bool try_handle_aarch32_break(struct pt_regs *regs);
-#endif /* __ASSEMBLY */
-#endif /* __KERNEL__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index cf98b362094b..996498751318 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -1,26 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_DEVICE_H
#define __ASM_DEVICE_H
struct dev_archdata {
- struct dma_map_ops *dma_ops;
-#ifdef CONFIG_IOMMU_API
- void *iommu; /* private IOMMU data */
-#endif
};
struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
deleted file mode 100644
index 8d1810001aef..000000000000
--- a/arch/arm64/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_DMA_MAPPING_H
-#define __ASM_DMA_MAPPING_H
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-
-#include <asm-generic/dma-coherent.h>
-
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
-extern struct dma_map_ops *dma_ops;
-
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
-{
- if (unlikely(!dev) || !dev->archdata.dma_ops)
- return dma_ops;
- else
- return dev->archdata.dma_ops;
-}
-
-#include <asm-generic/dma-mapping-common.h>
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return (dma_addr_t)paddr;
-}
-
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
-{
- return (phys_addr_t)dev_addr;
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- debug_dma_mapping_error(dev, dev_addr);
- return ops->mapping_error(dev, dev_addr);
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- return ops->dma_supported(dev, mask);
-}
-
-static inline int dma_set_mask(struct device *dev, u64 mask)
-{
- if (!dev->dma_mask || !dma_supported(dev, mask))
- return -EIO;
- *dev->dma_mask = mask;
-
- return 0;
-}
-
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
- if (!dev->dma_mask)
- return 0;
-
- return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline void dma_mark_clean(void *addr, size_t size)
-{
-}
-
-#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
-#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
- void *vaddr;
-
- if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr))
- return vaddr;
-
- vaddr = ops->alloc(dev, size, dma_handle, flags, attrs);
- debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr);
- return vaddr;
-}
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dev_addr,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *ops = get_dma_ops(dev);
-
- if (dma_release_from_coherent(dev, get_order(size), vaddr))
- return;
-
- debug_dma_free_coherent(dev, size, vaddr, dev_addr);
- ops->free(dev, size, vaddr, dev_addr, attrs);
-}
-
-/*
- * There is no dma_cache_sync() implementation, so just return NULL here.
- */
-static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t flags)
-{
- return NULL;
-}
-
-static inline void dma_free_noncoherent(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t handle)
-{
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_DMA_MAPPING_H */
diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h
new file mode 100644
index 000000000000..69d37d87b159
--- /dev/null
+++ b/arch/arm64/include/asm/dmi.h
@@ -0,0 +1,31 @@
+/*
+ * arch/arm64/include/asm/dmi.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Yi Li (yi.li@linaro.org)
+ *
+ * based on arch/ia64/include/asm/dmi.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+/*
+ * According to section 2.3.6 of the UEFI spec, the firmware should not
+ * request a virtual mapping for configuration tables such as SMBIOS.
+ * This means we have to map them before use.
+ */
+#define dmi_early_remap(x, l) ioremap_cache(x, l)
+#define dmi_early_unmap(x, l) iounmap(x)
+#define dmi_remap(x, l) ioremap_cache(x, l)
+#define dmi_unmap(x) iounmap(x)
+#define dmi_alloc(l) kzalloc(l, GFP_KERNEL)
+
+#endif
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
new file mode 100644
index 000000000000..aa91165ca140
--- /dev/null
+++ b/arch/arm64/include/asm/efi.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_EFI_H
+#define _ASM_EFI_H
+
+#include <asm/boot.h>
+#include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
+#include <asm/io.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/neon.h>
+#include <asm/ptrace.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_EFI
+extern void efi_init(void);
+
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
+#else
+#define efi_init()
+
+static inline
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
+{
+ return false;
+}
+#endif
+
+int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md,
+ bool has_bti);
+
+#undef arch_efi_call_virt
+#define arch_efi_call_virt(p, f, args...) \
+ __efi_rt_asm_wrapper((p)->f, #f, args)
+
+extern u64 *efi_rt_stack_top;
+efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
+
+/*
+ * efi_rt_stack_top[-1] contains the value the stack pointer had before
+ * switching to the EFI runtime stack.
+ */
+#define current_in_efi() \
+ (!preemptible() && efi_rt_stack_top != NULL && \
+ on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
+
+#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
+/*
+ * Even when Linux uses IRQ priorities for IRQ disabling, EFI does not.
+ * And EFI shouldn't really play around with priority masking as it is not aware
+ * which priorities the OS has assigned to its interrupts.
+ */
+#define arch_efi_save_flags(state_flags) \
+ ((void)((state_flags) = read_sysreg(daif)))
+
+#define arch_efi_restore_flags(state_flags) write_sysreg(state_flags, daif)
+
+
+/* arch specific definitions used by the stub code */
+
+/*
+ * In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the
+ * kernel need greater alignment than we require the segments to be padded to.
+ */
+#define EFI_KIMG_ALIGN \
+ (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
+
+/*
+ * On arm64, we have to ensure that the initrd ends up in the linear region,
+ * which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is
+ * guaranteed to cover the kernel Image.
+ *
+ * Since the EFI stub is part of the kernel Image, we can relax the
+ * usual requirements in Documentation/arch/arm64/booting.rst, which still
+ * apply to other bootloaders, and are required for some kernel
+ * configurations.
+ */
+static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
+{
+ return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
+}
+
+static inline unsigned long efi_get_kimg_min_align(void)
+{
+ extern bool efi_nokaslr;
+
+ /*
+ * Although relocatable kernels can fix up the misalignment with
+ * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
+ * subtly out of sync with those recorded in the vmlinux when kaslr is
+ * disabled but the image required relocation anyway. Therefore retain
+ * 2M alignment if KASLR was explicitly disabled, even if it was not
+ * going to be activated to begin with.
+ */
+ return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
+}
+
+#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+
+extern unsigned long primary_entry_offset(void);
+
+/*
+ * On ARM systems, virtually remapped UEFI runtime services are set up in two
+ * distinct stages:
+ * - The stub retrieves the final version of the memory map from UEFI, populates
+ * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime
+ * service to communicate the new mapping to the firmware (Note that the new
+ * mapping is not live at this time)
+ * - During an early initcall(), the EFI system table is permanently remapped
+ * and the virtual remapping of the UEFI Runtime Services regions is loaded
+ * into a private set of page tables. If this all succeeds, the Runtime
+ * Services are enabled and the EFI_RUNTIME_SERVICES bit set.
+ */
+
+static inline void efi_set_pgd(struct mm_struct *mm)
+{
+ __switch_mm(mm);
+
+ if (system_uses_ttbr0_pan()) {
+ if (mm != current->active_mm) {
+ /*
+ * Update the current thread's saved ttbr0 since it is
+ * restored as part of a return from exception.
+ */
+ update_saved_ttbr0(current, mm);
+ } else {
+ /*
+ * Restore the current thread's saved ttbr0
+ * corresponding to its active_mm
+ */
+ update_saved_ttbr0(current, current->active_mm);
+ }
+ }
+}
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
+static inline void efi_capsule_flush_cache_range(void *addr, int size)
+{
+ dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
+}
+
+efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
+
+void efi_icache_sync(unsigned long start, unsigned long end);
+
+#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
new file mode 100644
index 000000000000..cacd20df1786
--- /dev/null
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -0,0 +1,593 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM_KVM_INIT_H__
+#define __ARM_KVM_INIT_H__
+
+#ifndef __ASSEMBLER__
+#error Assembly-only header
+#endif
+
+#include <asm/kvm_arm.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+.macro init_el2_hcr val
+ mov_q x0, \val
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
+ * can reset into an UNKNOWN state and might not read as 1 until it has
+ * been initialized explicitly.
+ * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
+ * indicating whether the CPU is running in E2H mode.
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+ cmp x1, #0
+ b.lt .LnE2H0_\@
+
+ /*
+ * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised
+ * as such via ID_AA64MMFR4_EL1.E2H0:
+ *
+ * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to
+ * have HCR_EL2.E2H implemented as RAO/WI.
+ *
+ * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest
+ * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV
+ * guests on these hosts can write to HCR_EL2.E2H without
+ * trapping to the hypervisor, but these writes have no
+ * functional effect.
+ *
+ * Handle both cases by checking for an essential VHE property
+ * (system register remapping) to decide whether we're
+ * effectively VHE-only or not.
+ */
+ msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE
+ isb
+ mov x1, #1 // Write something to FAR_EL1
+ msr far_el1, x1
+ isb
+ mov x1, #2 // Try to overwrite it via FAR_EL2
+ msr far_el2, x1
+ isb
+ mrs x1, far_el1 // If we see the latest write in FAR_EL1,
+ cmp x1, #2 // we can safely assume we are VHE only.
+ b.ne .LnVHE_\@ // Otherwise, we know that nVHE works.
+
+.LnE2H0_\@:
+ orr x0, x0, #HCR_E2H
+ msr_hcr_el2 x0
+ isb
+.LnVHE_\@:
+.endm
+
+.macro __init_el2_sctlr
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ msr sctlr_el2, x0
+ isb
+.endm
+
+.macro __init_el2_hcrx
+ mrs x0, id_aa64mmfr1_el1
+ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+ cbz x0, .Lskip_hcrx_\@
+ mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
+
+ /* Enable GCS if supported */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lset_hcrx_\@
+ orr x0, x0, #HCRX_EL2_GCSEn
+
+.Lset_hcrx_\@:
+ msr_s SYS_HCRX_EL2, x0
+.Lskip_hcrx_\@:
+.endm
+
+/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */
+.macro __check_hvhe fail, tmp
+ mrs \tmp, hcr_el2
+ and \tmp, \tmp, #HCR_E2H
+ cbz \tmp, \fail
+.endm
+
+/*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured in the later stage of boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+.macro __init_el2_timers
+ mov x0, #3 // Enable EL1 physical timers
+ __check_hvhe .LnVHE_\@, x1
+ lsl x0, x0, #10
+.LnVHE_\@:
+ msr cnthctl_el2, x0
+ msr cntvoff_el2, xzr // Clear virtual offset
+.endm
+
+/* Branch to skip_label if SPE version is less than given version */
+.macro __spe_vers_imp skip_label, version, tmp
+ mrs \tmp, id_aa64dfr0_el1
+ ubfx \tmp, \tmp, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
+ cmp \tmp, \version
+ b.lt \skip_label
+.endm
+
+.macro __init_el2_debug
+ mrs x1, id_aa64dfr0_el1
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF
+ mrs x0, pmcr_el0 // Disable debug access traps
+ ubfx x0, x0, #11, #5 // to EL2 and allow access to
+.Lskip_pmu_\@:
+ csel x2, xzr, x0, eq // all PMU counters from EL1
+
+ /* Statistical profiling */
+ __spe_vers_imp .Lskip_spe_\@, ID_AA64DFR0_EL1_PMSVer_IMP, x0 // Skip if SPE not present
+
+ mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
+ and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT)
+ cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
+ mov x0, #(1 << PMSCR_EL2_PCT_SHIFT | \
+ 1 << PMSCR_EL2_PA_SHIFT)
+ msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
+.Lskip_spe_el2_\@:
+ mov x0, #MDCR_EL2_E2PB_MASK
+ orr x2, x2, x0 // If we don't have VHE, then
+ // use EL1&0 translation.
+
+.Lskip_spe_\@:
+ /* Trace buffer */
+ ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4
+ cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
+
+ mrs_s x0, SYS_TRBIDR_EL1
+ and x0, x0, TRBIDR_EL1_P
+ cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2
+
+ mov x0, #MDCR_EL2_E2TB_MASK
+ orr x2, x2, x0 // allow the EL1&0 translation
+ // to own it.
+
+.Lskip_trace_\@:
+ msr mdcr_el2, x2 // Configure debug traps
+.endm
+
+/* LORegions */
+.macro __init_el2_lor
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4
+ cbz x0, .Lskip_lor_\@
+ msr_s SYS_LORC_EL1, xzr
+.Lskip_lor_\@:
+.endm
+
+/* Stage-2 translation */
+.macro __init_el2_stage2
+ msr vttbr_el2, xzr
+.endm
+
+/* GICv3 system register access */
+.macro __init_el2_gicv3
+ mrs x0, id_aa64pfr0_el1
+ ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4
+ cbz x0, .Lskip_gicv3_\@
+
+ mrs_s x0, SYS_ICC_SRE_EL2
+ orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
+ orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
+ msr_s SYS_ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
+ tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
+ msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults
+.Lskip_gicv3_\@:
+.endm
+
+/* GICv5 system register access */
+.macro __init_el2_gicv5
+ mrs_s x0, SYS_ID_AA64PFR2_EL1
+ ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4
+ cbz x0, .Lskip_gicv5_\@
+
+ mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \
+ ICH_HFGITR_EL2_GICRCDIA | \
+ ICH_HFGITR_EL2_GICCDDI | \
+ ICH_HFGITR_EL2_GICCDEOI | \
+ ICH_HFGITR_EL2_GICCDHM | \
+ ICH_HFGITR_EL2_GICCDRCFG | \
+ ICH_HFGITR_EL2_GICCDPEND | \
+ ICH_HFGITR_EL2_GICCDAFF | \
+ ICH_HFGITR_EL2_GICCDPRI | \
+ ICH_HFGITR_EL2_GICCDDIS | \
+ ICH_HFGITR_EL2_GICCDEN)
+ msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps
+ mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps
+ mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGWTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+.Lskip_gicv5_\@:
+.endm
+
+.macro __init_el2_hstr
+ msr hstr_el2, xzr // Disable CP15 traps to EL2
+.endm
+
+/* Virtual CPU ID registers */
+.macro __init_el2_nvhe_idregs
+ mrs x0, midr_el1
+ mrs x1, mpidr_el1
+ msr vpidr_el2, x0
+ msr vmpidr_el2, x1
+.endm
+
+/* Coprocessor traps */
+.macro __init_el2_cptr
+ __check_hvhe .LnVHE_\@, x1
+ mov x0, #CPACR_EL1_FPEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+.LnVHE_\@:
+ mov x0, #0x33ff
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
+.endm
+
+/*
+ * Configure BRBE to permit recording cycle counts and branch mispredicts.
+ *
+ * At any EL, to record cycle counts BRBE requires that both BRBCR_EL2.CC=1 and
+ * BRBCR_EL1.CC=1.
+ *
+ * At any EL, to record branch mispredicts BRBE requires that both
+ * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1.
+ *
+ * Set {CC,MPRED} in BRBCR_EL2 in case nVHE mode is used and we are
+ * executing in EL1.
+ */
+.macro __init_el2_brbe
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_\@
+
+ mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED
+ msr_s SYS_BRBCR_EL2, x0
+.Lskip_brbe_\@:
+.endm
+
+/* Disable any fine grained traps */
+.macro __init_el2_fgt
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cbz x1, .Lskip_fgt_\@
+
+ mov x0, xzr
+ mov x2, xzr
+ /* If SPEv1p2 is implemented, */
+ __spe_vers_imp .Lskip_spe_fgt_\@, #ID_AA64DFR0_EL1_PMSVer_V1P2, x1
+ /* Disable PMSNEVFR_EL1 read and write traps */
+ orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK
+ orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK
+
+.Lskip_spe_fgt_\@:
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_fgt_\@
+
+ /*
+ * Disable read traps for the following registers
+ *
+ * [BRBSRC|BRBTGT|RBINF]_EL1
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK
+
+ /*
+ * Disable write traps for the following registers
+ *
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK
+
+ /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK
+ orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK
+
+ /* Disable read traps for BRBIDR_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK
+
+.Lskip_brbe_fgt_\@:
+
+.Lset_debug_fgt_\@:
+ msr_s SYS_HDFGRTR_EL2, x0
+ msr_s SYS_HDFGWTR_EL2, x2
+
+ mov x0, xzr
+ mov x2, xzr
+
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_insn_fgt_\@
+
+ /* Disable traps for BRBIALL instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBIALL_MASK
+
+ /* Disable traps for BRBINJ instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBINJ_MASK
+
+.Lskip_brbe_insn_fgt_\@:
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
+ cbz x1, .Lskip_sme_fgt_\@
+
+ /* Disable nVHE traps of TPIDR2 and SMPRI */
+ orr x0, x0, #HFGRTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nTPIDR2_EL0_MASK
+
+.Lskip_sme_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lskip_pie_fgt_\@
+
+ /* Disable trapping of PIR_EL1 / PIRE0_EL1 */
+ orr x0, x0, #HFGRTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGRTR_EL2_nPIRE0_EL1
+
+.Lskip_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4
+ cbz x1, .Lskip_poe_fgt_\@
+
+ /* Disable trapping of POR_EL0 */
+ orr x0, x0, #HFGRTR_EL2_nPOR_EL0
+
+.Lskip_poe_fgt_\@:
+ /* GCS depends on PIE so we don't check it if PIE is absent */
+ mrs_s x1, SYS_ID_AA64PFR1_EL1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+ cbz x1, .Lskip_gce_fgt_\@
+
+ /* Disable traps of access to GCS registers at EL0 and EL1 */
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL0_MASK
+
+.Lskip_gce_fgt_\@:
+
+.Lset_fgt_\@:
+ msr_s SYS_HFGRTR_EL2, x0
+ msr_s SYS_HFGWTR_EL2, x0
+ msr_s SYS_HFGITR_EL2, x2
+
+ mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
+ ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
+ cbz x1, .Lskip_amu_fgt_\@
+
+ msr_s SYS_HAFGRTR_EL2, xzr
+
+.Lskip_amu_fgt_\@:
+
+.Lskip_fgt_\@:
+.endm
+
+.macro __init_el2_fgt2
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cmp x1, #ID_AA64MMFR0_EL1_FGT_FGT2
+ b.lt .Lskip_fgt2_\@
+
+ mov x0, xzr
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x1, #ID_AA64DFR0_EL1_PMUVer_V3P9
+ b.lt .Lskip_pmuv3p9_\@
+
+ orr x0, x0, #HDFGRTR2_EL2_nPMICNTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1
+.Lskip_pmuv3p9_\@:
+ /* If SPE is implemented, */
+ __spe_vers_imp .Lskip_spefds_\@, ID_AA64DFR0_EL1_PMSVer_IMP, x1
+ /* we can read PMSIDR and */
+ mrs_s x1, SYS_PMSIDR_EL1
+ and x1, x1, #PMSIDR_EL1_FDS
+ /* if FEAT_SPE_FDS is implemented, */
+ cbz x1, .Lskip_spefds_\@
+ /* disable traps of PMSDSFR to EL2. */
+ orr x0, x0, #HDFGRTR2_EL2_nPMSDSFR_EL1
+
+.Lskip_spefds_\@:
+ msr_s SYS_HDFGRTR2_EL2, x0
+ msr_s SYS_HDFGWTR2_EL2, x0
+ msr_s SYS_HFGRTR2_EL2, xzr
+ msr_s SYS_HFGWTR2_EL2, xzr
+ msr_s SYS_HFGITR2_EL2, xzr
+.Lskip_fgt2_\@:
+.endm
+
+/**
+ * Initialize EL2 registers to sane values. This should be called early on all
+ * cores that were booted in EL2. Note that everything gets initialised as
+ * if VHE was not available. The kernel context will be upgraded to VHE
+ * if possible later on in the boot process
+ *
+ * Regs: x0, x1 and x2 are clobbered.
+ */
+.macro init_el2_state
+ __init_el2_sctlr
+ __init_el2_hcrx
+ __init_el2_timers
+ __init_el2_debug
+ __init_el2_brbe
+ __init_el2_lor
+ __init_el2_stage2
+ __init_el2_gicv3
+ __init_el2_gicv5
+ __init_el2_hstr
+ __init_el2_nvhe_idregs
+ __init_el2_cptr
+ __init_el2_fgt
+ __init_el2_fgt2
+.endm
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+// This will clobber tmp1 and tmp2, and expect tmp1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cbz \tmp1, \fail
+
+ adr_l \tmp1, \idreg\()_override
+ ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET]
+ ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET]
+ ubfx \tmp2, \tmp2, #\fld, #\width
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cmp \tmp1, xzr
+ and \tmp2, \tmp2, \tmp1
+ csinv \tmp2, \tmp2, xzr, ne
+ cbnz \tmp2, \pass
+ b \fail
+.endm
+
+// This will clobber tmp1 and tmp2
+.macro check_override idreg, fld, pass, fail, tmp1, tmp2
+ mrs \tmp1, \idreg\()_el1
+ __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2
+.endm
+#else
+// This will clobber tmp
+.macro __check_override idreg, fld, width, pass, fail, tmp, ignore
+ ldr_l \tmp, \idreg\()_el1_sys_val
+ ubfx \tmp, \tmp, #\fld, #\width
+ cbnz \tmp, \pass
+ b \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail, tmp, ignore
+ __check_override \idreg \fld 4 \pass \fail \tmp \ignore
+.endm
+#endif
+
+.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2
+
+.Linit_mpam_\@:
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+
+.Lskip_mpam_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2
+
+.Linit_gcs_\@:
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+
+.Lskip_gcs_\@:
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
+
+.Linit_sve_\@: /* SVE register access */
+ __check_hvhe .Lcptr_nvhe_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SVE traps
+ orr x0, x0, #CPACR_EL1_ZEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+
+.Lcptr_nvhe_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SVE traps
+ bic x0, x0, #CPTR_EL2_TZ
+ msr cptr_el2, x0
+.Lskip_set_cptr_\@:
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+.Lskip_sve_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
+
+.Linit_sme_\@: /* SME register access and priority mapping */
+ __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SME traps
+ orr x0, x0, #CPACR_EL1_SMEN
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SME traps
+ bic x0, x0, #CPTR_EL2_TSM
+ msr cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x0, #0 // SMCR controls
+
+ // Full FP in SM?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2
+
+.Linit_sme_fa64_\@:
+ orr x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+ // ZT0 available?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2
+.Linit_sme_zt0_\@:
+ orr x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0_\@:
+
+ orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x0 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme_\@
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+.Lskip_sme_\@:
+.endm
+
+#endif /* __ARM_KVM_INIT_H__ */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index fe32c0e4ac01..d2779d604c7b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_ELF_H
#define __ASM_ELF_H
@@ -24,17 +13,6 @@
#include <asm/ptrace.h>
#include <asm/user.h>
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs) \
- *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
-#define EM_AARCH64 183
-
/*
* AArch64 static relocation types.
*/
@@ -88,34 +66,90 @@ typedef struct user_fpsimd_state elf_fpregset_t;
#define R_AARCH64_MOVW_PREL_G2_NC 292
#define R_AARCH64_MOVW_PREL_G3 293
+#define R_AARCH64_RELATIVE 1027
+
/*
* These are used to set parameters in the core dumps.
*/
#define ELF_CLASS ELFCLASS64
+#ifdef __AARCH64EB__
+#define ELF_DATA ELFDATA2MSB
+#else
#define ELF_DATA ELFDATA2LSB
+#endif
#define ELF_ARCH EM_AARCH64
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization. This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
#define ELF_PLATFORM_SIZE 16
+#ifdef __AARCH64EB__
+#define ELF_PLATFORM ("aarch64_be")
+#else
#define ELF_PLATFORM ("aarch64")
+#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
#define elf_check_arch(x) ((x)->e_machine == EM_AARCH64)
-#define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X)
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ *
+ * The decision process for determining the results are:
+ *
+ *              CPU*: | arm32    | arm64 |
+ * ELF:              |            |            |
+ * ---------------------|------------|------------|
+ * missing PT_GNU_STACK | exec-all   | exec-none  |
+ * PT_GNU_STACK == RWX  | exec-stack | exec-stack |
+ * PT_GNU_STACK == RW   | exec-none | exec-none |
+ *
+ * exec-all : all PROT_READ user mappings are executable, except when
+ * backed by files on a noexec-filesystem.
+ * exec-none : only PROT_EXEC user mappings are executable.
+ * exec-stack: only the stack and PROT_EXEC user mappings are executable.
+ *
+ * *all arm64 CPUs support NX, so there is no "lacks NX" column.
+ *
+ */
+#define compat_elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT)
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE PAGE_SIZE
/*
- * This is the location that an ET_DYN program is loaded if exec'ed. Typical
- * use of this is to invoke "./ld.so someprog" to test out a new version of
- * the loader. We need to make sure that it is out of the way of the program
- * that it will "exec", and that there is sufficient room for the brk.
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
*/
-extern unsigned long randomize_et_dyn(unsigned long base);
-#define ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_64 / 3))
+#ifdef CONFIG_ARM64_FORCE_52BIT
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
+#else
+#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
+
+#ifndef __ASSEMBLER__
+
+#include <uapi/linux/elf.h>
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs) \
+ *(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
/*
* When the program starts, a1 contains a pointer to a function to be
@@ -124,12 +158,27 @@ extern unsigned long randomize_et_dyn(unsigned long base);
*/
#define ELF_PLAT_INIT(_r, load_addr) (_r)->regs[0] = 0
-#define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT);
+#define SET_PERSONALITY(ex) \
+({ \
+ clear_thread_flag(TIF_32BIT); \
+ current->personality &= ~READ_IMPLIES_EXEC; \
+})
+/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
#define ARCH_DLINFO \
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
+ \
+ /* \
+ * Should always be nonzero unless there's a kernel bug. \
+ * If we haven't determined a sensible value to give to \
+ * userspace, omit the entry: \
+ */ \
+ if (likely(signal_minsigstksz)) \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \
+ else \
+ NEW_AUX_ENT(AT_IGNORE, 0); \
} while (0)
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
@@ -146,34 +195,104 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
#endif
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
-#ifdef CONFIG_COMPAT
-#define EM_ARM 40
+#ifdef __AARCH64EB__
+#define COMPAT_ELF_PLATFORM ("v8b")
+#else
#define COMPAT_ELF_PLATFORM ("v8l")
-
-#define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3))
+#endif
/* AArch32 registers. */
#define COMPAT_ELF_NGREG 18
typedef unsigned int compat_elf_greg_t;
typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+#ifdef CONFIG_COMPAT
+
+/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
+#define COMPAT_ELF_ET_DYN_BASE 0x000400000UL
+
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
-#define compat_elf_check_arch(x) (((x)->e_machine == EM_ARM) && \
- ((x)->e_flags & EF_ARM_EABI_MASK))
-
+int compat_elf_check_arch(const struct elf32_hdr *);
+#define compat_elf_check_arch compat_elf_check_arch
#define compat_start_thread compat_start_thread
-#define COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT);
+/*
+ * Unlike the native SET_PERSONALITY macro, the compat version maintains
+ * READ_IMPLIES_EXEC across an execve() since this is the behaviour on
+ * arch/arm/.
+ */
+#define COMPAT_SET_PERSONALITY(ex) \
+({ \
+ set_thread_flag(TIF_32BIT); \
+ })
+#ifdef CONFIG_COMPAT_VDSO
+#define COMPAT_ARCH_DLINFO \
+do { \
+ /* \
+ * Note that we use Elf64_Off instead of elf_addr_t because \
+ * elf_addr_t in compat is defined as Elf32_Addr and casting \
+ * current->mm->context.vdso to it triggers a cast warning of \
+ * cast from pointer to integer of different size. \
+ */ \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (Elf64_Off)current->mm->context.vdso); \
+} while (0)
+#else
#define COMPAT_ARCH_DLINFO
-extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
- int uses_interp);
+#endif
+extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp);
#define compat_arch_setup_additional_pages \
- aarch32_setup_vectors_page
+ aarch32_setup_additional_pages
#endif /* CONFIG_COMPAT */
+struct arch_elf_state {
+ int flags;
+};
+
+#define ARM64_ELF_BTI (1 << 0)
+
+#define INIT_ARCH_ELF_STATE { \
+ .flags = 0, \
+}
+
+static inline int arch_parse_elf_property(u32 type, const void *data,
+ size_t datasz, bool compat,
+ struct arch_elf_state *arch)
+{
+ /* No known properties for AArch32 yet */
+ if (IS_ENABLED(CONFIG_COMPAT) && compat)
+ return 0;
+
+ if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+ const u32 *p = data;
+
+ if (datasz != sizeof(*p))
+ return -ENOEXEC;
+
+ if (system_supports_bti() &&
+ (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
+ arch->flags |= ARM64_ELF_BTI;
+ }
+
+ return 0;
+}
+
+static inline int arch_elf_pt_proc(void *ehdr, void *phdr,
+ struct file *f, bool is_interp,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
+static inline int arch_check_elf(void *ehdr, bool has_interp,
+ void *interp_ehdr,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
+#endif /* !__ASSEMBLER__ */
+
#endif
diff --git a/arch/arm64/include/asm/entry-common.h b/arch/arm64/include/asm/entry-common.h
new file mode 100644
index 000000000000..cab8cd78f693
--- /dev/null
+++ b/arch/arm64/include/asm/entry-common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_ARM64_ENTRY_COMMON_H
+#define _ASM_ARM64_ENTRY_COMMON_H
+
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/fpsimd.h>
+#include <asm/mte.h>
+#include <asm/stacktrace.h>
+
+#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_MTE_ASYNC_FAULT | _TIF_FOREIGN_FPSTATE)
+
+static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (ti_work & _TIF_MTE_ASYNC_FAULT) {
+ clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+ send_sig_fault(SIGSEGV, SEGV_MTEAERR, (void __user *)NULL, current);
+ }
+
+ if (ti_work & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+}
+
+#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
+
+static inline bool arch_irqentry_exit_need_resched(void)
+{
+ /*
+ * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+ * priority masking is used the GIC irqchip driver will clear DAIF.IF
+ * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+ * DAIF we must have handled an NMI, so skip preemption.
+ */
+ if (system_uses_irq_prio_masking() && read_sysreg(daif))
+ return false;
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (!system_capabilities_finalized())
+ return false;
+
+ return true;
+}
+
+#define arch_irqentry_exit_need_resched arch_irqentry_exit_need_resched
+
+#endif /* _ASM_ARM64_ENTRY_COMMON_H */
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 78834123a32e..4975a92cbd17 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -1,55 +1,539 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_ESR_H
#define __ASM_ESR_H
-#define ESR_EL1_EC_SHIFT (26)
-#define ESR_EL1_IL (1U << 25)
-
-#define ESR_EL1_EC_UNKNOWN (0x00)
-#define ESR_EL1_EC_WFI (0x01)
-#define ESR_EL1_EC_CP15_32 (0x03)
-#define ESR_EL1_EC_CP15_64 (0x04)
-#define ESR_EL1_EC_CP14_MR (0x05)
-#define ESR_EL1_EC_CP14_LS (0x06)
-#define ESR_EL1_EC_FP_ASIMD (0x07)
-#define ESR_EL1_EC_CP10_ID (0x08)
-#define ESR_EL1_EC_CP14_64 (0x0C)
-#define ESR_EL1_EC_ILL_ISS (0x0E)
-#define ESR_EL1_EC_SVC32 (0x11)
-#define ESR_EL1_EC_SVC64 (0x15)
-#define ESR_EL1_EC_SYS64 (0x18)
-#define ESR_EL1_EC_IABT_EL0 (0x20)
-#define ESR_EL1_EC_IABT_EL1 (0x21)
-#define ESR_EL1_EC_PC_ALIGN (0x22)
-#define ESR_EL1_EC_DABT_EL0 (0x24)
-#define ESR_EL1_EC_DABT_EL1 (0x25)
-#define ESR_EL1_EC_SP_ALIGN (0x26)
-#define ESR_EL1_EC_FP_EXC32 (0x28)
-#define ESR_EL1_EC_FP_EXC64 (0x2C)
-#define ESR_EL1_EC_SERRROR (0x2F)
-#define ESR_EL1_EC_BREAKPT_EL0 (0x30)
-#define ESR_EL1_EC_BREAKPT_EL1 (0x31)
-#define ESR_EL1_EC_SOFTSTP_EL0 (0x32)
-#define ESR_EL1_EC_SOFTSTP_EL1 (0x33)
-#define ESR_EL1_EC_WATCHPT_EL0 (0x34)
-#define ESR_EL1_EC_WATCHPT_EL1 (0x35)
-#define ESR_EL1_EC_BKPT32 (0x38)
-#define ESR_EL1_EC_BRK64 (0x3C)
+#include <asm/memory.h>
+#include <asm/sysreg.h>
+
+#define ESR_ELx_EC_UNKNOWN UL(0x00)
+#define ESR_ELx_EC_WFx UL(0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32 UL(0x03)
+#define ESR_ELx_EC_CP15_64 UL(0x04)
+#define ESR_ELx_EC_CP14_MR UL(0x05)
+#define ESR_ELx_EC_CP14_LS UL(0x06)
+#define ESR_ELx_EC_FP_ASIMD UL(0x07)
+#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */
+#define ESR_ELx_EC_OTHER UL(0x0A)
+/* Unallocated EC: 0x0B */
+#define ESR_ELx_EC_CP14_64 UL(0x0C)
+#define ESR_ELx_EC_BTI UL(0x0D)
+#define ESR_ELx_EC_ILL UL(0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32 UL(0x11)
+#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64 UL(0x15)
+#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 UL(0x18)
+#define ESR_ELx_EC_SVE UL(0x19)
+#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME UL(0x1D)
+/* Unallocated EC: 0x1E */
+#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW UL(0x20)
+#define ESR_ELx_EC_IABT_CUR UL(0x21)
+#define ESR_ELx_EC_PC_ALIGN UL(0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW UL(0x24)
+#define ESR_ELx_EC_DABT_CUR UL(0x25)
+#define ESR_ELx_EC_SP_ALIGN UL(0x26)
+#define ESR_ELx_EC_MOPS UL(0x27)
+#define ESR_ELx_EC_FP_EXC32 UL(0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64 UL(0x2C)
+#define ESR_ELx_EC_GCS UL(0x2D)
+/* Unallocated EC: 0x2E */
+#define ESR_ELx_EC_SERROR UL(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW UL(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR UL(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW UL(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR UL(0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32 UL(0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64 UL(0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX UL(0x3F)
+
+#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_WIDTH (6)
+#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+#define ESR_ELx_IL_SHIFT (25)
+#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
+#define ESR_ELx_ISS_MASK (GENMASK(24, 0))
+#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
+#define ESR_ELx_ISS2_SHIFT (32)
+#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32))
+#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT)
+
+/* ISS field definitions shared by different classes */
+#define ESR_ELx_WNR_SHIFT (6)
+#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
+
+/* Asynchronous Error Type */
+#define ESR_ELx_IDS_SHIFT (24)
+#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
+#define ESR_ELx_AET_SHIFT (10)
+#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
+
+#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
+
+/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_VNCR_SHIFT (13)
+#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
+#define ESR_ELx_SET_SHIFT (11)
+#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
+#define ESR_ELx_FnV_SHIFT (10)
+#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
+#define ESR_ELx_EA_SHIFT (9)
+#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
+#define ESR_ELx_S1PTW_SHIFT (7)
+#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
+
+/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
+#define ESR_ELx_FSC (0x3F)
+#define ESR_ELx_FSC_TYPE (0x3C)
+#define ESR_ELx_FSC_LEVEL (0x03)
+#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_MTE (0x11)
+#define ESR_ELx_FSC_SERROR (0x11)
+#define ESR_ELx_FSC_ACCESS (0x08)
+#define ESR_ELx_FSC_FAULT (0x04)
+#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
+#define ESR_ELx_FSC_ADDRSZ (0x00)
+
+/*
+ * Annoyingly, the negative levels for Address size faults aren't laid out
+ * contiguously (or in the desired order)
+ */
+#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C)
+#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
+ (ESR_ELx_FSC_ADDRSZ + (n)))
+
+/* Status codes for individual page table levels */
+#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
+#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n))
+
+#define ESR_ELx_FSC_FAULT_nL (0x2C)
+#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
+ ESR_ELx_FSC_FAULT) + (n))
+
+/* ISS field definitions for Data Aborts */
+#define ESR_ELx_ISV_SHIFT (24)
+#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
+#define ESR_ELx_SAS_SHIFT (22)
+#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
+#define ESR_ELx_SSE_SHIFT (21)
+#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
+#define ESR_ELx_SRT_SHIFT (16)
+#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
+#define ESR_ELx_SF_SHIFT (15)
+#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
+#define ESR_ELx_AR_SHIFT (14)
+#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_CM_SHIFT (8)
+#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
+
+/* ISS2 field definitions for Data Aborts */
+#define ESR_ELx_TnD_SHIFT (10)
+#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT)
+#define ESR_ELx_TagAccess_SHIFT (9)
+#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT)
+#define ESR_ELx_GCS_SHIFT (8)
+#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT)
+#define ESR_ELx_Overlay_SHIFT (6)
+#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT)
+#define ESR_ELx_DirtyBit_SHIFT (5)
+#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT)
+#define ESR_ELx_Xs_SHIFT (0)
+#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
+
+/* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_CV (UL(1) << 24)
+#define ESR_ELx_COND_SHIFT (20)
+#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5)
+#define ESR_ELx_WFx_ISS_RV (UL(1) << 2)
+#define ESR_ELx_WFx_ISS_TI (UL(3) << 0)
+#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0)
+#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
+#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
+#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
+
+/* ISS definitions for LD64B/ST64B/{T,P}SBCSYNC instructions */
+#define ESR_ELx_ISS_OTHER_ST64BV (0)
+#define ESR_ELx_ISS_OTHER_ST64BV0 (1)
+#define ESR_ELx_ISS_OTHER_LDST64B (2)
+#define ESR_ELx_ISS_OTHER_TSBCSYNC (3)
+#define ESR_ELx_ISS_OTHER_PSBCSYNC (4)
+
+#define DISR_EL1_IDS (UL(1) << 24)
+/*
+ * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
+ * different things in the future...
+ */
+#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
+
+/* ESR value templates for specific events */
+#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \
+ (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT))
+#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \
+ ESR_ELx_WFx_ISS_WFI)
+
+/* BRK instruction trap from AArch64 state */
+#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff
+
+/* ISS field definitions for System instruction traps */
+#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22
+#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT)
+#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1
+#define ESR_ELx_SYS64_ISS_DIR_READ 0x1
+#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_SYS64_ISS_RT_SHIFT 5
+#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1
+#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10
+#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14
+#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17
+#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20
+#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT)
+#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_OP2_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_CRM_MASK)
+#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \
+ (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \
+ ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \
+ ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \
+ ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \
+ ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT))
+
+#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_RT(esr) \
+ (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT)
+/*
+ * User space cache operations have the following sysreg encoding
+ * in System instructions.
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0)
+ */
+#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
+#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5
+
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_OP2_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \
+ (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_WRITE)
+/*
+ * User space MRS operations which are supported for emulation
+ * have the following sysreg encoding in System instructions.
+ * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1)
+ */
+#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \
+ ESR_ELx_SYS64_ISS_OP1_MASK | \
+ ESR_ELx_SYS64_ISS_CRN_MASK | \
+ ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \
+ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
+#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
+#define esr_sys64_to_sysreg(e) \
+ sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP0_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
+#define esr_cp15_to_sysreg(e) \
+ sys_reg(3, \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
+/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
+#define ESR_ELx_ERET_ISS_ERET 0x2
+#define ESR_ELx_ERET_ISS_ERETA 0x1
+
+/*
+ * ISS field definitions for floating-point exception traps
+ * (FP_EXC_32/FP_EXC_64).
+ *
+ * (The FPEXC_* constants are used instead for common bits.)
+ */
+
+#define ESR_ELx_FP_EXC_TFV (UL(1) << 23)
+
+/*
+ * ISS field definitions for CP15 accesses
+ */
+#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1
+#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1
+#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5
+#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT)
+#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1
+#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)
+#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10
+#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT)
+#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14
+#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT)
+#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17
+#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT)
+
+#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \
+ ESR_ELx_CP15_32_ISS_OP2_MASK | \
+ ESR_ELx_CP15_32_ISS_CRN_MASK | \
+ ESR_ELx_CP15_32_ISS_CRM_MASK | \
+ ESR_ELx_CP15_32_ISS_DIR_MASK)
+#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \
+ (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \
+ ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \
+ ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \
+ ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT))
+
+#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1
+#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1
+#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0
+
+#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5
+#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10
+#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16
+#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT)
+#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1
+#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)
+
+#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \
+ (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \
+ ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT))
+
+#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \
+ ESR_ELx_CP15_64_ISS_CRM_MASK | \
+ ESR_ELx_CP15_64_ISS_DIR_MASK)
+
+#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \
+ ESR_ELx_CP15_64_ISS_DIR_READ)
+
+#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \
+ ESR_ELx_CP15_64_ISS_DIR_READ)
+
+#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
+ ESR_ELx_CP15_32_ISS_DIR_READ)
+
+/*
+ * ISS values for SME traps
+ */
+#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0)
+#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK)
+
+#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_SMTC_ILL 1
+#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4
+
+/* ISS field definitions for MOPS exceptions */
+#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)
+#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18)
+#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17)
+#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16)
+#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10)
+#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
+#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
+
+/* ISS field definitions for GCS */
+#define ESR_ELx_ExType_SHIFT (20)
+#define ESR_ELx_ExType_MASK GENMASK(23, 20)
+#define ESR_ELx_Raddr_SHIFT (10)
+#define ESR_ELx_Raddr_MASK GENMASK(14, 10)
+#define ESR_ELx_Rn_SHIFT (5)
+#define ESR_ELx_Rn_MASK GENMASK(9, 5)
+#define ESR_ELx_Rvalue_SHIFT 5
+#define ESR_ELx_Rvalue_MASK GENMASK(9, 5)
+#define ESR_ELx_IT_SHIFT (0)
+#define ESR_ELx_IT_MASK GENMASK(4, 0)
+
+#define ESR_ELx_ExType_DATA_CHECK 0
+#define ESR_ELx_ExType_EXLOCK 1
+#define ESR_ELx_ExType_STR 2
+
+#define ESR_ELx_IT_RET 0
+#define ESR_ELx_IT_GCSPOPM 1
+#define ESR_ELx_IT_RET_KEYA 2
+#define ESR_ELx_IT_RET_KEYB 3
+#define ESR_ELx_IT_GCSSS1 4
+#define ESR_ELx_IT_GCSSS2 5
+#define ESR_ELx_IT_GCSPOPCX 6
+#define ESR_ELx_IT_GCSPOPX 7
+
+#ifndef __ASSEMBLER__
+#include <asm/types.h>
+
+static inline unsigned long esr_brk_comment(unsigned long esr)
+{
+ return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+}
+
+static inline bool esr_is_data_abort(unsigned long esr)
+{
+ const unsigned long ec = ESR_ELx_EC(esr);
+
+ return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
+}
+
+static inline bool esr_is_cfi_brk(unsigned long esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
+ (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE;
+}
+
+static inline bool esr_is_ubsan_brk(unsigned long esr)
+{
+ return (esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM;
+}
+
+static inline bool esr_fsc_is_translation_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_FAULT_L(3)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(2)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(1)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(0)) ||
+ (esr == ESR_ELx_FSC_FAULT_L(-1));
+}
+
+static inline bool esr_fsc_is_permission_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_PERM_L(3)) ||
+ (esr == ESR_ELx_FSC_PERM_L(2)) ||
+ (esr == ESR_ELx_FSC_PERM_L(1)) ||
+ (esr == ESR_ELx_FSC_PERM_L(0));
+}
+
+static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ACCESS_L(3)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(2)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(1)) ||
+ (esr == ESR_ELx_FSC_ACCESS_L(0));
+}
+
+static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
+{
+ esr &= ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(-1));
+}
+
+static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(-1));
+}
+
+static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(-1));
+}
+
+/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
+static inline bool esr_iss_is_eretax(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERET;
+}
+
+/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */
+static inline bool esr_iss_is_eretab(unsigned long esr)
+{
+ return esr & ESR_ELx_ERET_ISS_ERETA;
+}
+
+const char *esr_get_class_string(unsigned long esr);
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ESR_H */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 0303705fcad6..a2da3cb21c24 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -1,24 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/exception.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_EXCEPTION_H
#define __ASM_EXCEPTION_H
-#define __exception __attribute__((section(".exception.text")))
-#define __exception_irq_entry __exception
+#include <asm/esr.h>
+#include <asm/ptrace.h>
+
+#include <linux/interrupt.h>
+
+#define __exception_irq_entry __irq_entry
+
+static inline unsigned long disr_to_esr(u64 disr)
+{
+ unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+
+ if ((disr & DISR_EL1_IDS) == 0)
+ esr |= (disr & DISR_EL1_ESR_MASK);
+ else
+ esr |= (disr & ESR_ELx_ISS_MASK);
+
+ return esr;
+}
+
+asmlinkage void __noreturn handle_bad_stack(struct pt_regs *regs);
+
+asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
+
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+ void (*func)(struct pt_regs *));
+asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
+
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
+void do_el0_undef(struct pt_regs *regs, unsigned long esr);
+void do_el1_undef(struct pt_regs *regs, unsigned long esr);
+void do_el0_bti(struct pt_regs *regs);
+void do_el1_bti(struct pt_regs *regs, unsigned long esr);
+void do_el0_gcs(struct pt_regs *regs, unsigned long esr);
+void do_el1_gcs(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void do_breakpoint(unsigned long esr, struct pt_regs *regs);
+void do_watchpoint(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs);
+#else
+static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {}
+static inline void do_watchpoint(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs) {}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs);
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs);
+void do_bkpt32(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+void do_sve_acc(unsigned long esr, struct pt_regs *regs);
+void do_sme_acc(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
+void do_el0_sys(unsigned long esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
+void do_el0_cp15(unsigned long esr, struct pt_regs *regs);
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
+void do_el0_svc(struct pt_regs *regs);
+void do_el0_svc_compat(struct pt_regs *regs);
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el0_mops(struct pt_regs *regs, unsigned long esr);
+void do_el1_mops(struct pt_regs *regs, unsigned long esr);
+void do_serror(struct pt_regs *regs, unsigned long esr);
+void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
index db0563c23482..9a1c22ce664b 100644
--- a/arch/arm64/include/asm/exec.h
+++ b/arch/arm64/include/asm/exec.h
@@ -1,23 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/exec.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_EXEC_H
#define __ASM_EXEC_H
+#include <linux/sched.h>
+
extern unsigned long arch_align_stack(unsigned long sp);
#endif /* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
new file mode 100644
index 000000000000..9dc39612bdf5
--- /dev/null
+++ b/arch/arm64/include/asm/extable.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+ int insn, fixup;
+ short type, data;
+};
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
+} while (0)
+
+bool insn_may_access_user(unsigned long addr, unsigned long esr);
+
+#ifdef CONFIG_BPF_JIT
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+#else /* !CONFIG_BPF_JIT */
+static inline
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* !CONFIG_BPF_JIT */
+
+bool fixup_exception(struct pt_regs *regs, unsigned long esr);
+#endif
diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h
deleted file mode 100644
index adb88a64b2fe..000000000000
--- a/arch/arm64/include/asm/fb.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_FB_H_
-#define __ASM_FB_H_
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
-
-#endif /* __ASM_FB_H_ */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 000000000000..65555284446e
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,121 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLER__
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/sizes.h>
+#include <asm/boot.h>
+#include <asm/page.h>
+#include <asm/pgtable-prot.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * Each enum increment in these 'compile-time allocated'
+ * memory buffers is page-sized. Use set_fixmap(idx,phys)
+ * to associate physical memory with a fixmap index.
+ */
+enum fixed_addresses {
+ FIX_HOLE,
+
+ /*
+ * Reserve a virtual window for the FDT that is a page bigger than the
+ * maximum supported size. The additional space ensures that any FDT
+ * that does not exceed MAX_FDT_SIZE can be mapped regardless of
+ * whether it crosses any page boundary.
+ */
+ FIX_FDT_END,
+ FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1,
+
+ FIX_EARLYCON_MEM_BASE,
+ FIX_TEXT_POKE0,
+
+#ifdef CONFIG_KVM
+ /* One slot per CPU, mapping the guest's VNCR page at EL2. */
+ FIX_VNCR_END,
+ FIX_VNCR = FIX_VNCR_END + NR_CPUS,
+#endif
+
+#ifdef CONFIG_ACPI_APEI_GHES
+ /* Used for GHES mapping from assorted contexts */
+ FIX_APEI_GHES_IRQ,
+ FIX_APEI_GHES_SEA,
+#ifdef CONFIG_ARM_SDE_INTERFACE
+ FIX_APEI_GHES_SDEI_NORMAL,
+ FIX_APEI_GHES_SDEI_CRITICAL,
+#endif
+#endif /* CONFIG_ACPI_APEI_GHES */
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_RELOCATABLE
+ FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */
+#endif
+ FIX_ENTRY_TRAMP_TEXT3,
+ FIX_ENTRY_TRAMP_TEXT2,
+ FIX_ENTRY_TRAMP_TEXT1,
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+ __end_of_permanent_fixed_addresses,
+
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+ /*
+ * Used for kernel page table creation, so unmapped memory may be used
+ * for tables.
+ */
+ FIX_PTE,
+ FIX_PMD,
+ FIX_PUD,
+ FIX_P4D,
+ FIX_PGD,
+
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE)
+
+#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
+
+void __init early_fixmap_init(void);
+
+#define __early_set_fixmap __set_fixmap
+
+#define __late_set_fixmap __set_fixmap
+#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+
+extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLER__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac13008..1d2e33559bd5 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -1,45 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_FP_H
#define __ASM_FP_H
+#include <asm/errno.h>
+#include <asm/percpu.h>
#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/sysreg.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
-/*
- * FP/SIMD storage area has:
- * - FPSR and FPCR
- * - 32 128-bit data registers
- *
- * Note that user_fpsimd forms a prefix of this structure, which is
- * relied upon in the ptrace FP/SIMD accessors.
- */
-struct fpsimd_state {
- union {
- struct user_fpsimd_state user_fpsimd;
- struct {
- __uint128_t vregs[32];
- u32 fpsr;
- u32 fpcr;
- };
- };
-};
+#include <linux/bitmap.h>
+#include <linux/build_bug.h>
+#include <linux/bug.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
@@ -48,16 +30,438 @@ struct fpsimd_state {
* control/status register.
*/
#define VFP_STATE_SIZE ((32 * 8) + 4)
-#endif
+
+static inline unsigned long cpacr_save_enable_kernel_sve(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline unsigned long cpacr_save_enable_kernel_sme(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline void cpacr_restore(unsigned long cpacr)
+{
+ write_sysreg(cpacr, cpacr_el1);
+ isb();
+}
+
+/*
+ * When we defined the maximum SVE vector length we defined the ABI so
+ * that the maximum vector length included all the reserved for future
+ * expansion bits in ZCR rather than those just currently defined by
+ * the architecture. Using this length to allocate worst size buffers
+ * results in excessively large allocations, and this effect is even
+ * more pronounced for SME due to ZA. Define more suitable VLs for
+ * these situations.
+ */
+#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
+#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)
struct task_struct;
-extern void fpsimd_save_state(struct fpsimd_state *state);
-extern void fpsimd_load_state(struct fpsimd_state *state);
+extern void fpsimd_save_state(struct user_fpsimd_state *state);
+extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
+extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+
+struct cpu_fp_state {
+ struct user_fpsimd_state *st;
+ void *sve_state;
+ void *sme_state;
+ u64 *svcr;
+ u64 *fpmr;
+ unsigned int sve_vl;
+ unsigned int sme_vl;
+ enum fp_type *fp_type;
+ enum fp_type to_save;
+};
+
+DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+
+extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
+
+extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_save_and_flush_current_state(void);
+extern void fpsimd_save_and_flush_cpu_state(void);
+
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
+}
+
+extern void task_smstop_sm(struct task_struct *task);
+
+/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
+#define VL_ARCH_MAX 0x100
+
+/* Offset of FFR in the SVE register dump */
+static inline size_t sve_ffr_offset(int vl)
+{
+ return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+}
+
+static inline void *sve_pffr(struct thread_struct *thread)
+{
+ unsigned int vl;
+
+ if (system_supports_sme() && thread_sm_enabled(thread))
+ vl = thread_get_sme_vl(thread);
+ else
+ vl = thread_get_sve_vl(thread);
+
+ return (char *)thread->sve_state + sve_ffr_offset(vl);
+}
+
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
+extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
+extern void sve_load_state(void const *state, u32 const *pfpsr,
+ int restore_ffr);
+extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
+extern unsigned int sve_get_vl(void);
+extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
+
+struct arm64_cpu_capabilities;
+extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
+
+/*
+ * Helpers to translate bit indices in sve_vq_map to VQ values (and
+ * vice versa). This allows find_next_bit() to be used to find the
+ * _maximum_ VQ not exceeding a certain value.
+ */
+static inline unsigned int __vq_to_bit(unsigned int vq)
+{
+ return SVE_VQ_MAX - vq;
+}
+
+static inline unsigned int __bit_to_vq(unsigned int bit)
+{
+ return SVE_VQ_MAX - bit;
+}
+
+
+struct vl_info {
+ enum vec_type type;
+ const char *name; /* For display purposes */
+
+ /* Minimum supported vector length across all CPUs */
+ int min_vl;
+
+ /* Maximum supported vector length across all CPUs */
+ int max_vl;
+ int max_virtualisable_vl;
+
+ /*
+ * Set of available vector lengths,
+ * where length vq encoded as bit __vq_to_bit(vq):
+ */
+ DECLARE_BITMAP(vq_map, SVE_VQ_MAX);
+
+ /* Set of vector lengths present on at least one cpu: */
+ DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
+};
+
+#ifdef CONFIG_ARM64_SVE
+
+extern void sve_alloc(struct task_struct *task, bool flush);
+extern void fpsimd_release_task(struct task_struct *task);
+extern void fpsimd_sync_from_effective_state(struct task_struct *task);
+extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);
+
+extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
+ unsigned long vl, unsigned long flags);
+
+extern int sve_set_current_vl(unsigned long arg);
+extern int sve_get_current_vl(void);
+
+static inline void sve_user_disable(void)
+{
+ sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);
+}
+
+static inline void sve_user_enable(void)
+{
+ sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
+}
+
+#define sve_cond_update_zcr_vq(val, reg) \
+ do { \
+ u64 __zcr = read_sysreg_s((reg)); \
+ u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \
+ __new |= (val) & ZCR_ELx_LEN_MASK; \
+ if (__zcr != __new) \
+ write_sysreg_s(__new, (reg)); \
+ } while (0)
+
+/*
+ * Probing and setup functions.
+ * Calls to these functions must be serialised with one another.
+ */
+enum vec_type;
+
+extern void __init vec_init_vq_map(enum vec_type type);
+extern void vec_update_vq_map(enum vec_type type);
+extern int vec_verify_vq_map(enum vec_type type);
+extern void __init sve_setup(void);
+
+extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
+
+static inline void write_vl(enum vec_type type, u64 val)
+{
+ u64 tmp;
+
+ switch (type) {
+#ifdef CONFIG_ARM64_SVE
+ case ARM64_VEC_SVE:
+ tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_ZCR_EL1);
+ break;
+#endif
+#ifdef CONFIG_ARM64_SME
+ case ARM64_VEC_SME:
+ tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static inline int vec_max_vl(enum vec_type type)
+{
+ return vl_info[type].max_vl;
+}
+
+static inline int vec_max_virtualisable_vl(enum vec_type type)
+{
+ return vl_info[type].max_virtualisable_vl;
+}
+
+static inline int sve_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SVE);
+}
+
+static inline int sve_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SVE);
+}
+
+/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
+static inline bool vq_available(enum vec_type type, unsigned int vq)
+{
+ return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
+}
+
+static inline bool sve_vq_available(unsigned int vq)
+{
+ return vq_available(ARM64_VEC_SVE, vq);
+}
+
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ unsigned int vl = max(sve_vl, sme_vl);
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ return __sve_state_size(sve_vl, sme_vl);
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+static inline void sve_alloc(struct task_struct *task, bool flush) { }
+static inline void fpsimd_release_task(struct task_struct *task) { }
+static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
+static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }
+
+static inline int sve_max_virtualisable_vl(void)
+{
+ return 0;
+}
+
+static inline int sve_set_current_vl(unsigned long arg)
+{
+ return -EINVAL;
+}
+
+static inline int sve_get_current_vl(void)
+{
+ return -EINVAL;
+}
+
+static inline int sve_max_vl(void)
+{
+ return -EINVAL;
+}
+
+static inline bool sve_vq_available(unsigned int vq) { return false; }
+
+static inline void sve_user_disable(void) { BUILD_BUG(); }
+static inline void sve_user_enable(void) { BUILD_BUG(); }
+
+#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
+
+static inline void vec_init_vq_map(enum vec_type t) { }
+static inline void vec_update_vq_map(enum vec_type t) { }
+static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
+static inline void sve_setup(void) { }
+
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ return 0;
+}
+
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+ sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+ sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task, bool flush);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+extern void sme_suspend_exit(void);
+
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
+
+ return size;
+}
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state for task, given task's currently configured vector
+ * length.
+ */
+static inline size_t sme_state_size(struct task_struct const *task)
+{
+ return __sme_state_size(task_get_sme_vl(task));
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+static inline void sme_suspend_exit(void) { }
+
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ return 0;
+}
+
+static inline size_t sme_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
+/* For use by EFI runtime services calls only */
+extern void __efi_fpsimd_begin(void);
+extern void __efi_fpsimd_end(void);
+
#endif
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -1,22 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* FP/SIMD state saving and restoring macros
*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <asm/assembler.h>
+
.macro fpsimd_save state, tmpnr
stp q0, q1, [\state, #16 * 0]
stp q2, q3, [\state, #16 * 2]
@@ -40,6 +31,19 @@
str w\tmpnr, [\state, #16 * 2 + 4]
.endm
+.macro fpsimd_restore_fpcr state, tmp
+ /*
+ * Writes to fpcr may be self-synchronising, so avoid restoring
+ * the register if it hasn't changed.
+ */
+ mrs \tmp, fpcr
+ cmp \tmp, \state
+ b.eq 9999f
+ msr fpcr, \state
+9999:
+.endm
+
+/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
ldp q0, q1, [\state, #16 * 0]
ldp q2, q3, [\state, #16 * 2]
@@ -60,5 +64,294 @@
ldr w\tmpnr, [\state, #16 * 2]
msr fpsr, x\tmpnr
ldr w\tmpnr, [\state, #16 * 2 + 4]
- msr fpcr, x\tmpnr
+ fpsimd_restore_fpcr x\tmpnr, \state
+.endm
+
+/* Sanity-check macros to help avoid encoding garbage instructions */
+
+.macro _check_general_reg nr
+ .if (\nr) < 0 || (\nr) > 30
+ .error "Bad register number \nr."
+ .endif
+.endm
+
+.macro _sve_check_zreg znr
+ .if (\znr) < 0 || (\znr) > 31
+ .error "Bad Scalable Vector Extension vector register number \znr."
+ .endif
+.endm
+
+.macro _sve_check_preg pnr
+ .if (\pnr) < 0 || (\pnr) > 15
+ .error "Bad Scalable Vector Extension predicate register number \pnr."
+ .endif
+.endm
+
+.macro _check_num n, min, max
+ .if (\n) < (\min) || (\n) > (\max)
+ .error "Number \n out of range [\min,\max]"
+ .endif
+.endm
+
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
+/* SVE instruction encodings for non-SVE-capable assemblers */
+/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
+
+/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* RDVL X\nx, #\imm */
+.macro _sve_rdvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5000 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/* RDFFR (unpredicated): RDFFR P\np.B */
+.macro _sve_rdffr np
+ _sve_check_preg \np
+ .inst 0x2519f000 \
+ | (\np)
+.endm
+
+/* WRFFR P\np.B */
+.macro _sve_wrffr np
+ _sve_check_preg \np
+ .inst 0x25289000 \
+ | ((\np) << 5)
+.endm
+
+/* PFALSE P\np.B */
+.macro _sve_pfalse np
+ _sve_check_preg \np
+ .inst 0x2518e400 \
+ | (\np)
+.endm
+
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for %\from, %((\from) + ((\to) - (\from)) / 2)
+ __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+/* Update ZCR_EL1.LEN with the new VQ */
+.macro sve_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_ZCR_EL1
+ bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Preserve the first 128-bits of Znz and zero the rest. */
+.macro _sve_flush_z nz
+ _sve_check_zreg \nz
+ mov v\nz\().16b, v\nz\().16b
+.endm
+
+.macro sve_flush_z
+ _for n, 0, 31, _sve_flush_z \n
+.endm
+.macro sve_flush_p
+ _for n, 0, 15, _sve_pfalse \n
+.endm
+.macro sve_flush_ffr
+ _sve_wrffr 0
+.endm
+
+.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
+ _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
+ _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
+ cbz \save_ffr, 921f
+ _sve_rdffr 0
+ b 922f
+921:
+ _sve_pfalse 0 // Zero out FFR
+922:
+ _sve_str_p 0, \nxbase
+ _sve_ldr_p 0, \nxbase, -16
+ mrs x\nxtmp, fpsr
+ str w\nxtmp, [\xpfpsr]
+ mrs x\nxtmp, fpcr
+ str w\nxtmp, [\xpfpsr, #4]
+.endm
+
+.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
+ _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
+ cbz \restore_ffr, 921f
+ _sve_ldr_p 0, \nxbase
+ _sve_wrffr 0
+921:
+ _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
+
+ ldr w\nxtmp, [\xpfpsr]
+ msr fpsr, x\nxtmp
+ ldr w\nxtmp, [\xpfpsr, #4]
+ msr fpcr, x\nxtmp
+.endm
+
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
.endm
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
new file mode 100644
index 000000000000..751e88a96734
--- /dev/null
+++ b/arch/arm64/include/asm/fpu.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <linux/preempt.h>
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+
+static inline void kernel_fpu_begin(void)
+{
+ BUG_ON(!in_task());
+ preempt_disable();
+ kernel_neon_begin(NULL);
+}
+
+static inline void kernel_fpu_end(void)
+{
+ kernel_neon_end(NULL);
+ preempt_enable();
+}
+
+#endif /* ! __ASM_FPU_H */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000000..1621c84f44b3
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#else
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#endif
+
+/* The BL at the callsite's adjusted rec->ip */
+#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+
+#define FTRACE_PLT_IDX 0
+#define NR_FTRACE_PLTS 1
+
+/*
+ * Currently, gcc tends to save the link register after the local variables
+ * on the stack. This causes the max stack tracer to report the function
+ * frame sizes for the wrong functions. By defining
+ * ARCH_FTRACE_SHIFT_STACK_TRACER, it will tell the stack tracer to expect
+ * to find the return address on the stack after the local variables have
+ * been set up.
+ *
+ * Note, this may change in the future, and we will need to deal with that
+ * if it were to happen.
+ */
+#define ARCH_FTRACE_SHIFT_STACK_TRACER 1
+
+#ifndef __ASSEMBLER__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+extern void return_to_handler(void);
+
+unsigned long ftrace_call_adjust(unsigned long addr);
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define HAVE_ARCH_FTRACE_REGS
+struct dyn_ftrace;
+struct ftrace_ops;
+struct ftrace_regs;
+#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs))
+
+#define arch_ftrace_get_regs(regs) NULL
+
+/*
+ * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct
+ * stack alignment
+ */
+struct __arch_ftrace_regs {
+ /* x0 - x8 */
+ unsigned long regs[9];
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ unsigned long direct_tramp;
+#else
+ unsigned long __unused;
+#endif
+
+ unsigned long fp;
+ unsigned long lr;
+
+ unsigned long sp;
+ unsigned long pc;
+};
+
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->pc;
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long pc)
+{
+ arch_ftrace_regs(fregs)->pc = pc;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->sp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n)
+{
+ if (n < 8)
+ return arch_ftrace_regs(fregs)->regs[n];
+ return 0;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->regs[0];
+}
+
+static __always_inline void
+ftrace_regs_set_return_value(struct ftrace_regs *fregs,
+ unsigned long ret)
+{
+ arch_ftrace_regs(fregs)->regs[0] = ret;
+}
+
+static __always_inline void
+ftrace_override_function_with_return(struct ftrace_regs *fregs)
+{
+ arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->fp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->lr;
+}
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs);
+
+ memcpy(regs->regs, afregs->regs, sizeof(afregs->regs));
+ regs->sp = afregs->sp;
+ regs->pc = afregs->pc;
+ regs->regs[29] = afregs->fp;
+ regs->regs[30] = afregs->lr;
+ regs->pstate = PSR_MODE_EL1h;
+ return regs;
+}
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->pc = arch_ftrace_regs(fregs)->pc; \
+ (_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->sp; \
+ (_regs)->pstate = PSR_MODE_EL1h; \
+ } while (0)
+
+int ftrace_regs_query_register_offset(const char *name);
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
+ unsigned long addr)
+{
+ /*
+ * The ftrace trampoline will return to this address instead of the
+ * instrumented function.
+ */
+ arch_ftrace_regs(fregs)->direct_tramp = addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+#endif
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user really wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+
+static inline bool arch_syscall_match_sym_name(const char *sym,
+ const char *name)
+{
+ /*
+ * Since all syscall functions have __arm64_ prefix, we must skip it.
+ * However, as we described above, we decided to ignore compat
+ * syscalls, so we don't care about __arm64_compat_ prefix here.
+ */
+ return !strcmp(sym + 8, name);
+}
+#endif /* ifndef __ASSEMBLER__ */
+
+#ifndef __ASSEMBLER__
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
+ unsigned long frame_pointer);
+
+#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
+#endif
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index c582fa316366..bc06691d2062 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -1,136 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_FUTEX_H
#define __ASM_FUTEX_H
-#ifdef __KERNEL__
-
#include <linux/futex.h>
#include <linux/uaccess.h>
+
#include <asm/errno.h>
+#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? */
+
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
+do { \
+ unsigned int loops = FUTEX_MAX_LOOPS; \
+ \
+ uaccess_enable_privileged(); \
asm volatile( \
-"1: ldaxr %w1, %2\n" \
+" prfm pstl1strm, %2\n" \
+"1: ldxr %w1, %2\n" \
insn "\n" \
-"2: stlxr %w3, %w0, %2\n" \
-" cbnz %w3, 1b\n" \
+"2: stlxr %w0, %w3, %2\n" \
+" cbz %w0, 3f\n" \
+" sub %w4, %w4, %w0\n" \
+" cbnz %w4, 1b\n" \
+" mov %w0, %w6\n" \
"3:\n" \
-" .pushsection .fixup,\"ax\"\n" \
-"4: mov %w0, %w5\n" \
-" b 3b\n" \
-" .popsection\n" \
-" .pushsection __ex_table,\"a\"\n" \
-" .align 3\n" \
-" .quad 1b, 4b, 2b, 4b\n" \
-" .popsection\n" \
- : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
- : "r" (oparg), "Ir" (-EFAULT) \
- : "cc", "memory")
+" dmb ish\n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \
+ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \
+ "+r" (loops) \
+ : "r" (oparg), "Ir" (-EAGAIN) \
+ : "memory"); \
+ uaccess_disable_privileged(); \
+} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
+ u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
-
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!access_ok(_uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
-
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("mov %w0, %w4",
+ __futex_atomic_op("mov %w3, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("add %w0, %w1, %w4",
+ __futex_atomic_op("add %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("orr %w0, %w1, %w4",
+ __futex_atomic_op("orr %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("and %w0, %w1, %w4",
+ __futex_atomic_op("and %w3, %w1, %w5",
ret, oldval, uaddr, tmp, ~oparg);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("eor %w0, %w1, %w4",
+ __futex_atomic_op("eor %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
default:
ret = -ENOSYS;
}
- pagefault_enable(); /* subsumes preempt_enable() */
+ if (!ret)
+ *oval = oldval;
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
return ret;
}
static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
+ unsigned int loops = FUTEX_MAX_LOOPS;
u32 val, tmp;
+ u32 __user *uaddr;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!access_ok(_uaddr, sizeof(u32)))
return -EFAULT;
+ uaddr = __uaccess_mask_ptr(_uaddr);
+ uaccess_enable_privileged();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-"1: ldaxr %w1, %2\n"
-" sub %w3, %w1, %w4\n"
-" cbnz %w3, 3f\n"
-"2: stlxr %w3, %w5, %2\n"
-" cbnz %w3, 1b\n"
+" prfm pstl1strm, %2\n"
+"1: ldxr %w1, %2\n"
+" sub %w3, %w1, %w5\n"
+" cbnz %w3, 4f\n"
+"2: stlxr %w3, %w6, %2\n"
+" cbz %w3, 3f\n"
+" sub %w4, %w4, %w3\n"
+" cbnz %w4, 1b\n"
+" mov %w0, %w7\n"
"3:\n"
-" .pushsection .fixup,\"ax\"\n"
-"4: mov %w0, %w6\n"
-" b 3b\n"
-" .popsection\n"
-" .pushsection __ex_table,\"a\"\n"
-" .align 3\n"
-" .quad 1b, 4b, 2b, 4b\n"
-" .popsection\n"
- : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
- : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
- : "cc", "memory");
+" dmb ish\n"
+"4:\n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
+ _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
+ : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
+ : "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
+ : "memory");
+ uaccess_disable_privileged();
+
+ if (!ret)
+ *uval = val;
- *uval = val;
return ret;
}
-#endif /* __KERNEL__ */
#endif /* __ASM_FUTEX_H */
diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
new file mode 100644
index 000000000000..8fa0707069e8
--- /dev/null
+++ b/arch/arm64/include/asm/gcs.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+#ifndef __ASM_GCS_H
+#define __ASM_GCS_H
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+
+struct kernel_clone_args;
+struct ksignal;
+
+static inline void gcsb_dsync(void)
+{
+ asm volatile(".inst 0xd503227f" : : : "memory");
+}
+
+static inline void gcsstr(u64 *addr, u64 val)
+{
+ register u64 *_addr __asm__ ("x0") = addr;
+ register long _val __asm__ ("x1") = val;
+
+ /* GCSSTTR x1, [x0] */
+ asm volatile(
+ ".inst 0xd91f1c01\n"
+ :
+ : "rZ" (_val), "r" (_addr)
+ : "memory");
+}
+
+static inline void gcsss1(u64 Xt)
+{
+ asm volatile (
+ "sys #3, C7, C7, #2, %0\n"
+ :
+ : "rZ" (Xt)
+ : "memory");
+}
+
+static inline u64 gcsss2(void)
+{
+ u64 Xt;
+
+ asm volatile(
+ "SYSL %0, #3, C7, C7, #3\n"
+ : "=r" (Xt)
+ :
+ : "memory");
+
+ return Xt;
+}
+
+#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \
+ (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
+
+#ifdef CONFIG_ARM64_GCS
+
+static inline bool task_gcs_el0_enabled(struct task_struct *task)
+{
+ return task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
+}
+
+void gcs_set_el0_mode(struct task_struct *task);
+void gcs_free(struct task_struct *task);
+void gcs_preserve_current_state(void);
+unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args);
+
+static inline int gcs_check_locked(struct task_struct *task,
+ unsigned long new_val)
+{
+ unsigned long cur_val = task->thread.gcs_el0_mode;
+
+ cur_val &= task->thread.gcs_el0_locked;
+ new_val &= task->thread.gcs_el0_locked;
+
+ if (cur_val != new_val)
+ return -EBUSY;
+
+ return 0;
+}
+
+static inline int gcssttr(unsigned long __user *addr, unsigned long val)
+{
+ register unsigned long __user *_addr __asm__ ("x0") = addr;
+ register unsigned long _val __asm__ ("x1") = val;
+ int err = 0;
+
+ /* GCSSTTR x1, [x0] */
+ asm volatile(
+ "1: .inst 0xd91f1c01\n"
+ "2: \n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)
+ : "+r" (err)
+ : "rZ" (_val), "r" (_addr)
+ : "memory");
+
+ return err;
+}
+
+static inline void put_user_gcs(unsigned long val, unsigned long __user *addr,
+ int *err)
+{
+ int ret;
+
+ if (!access_ok((char __user *)addr, sizeof(u64))) {
+ *err = -EFAULT;
+ return;
+ }
+
+ uaccess_ttbr0_enable();
+ ret = gcssttr(addr, val);
+ if (ret != 0)
+ *err = ret;
+ uaccess_ttbr0_disable();
+}
+
+static inline void push_user_gcs(unsigned long val, int *err)
+{
+ u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+
+ gcspr -= sizeof(u64);
+ put_user_gcs(val, (unsigned long __user *)gcspr, err);
+ if (!*err)
+ write_sysreg_s(gcspr, SYS_GCSPR_EL0);
+}
+
+/*
+ * Unlike put/push_user_gcs() above, get/pop_user_gsc() doesn't
+ * validate the GCS permission is set on the page being read. This
+ * differs from how the hardware works when it consumes data stored at
+ * GCSPR. Callers should ensure this is acceptable.
+ */
+static inline u64 get_user_gcs(unsigned long __user *addr, int *err)
+{
+ unsigned long ret;
+ u64 load = 0;
+
+ /* Ensure previous GCS operation are visible before we read the page */
+ gcsb_dsync();
+ ret = copy_from_user(&load, addr, sizeof(load));
+ if (ret != 0)
+ *err = ret;
+ return load;
+}
+
+static inline u64 pop_user_gcs(int *err)
+{
+ u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0);
+ u64 read_val;
+
+ read_val = get_user_gcs((__force unsigned long __user *)gcspr, err);
+ if (!*err)
+ write_sysreg_s(gcspr + sizeof(u64), SYS_GCSPR_EL0);
+
+ return read_val;
+}
+
+#else
+
+static inline bool task_gcs_el0_enabled(struct task_struct *task)
+{
+ return false;
+}
+
+static inline void gcs_set_el0_mode(struct task_struct *task) { }
+static inline void gcs_free(struct task_struct *task) { }
+static inline void gcs_preserve_current_state(void) { }
+static inline void put_user_gcs(unsigned long val, unsigned long __user *addr,
+ int *err) { }
+static inline void push_user_gcs(unsigned long val, int *err) { }
+
+static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
+ const struct kernel_clone_args *args)
+{
+ return -ENOTSUPP;
+}
+static inline int gcs_check_locked(struct task_struct *task,
+ unsigned long new_val)
+{
+ return 0;
+}
+static inline u64 get_user_gcs(unsigned long __user *addr, int *err)
+{
+ *err = -EFAULT;
+ return 0;
+}
+static inline u64 pop_user_gcs(int *err)
+{
+ return 0;
+}
+
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h
new file mode 100644
index 000000000000..a114e4f8209b
--- /dev/null
+++ b/arch/arm64/include/asm/gpr-num.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLER__
+
+ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ .equ .L__gpr_num_x\num, \num
+ .equ .L__gpr_num_w\num, \num
+ .endr
+ .equ .L__gpr_num_xzr, 31
+ .equ .L__gpr_num_wzr, 31
+
+#else /* __ASSEMBLER__ */
+
+#define __DEFINE_ASM_GPR_NUMS \
+" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
+" .equ .L__gpr_num_x\\num, \\num\n" \
+" .equ .L__gpr_num_w\\num, \\num\n" \
+" .endr\n" \
+" .equ .L__gpr_num_xzr, 31\n" \
+" .equ .L__gpr_num_wzr, 31\n"
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 990c051e7829..77d6b8c63d4e 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -1,45 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_HARDIRQ_H
#define __ASM_HARDIRQ_H
#include <linux/cache.h>
+#include <linux/percpu.h>
#include <linux/threads.h>
+#include <asm/barrier.h>
#include <asm/irq.h>
+#include <asm/kvm_arm.h>
+#include <asm/sysreg.h>
-#define NR_IPI 4
+#define ack_bad_irq ack_bad_irq
+#include <asm-generic/hardirq.h>
-typedef struct {
- unsigned int __softirq_pending;
-#ifdef CONFIG_SMP
- unsigned int ipi_irqs[NR_IPI];
-#endif
-} ____cacheline_aligned irq_cpustat_t;
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
-#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+struct nmi_ctx {
+ u64 hcr;
+ unsigned int cnt;
+};
-#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++
-#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member)
+DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
-#ifdef CONFIG_SMP
-u64 smp_irq_stat_cpu(unsigned int cpu);
-#define arch_irq_stat_cpu smp_irq_stat_cpu
-#endif
+#define arch_nmi_enter() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ if (___ctx->cnt) { \
+ ___ctx->cnt++; \
+ break; \
+ } \
+ \
+ ___hcr = read_sysreg(hcr_el2); \
+ if (!(___hcr & HCR_TGE)) { \
+ write_sysreg_hcr(___hcr | HCR_TGE); \
+ isb(); \
+ } \
+ /* \
+ * Make sure the sysreg write is performed before ___ctx->cnt \
+ * is set to 1. NMIs that see cnt == 1 will rely on us. \
+ */ \
+ barrier(); \
+ ___ctx->cnt = 1; \
+ /* \
+ * Make sure ___ctx->cnt is set before we save ___hcr. We \
+ * don't want ___ctx->hcr to be overwritten. \
+ */ \
+ barrier(); \
+ ___ctx->hcr = ___hcr; \
+} while (0)
-#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
+#define arch_nmi_exit() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ ___hcr = ___ctx->hcr; \
+ /* \
+ * Make sure we read ___ctx->hcr before we release \
+ * ___ctx->cnt as it makes ___ctx->hcr updatable again. \
+ */ \
+ barrier(); \
+ ___ctx->cnt--; \
+ /* \
+ * Make sure ___ctx->cnt release is visible before we \
+ * restore the sysreg. Otherwise a new NMI occurring \
+ * right after write_sysreg() can be fooled and think \
+ * we secured things for it. \
+ */ \
+ barrier(); \
+ if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
+ write_sysreg_hcr(___hcr); \
+} while (0)
static inline void ack_bad_irq(unsigned int irq)
{
@@ -47,11 +91,4 @@ static inline void ack_bad_irq(unsigned int irq)
irq_err_count++;
}
-extern void handle_IRQ(unsigned int, struct pt_regs *);
-
-/*
- * No arch-specific IRQ flags.
- */
-#define set_irq_flags(irq, flags)
-
#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 5b7ca8ace95f..44c1f757bfcf 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -1,117 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* arch/arm64/include/asm/hugetlb.h
*
* Copyright (C) 2013 Linaro Ltd.
*
* Based on arch/x86/include/asm/hugetlb.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
-#include <asm-generic/hugetlb.h>
+#include <asm/cacheflush.h>
+#include <asm/mte.h>
#include <asm/page.h>
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
- return *ptep;
-}
-
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
- set_pte_at(mm, addr, ptep, pte);
-}
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- ptep_clear_flush(vma, addr, ptep);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- return ptep_get_and_clear(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep,
- pte_t pte, int dirty)
-{
- return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor,
- unsigned long ceiling)
-{
- free_pgd_range(tlb, addr, end, floor, ceiling);
-}
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported
+extern bool arch_hugetlb_migration_supported(struct hstate *h);
+#endif
-static inline int is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr, unsigned long len)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- return 0;
+ clear_bit(PG_dcache_clean, &folio->flags.f);
+
+#ifdef CONFIG_ARM64_MTE
+ if (system_supports_mte()) {
+ clear_bit(PG_mte_tagged, &folio->flags.f);
+ clear_bit(PG_mte_lock, &folio->flags.f);
+ }
+#endif
}
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
+
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
+#define arch_make_huge_pte arch_make_huge_pte
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+void __init arm64_hugetlb_cma_reserve(void);
+
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
-{
- struct hstate *h = hstate_file(file);
- if (len & ~huge_page_mask(h))
- return -EINVAL;
- if (addr & ~huge_page_mask(h))
- return -EINVAL;
- return 0;
-}
-
-static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
- return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
- return pte_wrprotect(pte);
-}
+#include <asm-generic/hugetlb.h>
-static inline int arch_prepare_hugepage(struct page *page)
+static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long stride,
+ bool last_level)
{
- return 0;
+ switch (stride) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ __flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1);
+ break;
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ __flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2);
+ break;
+ case CONT_PTE_SIZE:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3);
+ break;
+ default:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN);
+ }
}
-static inline void arch_release_hugepage(struct page *page)
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
{
-}
+ unsigned long stride = huge_page_size(hstate_vma(vma));
-static inline void arch_clear_hugepage_flags(struct page *page)
-{
- clear_bit(PG_dcache_clean, &page->flags);
+ __flush_hugetlb_tlb_range(vma, start, end, stride, false);
}
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index d064047612b1..bd81cf17744a 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -1,22 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_HW_BREAKPOINT_H
#define __ASM_HW_BREAKPOINT_H
-#ifdef __KERNEL__
+#include <asm/cputype.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
struct arch_hw_breakpoint_ctrl {
u32 __reserved : 19,
@@ -32,10 +24,21 @@ struct arch_hw_breakpoint {
struct arch_hw_breakpoint_ctrl ctrl;
};
+/* Privilege Levels */
+#define AARCH64_BREAKPOINT_EL1 1
+#define AARCH64_BREAKPOINT_EL0 2
+
+#define DBG_HMC_HYP (1 << 13)
+
static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
{
- return (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
+ u32 val = (ctrl.len << 5) | (ctrl.type << 3) | (ctrl.privilege << 1) |
ctrl.enabled;
+
+ if (is_kernel_in_hyp_mode() && ctrl.privilege == AARCH64_BREAKPOINT_EL1)
+ val |= DBG_HMC_HYP;
+
+ return val;
}
static inline void decode_ctrl_reg(u32 reg,
@@ -56,16 +59,15 @@ static inline void decode_ctrl_reg(u32 reg,
/* Watchpoints */
#define ARM_BREAKPOINT_LOAD 1
#define ARM_BREAKPOINT_STORE 2
-#define AARCH64_ESR_ACCESS_MASK (1 << 6)
-
-/* Privilege Levels */
-#define AARCH64_BREAKPOINT_EL1 1
-#define AARCH64_BREAKPOINT_EL0 2
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
+#define ARM_BREAKPOINT_LEN_3 0x7
#define ARM_BREAKPOINT_LEN_4 0xf
+#define ARM_BREAKPOINT_LEN_5 0x1f
+#define ARM_BREAKPOINT_LEN_6 0x3f
+#define ARM_BREAKPOINT_LEN_7 0x7f
#define ARM_BREAKPOINT_LEN_8 0xff
/* Kernel stepping */
@@ -79,7 +81,6 @@ static inline void decode_ctrl_reg(u32 reg,
*/
#define ARM_MAX_BRP 16
#define ARM_MAX_WRP 16
-#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP)
/* Virtual debug register bases. */
#define AARCH64_DBG_REG_BVR 0
@@ -88,29 +89,32 @@ static inline void decode_ctrl_reg(u32 reg,
#define AARCH64_DBG_REG_WCR (AARCH64_DBG_REG_WVR + ARM_MAX_WRP)
/* Debug register names. */
-#define AARCH64_DBG_REG_NAME_BVR "bvr"
-#define AARCH64_DBG_REG_NAME_BCR "bcr"
-#define AARCH64_DBG_REG_NAME_WVR "wvr"
-#define AARCH64_DBG_REG_NAME_WCR "wcr"
+#define AARCH64_DBG_REG_NAME_BVR bvr
+#define AARCH64_DBG_REG_NAME_BCR bcr
+#define AARCH64_DBG_REG_NAME_WVR wvr
+#define AARCH64_DBG_REG_NAME_WCR wcr
/* Accessor macros for the debug registers. */
#define AARCH64_DBG_READ(N, REG, VAL) do {\
- asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\
+ VAL = read_sysreg(dbg##REG##N##_el1);\
} while (0)
#define AARCH64_DBG_WRITE(N, REG, VAL) do {\
- asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\
+ write_sysreg(VAL, dbg##REG##N##_el1);\
} while (0)
struct task_struct;
struct notifier_block;
+struct perf_event_attr;
struct perf_event;
struct pmu;
extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+ int *gen_len, int *gen_type, int *offset);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
@@ -131,7 +135,30 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
}
#endif
-extern struct pmu perf_ops_bp;
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ return 1 +
+ cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_BRPs_SHIFT);
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+ u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ return 1 +
+ cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_WRPs_SHIFT);
+}
+
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
+#else
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
+{
+}
+#endif
-#endif /* __KERNEL__ */
#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6d4482fa35bc..1f63814ae6c4 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -1,48 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_HWCAP_H
#define __ASM_HWCAP_H
#include <uapi/asm/hwcap.h>
+#include <asm/cpufeature.h>
+#define COMPAT_HWCAP_SWP (1 << 0)
#define COMPAT_HWCAP_HALF (1 << 1)
#define COMPAT_HWCAP_THUMB (1 << 2)
+#define COMPAT_HWCAP_26BIT (1 << 3)
#define COMPAT_HWCAP_FAST_MULT (1 << 4)
+#define COMPAT_HWCAP_FPA (1 << 5)
#define COMPAT_HWCAP_VFP (1 << 6)
#define COMPAT_HWCAP_EDSP (1 << 7)
+#define COMPAT_HWCAP_JAVA (1 << 8)
+#define COMPAT_HWCAP_IWMMXT (1 << 9)
+#define COMPAT_HWCAP_CRUNCH (1 << 10) /* Obsolete */
+#define COMPAT_HWCAP_THUMBEE (1 << 11)
#define COMPAT_HWCAP_NEON (1 << 12)
#define COMPAT_HWCAP_VFPv3 (1 << 13)
+#define COMPAT_HWCAP_VFPV3D16 (1 << 14)
#define COMPAT_HWCAP_TLS (1 << 15)
#define COMPAT_HWCAP_VFPv4 (1 << 16)
#define COMPAT_HWCAP_IDIVA (1 << 17)
#define COMPAT_HWCAP_IDIVT (1 << 18)
#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_VFPD32 (1 << 19)
+#define COMPAT_HWCAP_LPAE (1 << 20)
+#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP_FPHP (1 << 22)
+#define COMPAT_HWCAP_ASIMDHP (1 << 23)
+#define COMPAT_HWCAP_ASIMDDP (1 << 24)
+#define COMPAT_HWCAP_ASIMDFHM (1 << 25)
+#define COMPAT_HWCAP_ASIMDBF16 (1 << 26)
+#define COMPAT_HWCAP_I8MM (1 << 27)
+
+#define COMPAT_HWCAP2_AES (1 << 0)
+#define COMPAT_HWCAP2_PMULL (1 << 1)
+#define COMPAT_HWCAP2_SHA1 (1 << 2)
+#define COMPAT_HWCAP2_SHA2 (1 << 3)
+#define COMPAT_HWCAP2_CRC32 (1 << 4)
+#define COMPAT_HWCAP2_SB (1 << 5)
+#define COMPAT_HWCAP2_SSBS (1 << 6)
+
+#ifndef __ASSEMBLER__
+#include <linux/log2.h>
+
+/*
+ * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields
+ * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as
+ * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here
+ * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart.
+ *
+ * Hwcaps should be set and tested within the kernel via the
+ * cpu_{set,have}_named_feature(feature) where feature is the unique suffix
+ * of KERNEL_HWCAP_{feature}.
+ */
+#define __khwcap_feature(x) const_ilog2(HWCAP_ ## x)
+#define KERNEL_HWCAP_FP __khwcap_feature(FP)
+#define KERNEL_HWCAP_ASIMD __khwcap_feature(ASIMD)
+#define KERNEL_HWCAP_EVTSTRM __khwcap_feature(EVTSTRM)
+#define KERNEL_HWCAP_AES __khwcap_feature(AES)
+#define KERNEL_HWCAP_PMULL __khwcap_feature(PMULL)
+#define KERNEL_HWCAP_SHA1 __khwcap_feature(SHA1)
+#define KERNEL_HWCAP_SHA2 __khwcap_feature(SHA2)
+#define KERNEL_HWCAP_CRC32 __khwcap_feature(CRC32)
+#define KERNEL_HWCAP_ATOMICS __khwcap_feature(ATOMICS)
+#define KERNEL_HWCAP_FPHP __khwcap_feature(FPHP)
+#define KERNEL_HWCAP_ASIMDHP __khwcap_feature(ASIMDHP)
+#define KERNEL_HWCAP_CPUID __khwcap_feature(CPUID)
+#define KERNEL_HWCAP_ASIMDRDM __khwcap_feature(ASIMDRDM)
+#define KERNEL_HWCAP_JSCVT __khwcap_feature(JSCVT)
+#define KERNEL_HWCAP_FCMA __khwcap_feature(FCMA)
+#define KERNEL_HWCAP_LRCPC __khwcap_feature(LRCPC)
+#define KERNEL_HWCAP_DCPOP __khwcap_feature(DCPOP)
+#define KERNEL_HWCAP_SHA3 __khwcap_feature(SHA3)
+#define KERNEL_HWCAP_SM3 __khwcap_feature(SM3)
+#define KERNEL_HWCAP_SM4 __khwcap_feature(SM4)
+#define KERNEL_HWCAP_ASIMDDP __khwcap_feature(ASIMDDP)
+#define KERNEL_HWCAP_SHA512 __khwcap_feature(SHA512)
+#define KERNEL_HWCAP_SVE __khwcap_feature(SVE)
+#define KERNEL_HWCAP_ASIMDFHM __khwcap_feature(ASIMDFHM)
+#define KERNEL_HWCAP_DIT __khwcap_feature(DIT)
+#define KERNEL_HWCAP_USCAT __khwcap_feature(USCAT)
+#define KERNEL_HWCAP_ILRCPC __khwcap_feature(ILRCPC)
+#define KERNEL_HWCAP_FLAGM __khwcap_feature(FLAGM)
+#define KERNEL_HWCAP_SSBS __khwcap_feature(SSBS)
+#define KERNEL_HWCAP_SB __khwcap_feature(SB)
+#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
+#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
+#define KERNEL_HWCAP_GCS __khwcap_feature(GCS)
+#define KERNEL_HWCAP_CMPBR __khwcap_feature(CMPBR)
+#define KERNEL_HWCAP_FPRCVT __khwcap_feature(FPRCVT)
+#define KERNEL_HWCAP_F8MM8 __khwcap_feature(F8MM8)
+#define KERNEL_HWCAP_F8MM4 __khwcap_feature(F8MM4)
+#define KERNEL_HWCAP_SVE_F16MM __khwcap_feature(SVE_F16MM)
+#define KERNEL_HWCAP_SVE_ELTPERM __khwcap_feature(SVE_ELTPERM)
+#define KERNEL_HWCAP_SVE_AES2 __khwcap_feature(SVE_AES2)
+#define KERNEL_HWCAP_SVE_BFSCALE __khwcap_feature(SVE_BFSCALE)
+#define KERNEL_HWCAP_SVE2P2 __khwcap_feature(SVE2P2)
+#define KERNEL_HWCAP_SME2P2 __khwcap_feature(SME2P2)
+#define KERNEL_HWCAP_SME_SBITPERM __khwcap_feature(SME_SBITPERM)
+#define KERNEL_HWCAP_SME_AES __khwcap_feature(SME_AES)
+#define KERNEL_HWCAP_SME_SFEXPA __khwcap_feature(SME_SFEXPA)
+#define KERNEL_HWCAP_SME_STMOP __khwcap_feature(SME_STMOP)
+#define KERNEL_HWCAP_SME_SMOP4 __khwcap_feature(SME_SMOP4)
+
+#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64)
+#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
+#define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2)
+#define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES)
+#define KERNEL_HWCAP_SVEPMULL __khwcap2_feature(SVEPMULL)
+#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
+#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
+#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
+#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
+#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
+#define KERNEL_HWCAP_SVEI8MM __khwcap2_feature(SVEI8MM)
+#define KERNEL_HWCAP_SVEF32MM __khwcap2_feature(SVEF32MM)
+#define KERNEL_HWCAP_SVEF64MM __khwcap2_feature(SVEF64MM)
+#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16)
+#define KERNEL_HWCAP_I8MM __khwcap2_feature(I8MM)
+#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16)
+#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH)
+#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
+#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
+#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE)
+#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV)
+#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
+#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
+#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
+#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
+#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16)
+#define KERNEL_HWCAP_SVE_EBF16 __khwcap2_feature(SVE_EBF16)
+#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
+#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
+#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
+#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS)
+#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC)
+#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
+#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
+#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
+#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR)
+#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT)
+#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX)
+#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT)
+#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA)
+#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4)
+#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2)
+#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3)
+#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2)
+#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2)
+#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16)
+#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32)
+#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
+#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
+#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
+#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
+
+#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128)
+#define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR)
+#define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY)
+#define KERNEL_HWCAP_LSFE __khwcap3_feature(LSFE)
-#ifndef __ASSEMBLY__
/*
* This yields a mask that user programs can use to figure out what
* instruction set this cpu supports.
*/
-#define ELF_HWCAP (elf_hwcap)
-#define COMPAT_ELF_HWCAP (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
- COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
- COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
- COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
- COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
-
-extern unsigned int elf_hwcap;
+#define ELF_HWCAP cpu_get_elf_hwcap()
+#define ELF_HWCAP2 cpu_get_elf_hwcap2()
+#define ELF_HWCAP3 cpu_get_elf_hwcap3()
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
+#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2)
+#define COMPAT_ELF_HWCAP3 (compat_elf_hwcap3)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3;
+#endif
+
+enum {
+ CAP_HWCAP = 1,
+#ifdef CONFIG_COMPAT
+ CAP_COMPAT_HWCAP,
+ CAP_COMPAT_HWCAP2,
+#endif
+};
+
#endif
#endif
diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h
new file mode 100644
index 000000000000..b4b3076a76fb
--- /dev/null
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ * Written by David Brazdil <dbrazdil@google.com>
+ */
+
+#ifndef __ARM64_HYP_IMAGE_H__
+#define __ARM64_HYP_IMAGE_H__
+
+#define __HYP_CONCAT(a, b) a ## b
+#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
+ * to separate it from the kernel proper.
+ */
+#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
+#else
+#define kvm_nvhe_sym(sym) sym
+#endif
+
+#ifdef LINKER_SCRIPT
+
+/*
+ * KVM nVHE ELF section names are prefixed with .hyp, to separate them
+ * from the kernel proper.
+ */
+#define HYP_SECTION_NAME(NAME) .hyp##NAME
+
+/* Symbol defined at the beginning of each hyp section. */
+#define HYP_SECTION_SYMBOL_NAME(NAME) \
+ HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
+
+/*
+ * Helper to generate linker script statements starting a hyp section.
+ *
+ * A symbol with a well-known name is defined at the first byte. This
+ * is used as a base for hyp relocations (see gen-hyprel.c). It must
+ * be defined inside the section so the linker of `vmlinux` cannot
+ * separate it from the section data.
+ */
+#define BEGIN_HYP_SECTION(NAME) \
+ HYP_SECTION_NAME(NAME) : { \
+ HYP_SECTION_SYMBOL_NAME(NAME) = .;
+
+/* Helper to generate linker script statements ending a hyp section. */
+#define END_HYP_SECTION \
+ }
+
+/* Defines an ELF hyp section from input section @NAME and its subsections. */
+#define HYP_SECTION(NAME) \
+ BEGIN_HYP_SECTION(NAME) \
+ *(NAME NAME##.*) \
+ END_HYP_SECTION
+
+/*
+ * Defines a linker script alias of a kernel-proper symbol referenced by
+ * KVM nVHE hyp code.
+ */
+#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym;
+
+/* Defines a linker script alias for KVM nVHE hyp symbols */
+#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec);
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* __ARM64_HYP_IMAGE_H__ */
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index d2c79049ff11..a12fd897c877 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -1,6 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_HYPERVISOR_H
#define _ASM_ARM64_HYPERVISOR_H
#include <asm/xen/hypervisor.h>
+void kvm_init_hyp_services(void);
+bool kvm_arm_hyp_service_available(u32 func_id);
+void kvm_arm_target_impl_cpu_init(void);
+
+#ifdef CONFIG_ARM_PKVM_GUEST
+void pkvm_init_hyp_services(void);
+#else
+static inline void pkvm_init_hyp_services(void) { };
+#endif
+
+static inline void kvm_arch_init_hyp_services(void)
+{
+ pkvm_init_hyp_services();
+};
+
#endif
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
new file mode 100644
index 000000000000..9ba85173f857
--- /dev/null
+++ b/arch/arm64/include/asm/image.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#define ARM64_IMAGE_MAGIC "ARM\x64"
+
+#define ARM64_IMAGE_FLAG_BE_SHIFT 0
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT (ARM64_IMAGE_FLAG_BE_SHIFT + 1)
+#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT \
+ (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)
+#define ARM64_IMAGE_FLAG_BE_MASK 0x1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_MASK 0x3
+#define ARM64_IMAGE_FLAG_PHYS_BASE_MASK 0x1
+
+#define ARM64_IMAGE_FLAG_LE 0
+#define ARM64_IMAGE_FLAG_BE 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K 1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K 2
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K 3
+#define ARM64_IMAGE_FLAG_PHYS_BASE 1
+
+#ifndef __ASSEMBLER__
+
+#define arm64_image_flag_field(flags, field) \
+ (((flags) >> field##_SHIFT) & field##_MASK)
+
+/*
+ * struct arm64_image_header - arm64 kernel image header
+ * See Documentation/arch/arm64/booting.rst for details
+ *
+ * @code0: Executable code, or
+ * @mz_header alternatively used for part of MZ header
+ * @code1: Executable code
+ * @text_offset: Image load offset
+ * @image_size: Effective Image size
+ * @flags: kernel flags
+ * @reserved: reserved
+ * @magic: Magic number
+ * @reserved5: reserved, or
+ * @pe_header: alternatively used for PE COFF offset
+ */
+
+struct arm64_image_header {
+ __le32 code0;
+ __le32 code1;
+ __le64 text_offset;
+ __le64 image_size;
+ __le64 flags;
+ __le64 res2;
+ __le64 res3;
+ __le64 res4;
+ __le32 magic;
+ __le32 res5;
+};
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
new file mode 100644
index 000000000000..1a7d0d483698
--- /dev/null
+++ b/arch/arm64/include/asm/insn-def.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+#include <asm/brk-imm.h>
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
+#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
new file mode 100644
index 000000000000..e1d30ba99d01
--- /dev/null
+++ b/arch/arm64/include/asm/insn.h
@@ -0,0 +1,735 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ */
+#ifndef __ASM_INSN_H
+#define __ASM_INSN_H
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+#include <asm/insn-def.h>
+
+#ifndef __ASSEMBLER__
+
+enum aarch64_insn_hint_cr_op {
+ AARCH64_INSN_HINT_NOP = 0x0 << 5,
+ AARCH64_INSN_HINT_YIELD = 0x1 << 5,
+ AARCH64_INSN_HINT_WFE = 0x2 << 5,
+ AARCH64_INSN_HINT_WFI = 0x3 << 5,
+ AARCH64_INSN_HINT_SEV = 0x4 << 5,
+ AARCH64_INSN_HINT_SEVL = 0x5 << 5,
+
+ AARCH64_INSN_HINT_XPACLRI = 0x07 << 5,
+ AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5,
+ AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5,
+ AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5,
+ AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5,
+ AARCH64_INSN_HINT_PACIAZ = 0x18 << 5,
+ AARCH64_INSN_HINT_PACIASP = 0x19 << 5,
+ AARCH64_INSN_HINT_PACIBZ = 0x1A << 5,
+ AARCH64_INSN_HINT_PACIBSP = 0x1B << 5,
+ AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5,
+ AARCH64_INSN_HINT_AUTIASP = 0x1D << 5,
+ AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5,
+ AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5,
+
+ AARCH64_INSN_HINT_ESB = 0x10 << 5,
+ AARCH64_INSN_HINT_PSB = 0x11 << 5,
+ AARCH64_INSN_HINT_TSB = 0x12 << 5,
+ AARCH64_INSN_HINT_CSDB = 0x14 << 5,
+ AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5,
+
+ AARCH64_INSN_HINT_BTI = 0x20 << 5,
+ AARCH64_INSN_HINT_BTIC = 0x22 << 5,
+ AARCH64_INSN_HINT_BTIJ = 0x24 << 5,
+ AARCH64_INSN_HINT_BTIJC = 0x26 << 5,
+};
+
+enum aarch64_insn_imm_type {
+ AARCH64_INSN_IMM_ADR,
+ AARCH64_INSN_IMM_26,
+ AARCH64_INSN_IMM_19,
+ AARCH64_INSN_IMM_16,
+ AARCH64_INSN_IMM_14,
+ AARCH64_INSN_IMM_12,
+ AARCH64_INSN_IMM_9,
+ AARCH64_INSN_IMM_7,
+ AARCH64_INSN_IMM_6,
+ AARCH64_INSN_IMM_S,
+ AARCH64_INSN_IMM_R,
+ AARCH64_INSN_IMM_N,
+ AARCH64_INSN_IMM_MAX
+};
+
+enum aarch64_insn_register_type {
+ AARCH64_INSN_REGTYPE_RT,
+ AARCH64_INSN_REGTYPE_RN,
+ AARCH64_INSN_REGTYPE_RT2,
+ AARCH64_INSN_REGTYPE_RM,
+ AARCH64_INSN_REGTYPE_RD,
+ AARCH64_INSN_REGTYPE_RA,
+ AARCH64_INSN_REGTYPE_RS,
+};
+
+enum aarch64_insn_register {
+ AARCH64_INSN_REG_0 = 0,
+ AARCH64_INSN_REG_1 = 1,
+ AARCH64_INSN_REG_2 = 2,
+ AARCH64_INSN_REG_3 = 3,
+ AARCH64_INSN_REG_4 = 4,
+ AARCH64_INSN_REG_5 = 5,
+ AARCH64_INSN_REG_6 = 6,
+ AARCH64_INSN_REG_7 = 7,
+ AARCH64_INSN_REG_8 = 8,
+ AARCH64_INSN_REG_9 = 9,
+ AARCH64_INSN_REG_10 = 10,
+ AARCH64_INSN_REG_11 = 11,
+ AARCH64_INSN_REG_12 = 12,
+ AARCH64_INSN_REG_13 = 13,
+ AARCH64_INSN_REG_14 = 14,
+ AARCH64_INSN_REG_15 = 15,
+ AARCH64_INSN_REG_16 = 16,
+ AARCH64_INSN_REG_17 = 17,
+ AARCH64_INSN_REG_18 = 18,
+ AARCH64_INSN_REG_19 = 19,
+ AARCH64_INSN_REG_20 = 20,
+ AARCH64_INSN_REG_21 = 21,
+ AARCH64_INSN_REG_22 = 22,
+ AARCH64_INSN_REG_23 = 23,
+ AARCH64_INSN_REG_24 = 24,
+ AARCH64_INSN_REG_25 = 25,
+ AARCH64_INSN_REG_26 = 26,
+ AARCH64_INSN_REG_27 = 27,
+ AARCH64_INSN_REG_28 = 28,
+ AARCH64_INSN_REG_29 = 29,
+ AARCH64_INSN_REG_FP = 29, /* Frame pointer */
+ AARCH64_INSN_REG_30 = 30,
+ AARCH64_INSN_REG_LR = 30, /* Link register */
+ AARCH64_INSN_REG_ZR = 31, /* Zero: as source register */
+ AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */
+};
+
+enum aarch64_insn_special_register {
+ AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200,
+ AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201,
+ AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208,
+ AARCH64_INSN_SPCLREG_SPSEL = 0xC210,
+ AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212,
+ AARCH64_INSN_SPCLREG_DAIF = 0xDA11,
+ AARCH64_INSN_SPCLREG_NZCV = 0xDA10,
+ AARCH64_INSN_SPCLREG_FPCR = 0xDA20,
+ AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28,
+ AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29,
+ AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200,
+ AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201,
+ AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208,
+ AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218,
+ AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219,
+ AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A,
+ AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B,
+ AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200,
+ AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201,
+ AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210
+};
+
+enum aarch64_insn_system_register {
+ AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684,
+ AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682,
+ AARCH64_INSN_SYSREG_SP_EL0 = 0x4208,
+};
+
+enum aarch64_insn_variant {
+ AARCH64_INSN_VARIANT_32BIT,
+ AARCH64_INSN_VARIANT_64BIT
+};
+
+enum aarch64_insn_condition {
+ AARCH64_INSN_COND_EQ = 0x0, /* == */
+ AARCH64_INSN_COND_NE = 0x1, /* != */
+ AARCH64_INSN_COND_CS = 0x2, /* unsigned >= */
+ AARCH64_INSN_COND_CC = 0x3, /* unsigned < */
+ AARCH64_INSN_COND_MI = 0x4, /* < 0 */
+ AARCH64_INSN_COND_PL = 0x5, /* >= 0 */
+ AARCH64_INSN_COND_VS = 0x6, /* overflow */
+ AARCH64_INSN_COND_VC = 0x7, /* no overflow */
+ AARCH64_INSN_COND_HI = 0x8, /* unsigned > */
+ AARCH64_INSN_COND_LS = 0x9, /* unsigned <= */
+ AARCH64_INSN_COND_GE = 0xa, /* signed >= */
+ AARCH64_INSN_COND_LT = 0xb, /* signed < */
+ AARCH64_INSN_COND_GT = 0xc, /* signed > */
+ AARCH64_INSN_COND_LE = 0xd, /* signed <= */
+ AARCH64_INSN_COND_AL = 0xe, /* always */
+};
+
+enum aarch64_insn_branch_type {
+ AARCH64_INSN_BRANCH_NOLINK,
+ AARCH64_INSN_BRANCH_LINK,
+ AARCH64_INSN_BRANCH_RETURN,
+ AARCH64_INSN_BRANCH_COMP_ZERO,
+ AARCH64_INSN_BRANCH_COMP_NONZERO,
+};
+
+enum aarch64_insn_size_type {
+ AARCH64_INSN_SIZE_8,
+ AARCH64_INSN_SIZE_16,
+ AARCH64_INSN_SIZE_32,
+ AARCH64_INSN_SIZE_64,
+};
+
+enum aarch64_insn_ldst_type {
+ AARCH64_INSN_LDST_LOAD_REG_OFFSET,
+ AARCH64_INSN_LDST_STORE_REG_OFFSET,
+ AARCH64_INSN_LDST_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_STORE_IMM_OFFSET,
+ AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
+ AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_LOAD_ACQ,
+ AARCH64_INSN_LDST_LOAD_EX,
+ AARCH64_INSN_LDST_LOAD_ACQ_EX,
+ AARCH64_INSN_LDST_STORE_REL,
+ AARCH64_INSN_LDST_STORE_EX,
+ AARCH64_INSN_LDST_STORE_REL_EX,
+ AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET,
+};
+
+enum aarch64_insn_adsb_type {
+ AARCH64_INSN_ADSB_ADD,
+ AARCH64_INSN_ADSB_SUB,
+ AARCH64_INSN_ADSB_ADD_SETFLAGS,
+ AARCH64_INSN_ADSB_SUB_SETFLAGS
+};
+
+enum aarch64_insn_movewide_type {
+ AARCH64_INSN_MOVEWIDE_ZERO,
+ AARCH64_INSN_MOVEWIDE_KEEP,
+ AARCH64_INSN_MOVEWIDE_INVERSE
+};
+
+enum aarch64_insn_bitfield_type {
+ AARCH64_INSN_BITFIELD_MOVE,
+ AARCH64_INSN_BITFIELD_MOVE_UNSIGNED,
+ AARCH64_INSN_BITFIELD_MOVE_SIGNED
+};
+
+enum aarch64_insn_data1_type {
+ AARCH64_INSN_DATA1_REVERSE_16,
+ AARCH64_INSN_DATA1_REVERSE_32,
+ AARCH64_INSN_DATA1_REVERSE_64,
+};
+
+enum aarch64_insn_data2_type {
+ AARCH64_INSN_DATA2_UDIV,
+ AARCH64_INSN_DATA2_SDIV,
+ AARCH64_INSN_DATA2_LSLV,
+ AARCH64_INSN_DATA2_LSRV,
+ AARCH64_INSN_DATA2_ASRV,
+ AARCH64_INSN_DATA2_RORV,
+};
+
+enum aarch64_insn_data3_type {
+ AARCH64_INSN_DATA3_MADD,
+ AARCH64_INSN_DATA3_MSUB,
+};
+
+enum aarch64_insn_logic_type {
+ AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_LOGIC_BIC,
+ AARCH64_INSN_LOGIC_ORR,
+ AARCH64_INSN_LOGIC_ORN,
+ AARCH64_INSN_LOGIC_EOR,
+ AARCH64_INSN_LOGIC_EON,
+ AARCH64_INSN_LOGIC_AND_SETFLAGS,
+ AARCH64_INSN_LOGIC_BIC_SETFLAGS
+};
+
+enum aarch64_insn_prfm_type {
+ AARCH64_INSN_PRFM_TYPE_PLD,
+ AARCH64_INSN_PRFM_TYPE_PLI,
+ AARCH64_INSN_PRFM_TYPE_PST,
+};
+
+enum aarch64_insn_prfm_target {
+ AARCH64_INSN_PRFM_TARGET_L1,
+ AARCH64_INSN_PRFM_TARGET_L2,
+ AARCH64_INSN_PRFM_TARGET_L3,
+};
+
+enum aarch64_insn_prfm_policy {
+ AARCH64_INSN_PRFM_POLICY_KEEP,
+ AARCH64_INSN_PRFM_POLICY_STRM,
+};
+
+enum aarch64_insn_adr_type {
+ AARCH64_INSN_ADR_TYPE_ADRP,
+ AARCH64_INSN_ADR_TYPE_ADR,
+};
+
+enum aarch64_insn_mem_atomic_op {
+ AARCH64_INSN_MEM_ATOMIC_ADD,
+ AARCH64_INSN_MEM_ATOMIC_CLR,
+ AARCH64_INSN_MEM_ATOMIC_EOR,
+ AARCH64_INSN_MEM_ATOMIC_SET,
+ AARCH64_INSN_MEM_ATOMIC_SWP,
+};
+
+enum aarch64_insn_mem_order_type {
+ AARCH64_INSN_MEM_ORDER_NONE,
+ AARCH64_INSN_MEM_ORDER_ACQ,
+ AARCH64_INSN_MEM_ORDER_REL,
+ AARCH64_INSN_MEM_ORDER_ACQREL,
+};
+
+enum aarch64_insn_mb_type {
+ AARCH64_INSN_MB_SY,
+ AARCH64_INSN_MB_ST,
+ AARCH64_INSN_MB_LD,
+ AARCH64_INSN_MB_ISH,
+ AARCH64_INSN_MB_ISHST,
+ AARCH64_INSN_MB_ISHLD,
+ AARCH64_INSN_MB_NSH,
+ AARCH64_INSN_MB_NSHST,
+ AARCH64_INSN_MB_NSHLD,
+ AARCH64_INSN_MB_OSH,
+ AARCH64_INSN_MB_OSHST,
+ AARCH64_INSN_MB_OSHLD,
+};
+
+#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
+static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
+{ \
+ BUILD_BUG_ON(~(mask) & (val)); \
+ return (code & (mask)) == (val); \
+} \
+static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
+{ \
+ return (val); \
+}
+
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section C3.1 "A64 instruction index by encoding":
+ * AArch64 main encoding table
+ * Bit position
+ * 28 27 26 25 Encoding Group
+ * 0 0 - - Unallocated
+ * 1 0 0 - Data processing, immediate
+ * 1 0 1 - Branch, exception generation and system instructions
+ * - 1 - 0 Loads and stores
+ * - 1 0 1 Data processing - register
+ * 0 1 1 1 Data processing - SIMD and floating point
+ * 1 1 1 1 Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+__AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000)
+
+__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
+__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
+__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
+__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
+__AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000)
+__AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00)
+__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
+__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
+__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
+__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
+__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
+__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
+__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
+__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
+__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
+__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800)
+__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
+__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
+__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000)
+__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000)
+__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
+__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
+__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
+__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
+__AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000)
+__AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000)
+__AARCH64_INSN_FUNCS(ldp_pre, 0x7FC00000, 0x29C00000)
+__AARCH64_INSN_FUNCS(add_imm, 0x7F000000, 0x11000000)
+__AARCH64_INSN_FUNCS(adds_imm, 0x7F000000, 0x31000000)
+__AARCH64_INSN_FUNCS(sub_imm, 0x7F000000, 0x51000000)
+__AARCH64_INSN_FUNCS(subs_imm, 0x7F000000, 0x71000000)
+__AARCH64_INSN_FUNCS(movn, 0x7F800000, 0x12800000)
+__AARCH64_INSN_FUNCS(sbfm, 0x7F800000, 0x13000000)
+__AARCH64_INSN_FUNCS(bfm, 0x7F800000, 0x33000000)
+__AARCH64_INSN_FUNCS(movz, 0x7F800000, 0x52800000)
+__AARCH64_INSN_FUNCS(ubfm, 0x7F800000, 0x53000000)
+__AARCH64_INSN_FUNCS(movk, 0x7F800000, 0x72800000)
+__AARCH64_INSN_FUNCS(add, 0x7F200000, 0x0B000000)
+__AARCH64_INSN_FUNCS(adds, 0x7F200000, 0x2B000000)
+__AARCH64_INSN_FUNCS(sub, 0x7F200000, 0x4B000000)
+__AARCH64_INSN_FUNCS(subs, 0x7F200000, 0x6B000000)
+__AARCH64_INSN_FUNCS(madd, 0x7FE08000, 0x1B000000)
+__AARCH64_INSN_FUNCS(msub, 0x7FE08000, 0x1B008000)
+__AARCH64_INSN_FUNCS(udiv, 0x7FE0FC00, 0x1AC00800)
+__AARCH64_INSN_FUNCS(sdiv, 0x7FE0FC00, 0x1AC00C00)
+__AARCH64_INSN_FUNCS(lslv, 0x7FE0FC00, 0x1AC02000)
+__AARCH64_INSN_FUNCS(lsrv, 0x7FE0FC00, 0x1AC02400)
+__AARCH64_INSN_FUNCS(asrv, 0x7FE0FC00, 0x1AC02800)
+__AARCH64_INSN_FUNCS(rorv, 0x7FE0FC00, 0x1AC02C00)
+__AARCH64_INSN_FUNCS(rev16, 0x7FFFFC00, 0x5AC00400)
+__AARCH64_INSN_FUNCS(rev32, 0x7FFFFC00, 0x5AC00800)
+__AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
+__AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000)
+__AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000)
+__AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(mov_reg, 0x7FE0FFE0, 0x2A0003E0)
+__AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000)
+__AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
+__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
+__AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000)
+__AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000)
+__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000)
+__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000)
+__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000)
+__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000)
+__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
+__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
+__AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
+__AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000)
+__AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000)
+__AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000)
+__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000)
+__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
+__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
+__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
+__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
+__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000)
+__AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F)
+__AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000)
+__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800)
+__AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000)
+__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800)
+__AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000)
+__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF)
+__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0)
+__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF)
+__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000)
+__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F)
+__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000)
+__AARCH64_INSN_FUNCS(dmb, 0xFFFFF0FF, 0xD50330BF)
+__AARCH64_INSN_FUNCS(dsb_base, 0xFFFFF0FF, 0xD503309F)
+__AARCH64_INSN_FUNCS(dsb_nxs, 0xFFFFF3FF, 0xD503323F)
+__AARCH64_INSN_FUNCS(isb, 0xFFFFF0FF, 0xD50330DF)
+__AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF)
+__AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F)
+__AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F)
+__AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F)
+__AARCH64_INSN_FUNCS(bti, 0xFFFFFF3F, 0xD503241f)
+
+#undef __AARCH64_INSN_FUNCS
+
+static __always_inline bool aarch64_insn_is_steppable_hint(u32 insn)
+{
+ if (!aarch64_insn_is_hint(insn))
+ return false;
+
+ switch (insn & 0xFE0) {
+ case AARCH64_INSN_HINT_XPACLRI:
+ case AARCH64_INSN_HINT_PACIA_1716:
+ case AARCH64_INSN_HINT_PACIB_1716:
+ case AARCH64_INSN_HINT_PACIAZ:
+ case AARCH64_INSN_HINT_PACIASP:
+ case AARCH64_INSN_HINT_PACIBZ:
+ case AARCH64_INSN_HINT_PACIBSP:
+ case AARCH64_INSN_HINT_BTI:
+ case AARCH64_INSN_HINT_BTIC:
+ case AARCH64_INSN_HINT_BTIJ:
+ case AARCH64_INSN_HINT_BTIJC:
+ case AARCH64_INSN_HINT_NOP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static __always_inline bool aarch64_insn_is_branch(u32 insn)
+{
+ /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_ret_auth(insn) ||
+ aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_br_auth(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_blr_auth(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
+static __always_inline bool aarch64_insn_is_branch_imm(u32 insn)
+{
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
+static __always_inline bool aarch64_insn_is_adr_adrp(u32 insn)
+{
+ return aarch64_insn_is_adr(insn) ||
+ aarch64_insn_is_adrp(insn);
+}
+
+static __always_inline bool aarch64_insn_is_dsb(u32 insn)
+{
+ return aarch64_insn_is_dsb_base(insn) ||
+ aarch64_insn_is_dsb_nxs(insn);
+}
+
+static __always_inline bool aarch64_insn_is_barrier(u32 insn)
+{
+ return aarch64_insn_is_dmb(insn) ||
+ aarch64_insn_is_dsb(insn) ||
+ aarch64_insn_is_isb(insn) ||
+ aarch64_insn_is_sb(insn) ||
+ aarch64_insn_is_clrex(insn) ||
+ aarch64_insn_is_ssbb(insn) ||
+ aarch64_insn_is_pssbb(insn);
+}
+
+static __always_inline bool aarch64_insn_is_store_single(u32 insn)
+{
+ return aarch64_insn_is_store_imm(insn) ||
+ aarch64_insn_is_store_pre(insn) ||
+ aarch64_insn_is_store_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_store_pair(u32 insn)
+{
+ return aarch64_insn_is_stp(insn) ||
+ aarch64_insn_is_stp_pre(insn) ||
+ aarch64_insn_is_stp_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_load_single(u32 insn)
+{
+ return aarch64_insn_is_load_imm(insn) ||
+ aarch64_insn_is_load_pre(insn) ||
+ aarch64_insn_is_load_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_load_pair(u32 insn)
+{
+ return aarch64_insn_is_ldp(insn) ||
+ aarch64_insn_is_ldp_pre(insn) ||
+ aarch64_insn_is_ldp_post(insn);
+}
+
+static __always_inline bool aarch64_insn_uses_literal(u32 insn)
+{
+ /* ldr/ldrsw (literal), prfm */
+
+ return aarch64_insn_is_ldr_lit(insn) ||
+ aarch64_insn_is_ldrsw_lit(insn) ||
+ aarch64_insn_is_adr_adrp(insn) ||
+ aarch64_insn_is_prfm_lit(insn);
+}
+
+enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
+u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm);
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+ u32 insn);
+u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_condition cond);
+
+static __always_inline u32
+aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
+{
+ return aarch64_insn_get_hint_value() | op;
+}
+
+static __always_inline u32 aarch64_insn_gen_nop(void)
+{
+ return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+}
+
+static __always_inline bool aarch64_insn_is_nop(u32 insn)
+{
+ return insn == aarch64_insn_gen_nop();
+}
+
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_branch_type type);
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_register offset,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ unsigned int imm,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ bool is64bit);
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_register base,
+ int offset,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_register state,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int imm, enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_adr_type type);
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ int immr, int imms,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_bitfield_type type);
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+ int imm, int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_movewide_type type);
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data1_type type);
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data2_type type);
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg1,
+ enum aarch64_insn_register reg2,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_data3_type type);
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_register reg,
+ int shift,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant);
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm);
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb);
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order);
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order);
+#else
+static inline
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+
+static inline
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+#endif
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+ enum aarch64_insn_system_register sysreg);
+
+s32 aarch64_get_branch_offset(u32 insn);
+u32 aarch64_set_branch_offset(u32 insn, s32 offset);
+
+s32 aarch64_insn_adrp_get_offset(u32 insn);
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
+
+bool aarch32_insn_is_wide(u32 insn);
+
+#define A32_RN_OFFSET 16
+#define A32_RT_OFFSET 12
+#define A32_RT2_OFFSET 0
+
+u32 aarch64_insn_extract_system_reg(u32 insn);
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
+u32 aarch32_insn_mcr_extract_opc2(u32 insn);
+u32 aarch32_insn_mcr_extract_crm(u32 insn);
+
+typedef bool (pstate_check_t)(unsigned long);
+extern pstate_check_t * const aarch32_opcode_cond_checks[16];
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 1d12f89140ba..83e03abbb2ca 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -1,267 +1,322 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/io.h
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_IO_H
#define __ASM_IO_H
-#ifdef __KERNEL__
-
#include <linux/types.h>
+#include <linux/pgtable.h>
#include <asm/byteorder.h>
#include <asm/barrier.h>
-#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/early_ioremap.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+#include <asm/rsi.h>
/*
* Generic IO read/write. These perform native-endian accesses.
*/
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+#define __raw_writeb __raw_writeb
+static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
- asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr));
+ volatile u8 __iomem *ptr = addr;
+ asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+#define __raw_writew __raw_writew
+static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
- asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr));
+ volatile u16 __iomem *ptr = addr;
+ asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+#define __raw_writel __raw_writel
+static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
- asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr));
+ volatile u32 __iomem *ptr = addr;
+ asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+#define __raw_writeq __raw_writeq
+static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
- asm volatile("str %0, [%1]" : : "r" (val), "r" (addr));
+ volatile u64 __iomem *ptr = addr;
+ asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+#define __raw_readb __raw_readb
+static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
- asm volatile("ldrb %w0, [%1]" : "=r" (val) : "r" (addr));
+ asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
+ "ldarb %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr));
return val;
}
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+#define __raw_readw __raw_readw
+static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
- asm volatile("ldrh %w0, [%1]" : "=r" (val) : "r" (addr));
+
+ asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
+ "ldarh %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr));
return val;
}
-static inline u32 __raw_readl(const volatile void __iomem *addr)
+#define __raw_readl __raw_readl
+static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
- asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+ asm volatile(ALTERNATIVE("ldr %w0, [%1]",
+ "ldar %w0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr));
return val;
}
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+#define __raw_readq __raw_readq
+static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
- asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+ asm volatile(ALTERNATIVE("ldr %0, [%1]",
+ "ldar %0, [%1]",
+ ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+ : "=r" (val) : "r" (addr));
return val;
}
/* IO barriers */
-#define __iormb() rmb()
-#define __iowmb() wmb()
-
-#define mmiowb() do { } while (0)
+#define __io_ar(v) \
+({ \
+ unsigned long tmp; \
+ \
+ dma_rmb(); \
+ \
+ /* \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call to \
+ * udelay() will be ordered due to the ISB in get_cycles(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
+#define __io_bw() dma_wmb()
+#define __io_br(v)
+#define __io_aw(v)
+
+/* arm64-specific, don't use in portable drivers */
+#define __iormb(v) __io_ar(v)
+#define __iowmb() __io_bw()
+#define __iomb() dma_mb()
/*
- * Relaxed I/O memory access primitives. These follow the Device memory
- * ordering rules but do not guarantee any ordering relative to Normal memory
- * accesses.
- */
-#define readb_relaxed(c) ({ u8 __v = __raw_readb(c); __v; })
-#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; })
-#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; })
-#define readq_relaxed(c) ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; })
-
-#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c)))
-#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-
-/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.
+ * I/O port access primitives.
*/
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; })
-
-#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
-#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
-#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
-#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
+#define arch_has_dev_port() (1)
+#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1)
+#define PCI_IOBASE ((void __iomem *)PCI_IO_START)
/*
- * I/O port access primitives.
+ * The ARM64 iowrite implementation is intended to support drivers that want to
+ * use write combining. For instance PCI drivers using write combining with a 64
+ * byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus.
+ *
+ * Newer ARM core have sensitive write combining buffers, it is important that
+ * the stores be contiguous blocks of store instructions. Normal memcpy
+ * approaches have a very low chance to generate write combining.
+ *
+ * Since this is the only API on ARM64 that should be used with write combining
+ * it also integrates the DGH hint which is supposed to lower the latency to
+ * emit the large TLP from the CPU.
*/
-#define IO_SPACE_LIMIT 0xffff
-#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_2M))
-static inline u8 inb(unsigned long addr)
+static __always_inline void
+__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
+ size_t count)
{
- return readb(addr + PCI_IOBASE);
+ switch (count) {
+ case 8:
+ asm volatile("str %w0, [%8, #4 * 0]\n"
+ "str %w1, [%8, #4 * 1]\n"
+ "str %w2, [%8, #4 * 2]\n"
+ "str %w3, [%8, #4 * 3]\n"
+ "str %w4, [%8, #4 * 4]\n"
+ "str %w5, [%8, #4 * 5]\n"
+ "str %w6, [%8, #4 * 6]\n"
+ "str %w7, [%8, #4 * 7]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
+ "rZ"(from[6]), "rZ"(from[7]), "r"(to));
+ break;
+ case 4:
+ asm volatile("str %w0, [%4, #4 * 0]\n"
+ "str %w1, [%4, #4 * 1]\n"
+ "str %w2, [%4, #4 * 2]\n"
+ "str %w3, [%4, #4 * 3]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "r"(to));
+ break;
+ case 2:
+ asm volatile("str %w0, [%2, #4 * 0]\n"
+ "str %w1, [%2, #4 * 1]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "r"(to));
+ break;
+ case 1:
+ __raw_writel(*from, to);
+ break;
+ default:
+ BUILD_BUG();
+ }
}
-static inline u16 inw(unsigned long addr)
-{
- return readw(addr + PCI_IOBASE);
-}
+void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
-static inline u32 inl(unsigned long addr)
+static __always_inline void
+__iowrite32_copy(void __iomem *to, const void *from, size_t count)
{
- return readl(addr + PCI_IOBASE);
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
+ __const_memcpy_toio_aligned32(to, from, count);
+ dgh();
+ } else {
+ __iowrite32_copy_full(to, from, count);
+ }
}
+#define __iowrite32_copy __iowrite32_copy
-static inline void outb(u8 b, unsigned long addr)
+static __always_inline void
+__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
+ size_t count)
{
- writeb(b, addr + PCI_IOBASE);
+ switch (count) {
+ case 8:
+ asm volatile("str %x0, [%8, #8 * 0]\n"
+ "str %x1, [%8, #8 * 1]\n"
+ "str %x2, [%8, #8 * 2]\n"
+ "str %x3, [%8, #8 * 3]\n"
+ "str %x4, [%8, #8 * 4]\n"
+ "str %x5, [%8, #8 * 5]\n"
+ "str %x6, [%8, #8 * 6]\n"
+ "str %x7, [%8, #8 * 7]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
+ "rZ"(from[6]), "rZ"(from[7]), "r"(to));
+ break;
+ case 4:
+ asm volatile("str %x0, [%4, #8 * 0]\n"
+ "str %x1, [%4, #8 * 1]\n"
+ "str %x2, [%4, #8 * 2]\n"
+ "str %x3, [%4, #8 * 3]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
+ "rZ"(from[3]), "r"(to));
+ break;
+ case 2:
+ asm volatile("str %x0, [%2, #8 * 0]\n"
+ "str %x1, [%2, #8 * 1]\n"
+ :
+ : "rZ"(from[0]), "rZ"(from[1]), "r"(to));
+ break;
+ case 1:
+ __raw_writeq(*from, to);
+ break;
+ default:
+ BUILD_BUG();
+ }
}
-static inline void outw(u16 b, unsigned long addr)
-{
- writew(b, addr + PCI_IOBASE);
-}
+void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
-static inline void outl(u32 b, unsigned long addr)
+static __always_inline void
+__iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- writel(b, addr + PCI_IOBASE);
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
+ __const_memcpy_toio_aligned64(to, from, count);
+ dgh();
+ } else {
+ __iowrite64_copy_full(to, from, count);
+ }
}
+#define __iowrite64_copy __iowrite64_copy
-#define inb_p(addr) inb(addr)
-#define inw_p(addr) inw(addr)
-#define inl_p(addr) inl(addr)
-
-#define outb_p(x, addr) outb((x), (addr))
-#define outw_p(x, addr) outw((x), (addr))
-#define outl_p(x, addr) outl((x), (addr))
-
-static inline void insb(unsigned long addr, void *buffer, int count)
-{
- u8 *buf = buffer;
- while (count--)
- *buf++ = __raw_readb(addr + PCI_IOBASE);
-}
-
-static inline void insw(unsigned long addr, void *buffer, int count)
-{
- u16 *buf = buffer;
- while (count--)
- *buf++ = __raw_readw(addr + PCI_IOBASE);
-}
+/*
+ * I/O memory mapping functions.
+ */
-static inline void insl(unsigned long addr, void *buffer, int count)
-{
- u32 *buf = buffer;
- while (count--)
- *buf++ = __raw_readl(addr + PCI_IOBASE);
-}
+typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
+ pgprot_t *prot);
+int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
-static inline void outsb(unsigned long addr, const void *buffer, int count)
-{
- const u8 *buf = buffer;
- while (count--)
- __raw_writeb(*buf++, addr + PCI_IOBASE);
-}
+#define ioremap_prot ioremap_prot
-static inline void outsw(unsigned long addr, const void *buffer, int count)
-{
- const u16 *buf = buffer;
- while (count--)
- __raw_writew(*buf++, addr + PCI_IOBASE);
-}
+#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
-static inline void outsl(unsigned long addr, const void *buffer, int count)
-{
- const u32 *buf = buffer;
- while (count--)
- __raw_writel(*buf++, addr + PCI_IOBASE);
-}
+#define ioremap_wc(addr, size) \
+ ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define ioremap_np(addr, size) \
+ ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
-#define insb_p(port,to,len) insb(port,to,len)
-#define insw_p(port,to,len) insw(port,to,len)
-#define insl_p(port,to,len) insl(port,to,len)
-#define outsb_p(port,from,len) outsb(port,from,len)
-#define outsw_p(port,from,len) outsw(port,from,len)
-#define outsl_p(port,from,len) outsl(port,from,len)
+#define ioremap_encrypted(addr, size) \
+ ioremap_prot((addr), (size), PAGE_KERNEL)
/*
- * String version of I/O memory access operations.
+ * io{read,write}{16,32,64}be() macros
*/
-extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
-extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
-extern void __memset_io(volatile void __iomem *, int, size_t);
+#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
+#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
+#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
-#define memset_io(c,v,l) __memset_io((c),(v),(l))
-#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
-#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
+#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
+#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
+#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); })
-/*
- * I/O memory mapping functions.
- */
-extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
-extern void __iounmap(volatile void __iomem *addr);
-
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
-#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
-#define ioremap_cached(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL))
-#define iounmap __iounmap
+#include <asm-generic/io.h>
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define ioremap_cache ioremap_cache
+static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
+{
+ if (pfn_is_map_memory(__phys_to_pfn(addr)))
+ return (void __iomem *)__phys_to_virt(addr);
-#define ARCH_HAS_IOREMAP_WC
-#include <asm-generic/iomap.h>
+ return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
+}
/*
* More restrictive address range checking than the default implementation
* (PHYS_OFFSET and PHYS_MASK taken into account).
*/
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
-extern int valid_phys_addr_range(unsigned long addr, size_t size);
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-extern int devmem_is_allowed(unsigned long pfn);
-
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p) __va(p)
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
+static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size)
+{
+ if (unlikely(is_realm_world()))
+ return arm64_rsi_is_protected(phys_addr, size);
+ return false;
+}
-#endif /* __KERNEL__ */
#endif /* __ASM_IO_H */
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 0332fc077f6e..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,9 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_IRQ_H
#define __ASM_IRQ_H
+#ifndef __ASSEMBLER__
+
+#include <linux/cpumask.h>
+
#include <asm-generic/irq.h>
-extern void (*handle_arch_irq)(struct pt_regs *);
-extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+struct pt_regs;
+
+int set_handle_irq(void (*handle_irq)(struct pt_regs *));
+#define set_handle_irq set_handle_irq
+int set_handle_fiq(void (*handle_fiq)(struct pt_regs *));
+
+static inline int nr_legacy_irqs(void)
+{
+ return 0;
+}
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
new file mode 100644
index 000000000000..a1020285ea75
--- /dev/null
+++ b/arch/arm64/include/asm/irq_work.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return true;
+}
+
+#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index aa11943b8502..d4d7451c2c12 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -1,91 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_IRQFLAGS_H
#define __ASM_IRQFLAGS_H
-#ifdef __KERNEL__
-
+#include <asm/barrier.h>
#include <asm/ptrace.h>
+#include <asm/sysreg.h>
/*
- * CPU interrupt mask handling.
+ * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif'
+ * order:
+ * Masking debug exceptions causes all other exceptions to be masked too/
+ * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are
+ * always masked and unmasked together, and have no side effects for other
+ * flags. Keeping to this order makes it easier for entry.S to know which
+ * exceptions should be unmasked.
*/
-static inline unsigned long arch_local_irq_save(void)
+
+static __always_inline void __daif_local_irq_enable(void)
{
- unsigned long flags;
- asm volatile(
- "mrs %0, daif // arch_local_irq_save\n"
- "msr daifset, #2"
- : "=r" (flags)
- :
- : "memory");
- return flags;
+ barrier();
+ asm volatile("msr daifclr, #3");
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_enable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
+ pmr_sync();
+ barrier();
}
static inline void arch_local_irq_enable(void)
{
- asm volatile(
- "msr daifclr, #2 // arch_local_irq_enable"
- :
- :
- : "memory");
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_enable();
+ } else {
+ __daif_local_irq_enable();
+ }
+}
+
+static __always_inline void __daif_local_irq_disable(void)
+{
+ barrier();
+ asm volatile("msr daifset, #3");
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_disable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+ barrier();
}
static inline void arch_local_irq_disable(void)
{
- asm volatile(
- "msr daifset, #2 // arch_local_irq_disable"
- :
- :
- : "memory");
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_disable();
+ } else {
+ __daif_local_irq_disable();
+ }
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+ return read_sysreg(daif);
}
-#define local_fiq_enable() asm("msr daifclr, #1" : : : "memory")
-#define local_fiq_disable() asm("msr daifset, #1" : : : "memory")
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+ return read_sysreg_s(SYS_ICC_PMR_EL1);
+}
/*
* Save the current interrupt enable state.
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long flags;
- asm volatile(
- "mrs %0, daif // arch_local_save_flags"
- : "=r" (flags)
- :
- : "memory");
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_save_flags();
+ } else {
+ return __daif_local_save_flags();
+ }
+}
+
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+ return flags & PSR_I_BIT;
+}
+
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+ return flags != GIC_PRIO_IRQON;
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled_flags(flags);
+ } else {
+ return __daif_irqs_disabled_flags(flags);
+ }
+}
+
+static __always_inline bool __daif_irqs_disabled(void)
+{
+ return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
+
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+ return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled();
+ } else {
+ return __daif_irqs_disabled();
+ }
+}
+
+static __always_inline unsigned long __daif_local_irq_save(void)
+{
+ unsigned long flags = __daif_local_save_flags();
+
+ __daif_local_irq_disable();
+
+ return flags;
+}
+
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+ unsigned long flags = __pmr_local_save_flags();
+
+ /*
+ * There are too many states with IRQs disabled, just keep the current
+ * state if interrupts are already disabled/masked.
+ */
+ if (!__pmr_irqs_disabled_flags(flags))
+ __pmr_local_irq_disable();
+
return flags;
}
+static inline unsigned long arch_local_irq_save(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_irq_save();
+ } else {
+ return __daif_local_irq_save();
+ }
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg(flags, daif);
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+ pmr_sync();
+ barrier();
+}
+
/*
* restore saved IRQ state
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
- asm volatile(
- "msr daif, %0 // arch_local_irq_restore"
- :
- : "r" (flags)
- : "memory");
-}
-
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
- return flags & PSR_I_BIT;
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_restore(flags);
+ } else {
+ __daif_local_irq_restore(flags);
+ }
}
-#endif
-#endif
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
new file mode 100644
index 000000000000..0cb211d3607d
--- /dev/null
+++ b/arch/arm64/include/asm/jump_label.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Based on arch/arm/include/asm/jump_label.h
+ */
+#ifndef __ASM_JUMP_LABEL_H
+#define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#define HAVE_JUMP_LABEL_BATCH
+#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
+
+#define JUMP_TABLE_ENTRY(key, label) \
+ ".pushsection __jump_table, \"aw\"\n\t" \
+ ".align 3\n\t" \
+ ".long 1b - ., " label " - .\n\t" \
+ ".quad " key " - .\n\t" \
+ ".popsection\n\t"
+
+/* This macro is also expanded on the Rust side. */
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: nop\n\t" \
+ JUMP_TABLE_ENTRY(key, label)
+
+static __always_inline bool arch_static_branch(struct static_key * const key,
+ const bool branch)
+{
+ char *k = &((char *)key)[branch];
+
+ asm goto(
+ ARCH_STATIC_BRANCH_ASM("%c0", "%l[l_yes]")
+ : : "i"(k) : : l_yes
+ );
+
+ return false;
+l_yes:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key * const key,
+ const bool branch)
+{
+ char *k = &((char *)key)[branch];
+
+ asm goto(
+ "1: b %l[l_yes] \n\t"
+ JUMP_TABLE_ENTRY("%c0", "%l[l_yes]")
+ : : "i"(k) : : l_yes
+ );
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
new file mode 100644
index 000000000000..b167e9d3da91
--- /dev/null
+++ b/arch/arm64/include/asm/kasan.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLER__
+
+#include <linux/linkage.h>
+#include <asm/memory.h>
+#include <asm/mte-kasan.h>
+#include <asm/pgtable-types.h>
+
+#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
+#define arch_kasan_reset_tag(addr) __tag_reset(addr)
+#define arch_kasan_get_tag(addr) __tag_get(addr)
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+
+asmlinkage void kasan_early_init(void);
+void kasan_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644
index 000000000000..74a4f738c5f5
--- /dev/null
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Kernel page table mapping
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ */
+
+#ifndef __ASM_KERNEL_PGTABLE_H
+#define __ASM_KERNEL_PGTABLE_H
+
+#include <asm/boot.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sparsemem.h>
+
+/*
+ * The physical and virtual addresses of the start of the kernel image are
+ * equal modulo 2 MiB (per the arm64 booting.txt requirements). Hence we can
+ * use section mapping with 4K (section size = 2M) but not with 16K (section
+ * size = 32M) or 64K (section size = 512M).
+ */
+#if defined(PMD_SIZE) && PMD_SIZE <= MIN_KIMG_ALIGN
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_SKIP_LEVEL 1
+#else
+#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
+#define SWAPPER_SKIP_LEVEL 0
+#endif
+#define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT)
+
+#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
+#define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
+
+#define IDMAP_VA_BITS 48
+#define IDMAP_LEVELS ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
+#define IDMAP_ROOT_LEVEL (4 - IDMAP_LEVELS)
+
+/*
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
+ */
+#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE)
+
+#define SPAN_NR_ENTRIES(vstart, vend, shift) \
+ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
+
+#define EARLY_ENTRIES(lvl, vstart, vend) \
+ SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT)
+
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0)
+
+#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ + EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+ + EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ + EARLY_SEGMENT_EXTRA_PAGES))
+
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1))
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
+
+#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
+#define INIT_IDMAP_FDT_SIZE ((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
+
+/* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
+#define KERNEL_SEGMENT_COUNT 5
+
+#if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
+#define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
+/*
+ * The initial ID map consists of the kernel image, mapped as two separate
+ * segments, and may appear misaligned wrt the swapper block size. This means
+ * we need 3 additional pages. The DT could straddle a swapper block boundary,
+ * so it may need 2.
+ */
+#define EARLY_IDMAP_EXTRA_PAGES 3
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 2
+#else
+#define EARLY_SEGMENT_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_PAGES 0
+#define EARLY_IDMAP_EXTRA_FDT_PAGES 0
+#endif
+
+#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 000000000000..892e5bebda95
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ */
+
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+#define KEXEC_ARCH KEXEC_ARCH_AARCH64
+
+#ifndef __ASSEMBLER__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+ if (oldregs) {
+ memcpy(newregs, oldregs, sizeof(*newregs));
+ } else {
+ u64 tmp1, tmp2;
+
+ __asm__ __volatile__ (
+ "stp x0, x1, [%2, #16 * 0]\n"
+ "stp x2, x3, [%2, #16 * 1]\n"
+ "stp x4, x5, [%2, #16 * 2]\n"
+ "stp x6, x7, [%2, #16 * 3]\n"
+ "stp x8, x9, [%2, #16 * 4]\n"
+ "stp x10, x11, [%2, #16 * 5]\n"
+ "stp x12, x13, [%2, #16 * 6]\n"
+ "stp x14, x15, [%2, #16 * 7]\n"
+ "stp x16, x17, [%2, #16 * 8]\n"
+ "stp x18, x19, [%2, #16 * 9]\n"
+ "stp x20, x21, [%2, #16 * 10]\n"
+ "stp x22, x23, [%2, #16 * 11]\n"
+ "stp x24, x25, [%2, #16 * 12]\n"
+ "stp x26, x27, [%2, #16 * 13]\n"
+ "stp x28, x29, [%2, #16 * 14]\n"
+ "mov %0, sp\n"
+ "stp x30, %0, [%2, #16 * 15]\n"
+
+ "/* faked current PSTATE */\n"
+ "mrs %0, CurrentEL\n"
+ "mrs %1, SPSEL\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, DAIF\n"
+ "orr %0, %0, %1\n"
+ "mrs %1, NZCV\n"
+ "orr %0, %0, %1\n"
+ /* pc */
+ "adr %1, 1f\n"
+ "1:\n"
+ "stp %1, %0, [%2, #16 * 16]\n"
+ : "=&r" (tmp1), "=&r" (tmp2)
+ : "r" (newregs)
+ : "memory"
+ );
+ }
+}
+
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
+extern bool crash_is_nosave(unsigned long pfn);
+extern void crash_prepare_suspend(void);
+extern void crash_post_resume(void);
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+#else
+static inline bool crash_is_nosave(unsigned long pfn) {return false; }
+static inline void crash_prepare_suspend(void) {}
+static inline void crash_post_resume(void) {}
+#endif
+
+struct kimage;
+
+#if defined(CONFIG_KEXEC_CORE)
+void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2);
+
+int machine_kexec_post_load(struct kimage *image);
+#define machine_kexec_post_load machine_kexec_post_load
+#endif
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ void *dtb;
+ phys_addr_t dtb_mem;
+ phys_addr_t kern_reloc;
+ phys_addr_t el2_vectors;
+ phys_addr_t ttbr0;
+ phys_addr_t ttbr1;
+ phys_addr_t zero_page;
+ unsigned long phys_offset;
+ unsigned long t0sz;
+};
+
+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops kexec_image_ops;
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+extern int load_other_segments(struct kimage *image,
+ unsigned long kernel_load_addr, unsigned long kernel_size,
+ char *initrd, unsigned long initrd_len,
+ char *cmdline);
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
new file mode 100644
index 000000000000..21dbc9dda747
--- /dev/null
+++ b/arch/arm64/include/asm/kfence.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm64 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef __ASM_KFENCE_H
+#define __ASM_KFENCE_H
+
+#include <asm/set_memory.h>
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ set_memory_valid(addr, 1, !protect);
+
+ return true;
+}
+
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static inline bool arm64_kfence_can_set_direct_map(void)
+{
+ return !kfence_early_init;
+}
+bool arch_kfence_init_pool(void);
+#else /* CONFIG_KFENCE */
+static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
+#endif /* CONFIG_KFENCE */
+
+#endif /* __ASM_KFENCE_H */
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 000000000000..67ef1c5532ae
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef __ASSEMBLER__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm ("brk %0" : : "I" (KGDB_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_KGDB
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int kgdb_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
+#endif /* !__ASSEMBLER__ */
+
+/*
+ * gdb remote procotol (well most versions of it) expects the following
+ * register layout.
+ *
+ * General purpose regs:
+ * r0-r30: 64 bit
+ * sp,pc : 64 bit
+ * pstate : 32 bit
+ * Total: 33 + 1
+ * FPU regs:
+ * f0-f31: 128 bit
+ * fpsr & fpcr: 32 bit
+ * Total: 32 + 2
+ *
+ * To expand a little on the "most versions of it"... when the gdb remote
+ * protocol for AArch64 was developed it depended on a statement in the
+ * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register".
+ * and, as a result, allocated only 32-bits for the PSTATE in the remote
+ * protocol. In fact this statement is still present in ARM DDI 0487A.i.
+ *
+ * Unfortunately "is a 32-bit register" has a very special meaning for
+ * system registers. It means that "the upper bits, bits[63:32], are
+ * RES0.". RES0 is heavily used in the ARM architecture documents as a
+ * way to leave space for future architecture changes. So to translate a
+ * little for people who don't spend their spare time reading ARM architecture
+ * manuals, what "is a 32-bit register" actually means in this context is
+ * "is a 64-bit register but one with no meaning allocated to any of the
+ * upper 32-bits... *yet*".
+ *
+ * Perhaps then we should not be surprised that this has led to some
+ * confusion. Specifically a patch, influenced by the above translation,
+ * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch
+ * was reverted in gdb-7.8.1 and all later releases, when this was
+ * discovered to be an undocumented protocol change.
+ *
+ * So... it is *not* wrong for us to only allocate 32-bits to PSTATE
+ * here even though the kernel itself allocates 64-bits for the same
+ * state. That is because this bit of code tells the kernel how the gdb
+ * remote protocol (well most versions of it) describes the register state.
+ *
+ * Note that if you are using one of the versions of gdb that supports
+ * the gdb-7.7 version of the protocol you cannot use kgdb directly
+ * without providing a custom register description (gdb can load new
+ * protocol descriptions at runtime).
+ */
+
+#define _GP_REGS 33
+#define _FP_REGS 32
+#define _EXTRA_REGS 3
+/*
+ * general purpose registers size in bytes.
+ * pstate is only 4 bytes. subtract 4 bytes
+ */
+#define GP_REG_BYTES (_GP_REGS * 8)
+#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+
+#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \
+ (_EXTRA_REGS * 4))
+
+#endif /* __ASM_KGDB_H */
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
new file mode 100644
index 000000000000..f2782560647b
--- /dev/null
+++ b/arch/arm64/include/asm/kprobes.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/kprobes.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ */
+
+#ifndef _ARM_KPROBES_H
+#define _ARM_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define flush_insn_slot(p) do { } while (0)
+#define kretprobe_blacklist_size 0
+
+#include <asm/probes.h>
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned int kprobe_status;
+ unsigned long saved_irqflag;
+ struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *);
+int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
+void __kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
+
+int __kprobes kprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kprobe_ss_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kretprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+
+#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a5f28e2720c7..e500600e4b9b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -1,245 +1,377 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_ARM_H__
#define __ARM64_KVM_ARM_H__
+#include <asm/esr.h>
+#include <asm/memory.h>
+#include <asm/sysreg.h>
#include <asm/types.h>
-/* Hyp Configuration Register (HCR) bits */
-#define HCR_ID (UL(1) << 33)
-#define HCR_CD (UL(1) << 32)
-#define HCR_RW_SHIFT 31
-#define HCR_RW (UL(1) << HCR_RW_SHIFT)
-#define HCR_TRVM (UL(1) << 30)
-#define HCR_HCD (UL(1) << 29)
-#define HCR_TDZ (UL(1) << 28)
-#define HCR_TGE (UL(1) << 27)
-#define HCR_TVM (UL(1) << 26)
-#define HCR_TTLB (UL(1) << 25)
-#define HCR_TPU (UL(1) << 24)
-#define HCR_TPC (UL(1) << 23)
-#define HCR_TSW (UL(1) << 22)
-#define HCR_TAC (UL(1) << 21)
-#define HCR_TIDCP (UL(1) << 20)
-#define HCR_TSC (UL(1) << 19)
-#define HCR_TID3 (UL(1) << 18)
-#define HCR_TID2 (UL(1) << 17)
-#define HCR_TID1 (UL(1) << 16)
-#define HCR_TID0 (UL(1) << 15)
-#define HCR_TWE (UL(1) << 14)
-#define HCR_TWI (UL(1) << 13)
-#define HCR_DC (UL(1) << 12)
-#define HCR_BSU (3 << 10)
-#define HCR_BSU_IS (UL(1) << 10)
-#define HCR_FB (UL(1) << 9)
-#define HCR_VA (UL(1) << 8)
-#define HCR_VI (UL(1) << 7)
-#define HCR_VF (UL(1) << 6)
-#define HCR_AMO (UL(1) << 5)
-#define HCR_IMO (UL(1) << 4)
-#define HCR_FMO (UL(1) << 3)
-#define HCR_PTW (UL(1) << 2)
-#define HCR_SWIO (UL(1) << 1)
-#define HCR_VM (UL(1) << 0)
+/*
+ * Because I'm terribly lazy and that repainting the whole of the KVM
+ * code with the proper names is a pain, use a helper to map the names
+ * inherited from AArch32 with the new fancy nomenclature. One day...
+ */
+#define __HCR(x) HCR_EL2_##x
+
+#define HCR_TID5 __HCR(TID5)
+#define HCR_DCT __HCR(DCT)
+#define HCR_ATA_SHIFT __HCR(ATA_SHIFT)
+#define HCR_ATA __HCR(ATA)
+#define HCR_TTLBOS __HCR(TTLBOS)
+#define HCR_TTLBIS __HCR(TTLBIS)
+#define HCR_ENSCXT __HCR(EnSCXT)
+#define HCR_TOCU __HCR(TOCU)
+#define HCR_AMVOFFEN __HCR(AMVOFFEN)
+#define HCR_TICAB __HCR(TICAB)
+#define HCR_TID4 __HCR(TID4)
+#define HCR_FIEN __HCR(FIEN)
+#define HCR_FWB __HCR(FWB)
+#define HCR_NV2 __HCR(NV2)
+#define HCR_AT __HCR(AT)
+#define HCR_NV1 __HCR(NV1)
+#define HCR_NV __HCR(NV)
+#define HCR_API __HCR(API)
+#define HCR_APK __HCR(APK)
+#define HCR_TEA __HCR(TEA)
+#define HCR_TERR __HCR(TERR)
+#define HCR_TLOR __HCR(TLOR)
+#define HCR_E2H __HCR(E2H)
+#define HCR_ID __HCR(ID)
+#define HCR_CD __HCR(CD)
+#define HCR_RW __HCR(RW)
+#define HCR_TRVM __HCR(TRVM)
+#define HCR_HCD __HCR(HCD)
+#define HCR_TDZ __HCR(TDZ)
+#define HCR_TGE __HCR(TGE)
+#define HCR_TVM __HCR(TVM)
+#define HCR_TTLB __HCR(TTLB)
+#define HCR_TPU __HCR(TPU)
+#define HCR_TPC __HCR(TPCP)
+#define HCR_TSW __HCR(TSW)
+#define HCR_TACR __HCR(TACR)
+#define HCR_TIDCP __HCR(TIDCP)
+#define HCR_TSC __HCR(TSC)
+#define HCR_TID3 __HCR(TID3)
+#define HCR_TID2 __HCR(TID2)
+#define HCR_TID1 __HCR(TID1)
+#define HCR_TID0 __HCR(TID0)
+#define HCR_TWE __HCR(TWE)
+#define HCR_TWI __HCR(TWI)
+#define HCR_DC __HCR(DC)
+#define HCR_BSU __HCR(BSU)
+#define HCR_BSU_IS __HCR(BSU_IS)
+#define HCR_FB __HCR(FB)
+#define HCR_VSE __HCR(VSE)
+#define HCR_VI __HCR(VI)
+#define HCR_VF __HCR(VF)
+#define HCR_AMO __HCR(AMO)
+#define HCR_IMO __HCR(IMO)
+#define HCR_FMO __HCR(FMO)
+#define HCR_PTW __HCR(PTW)
+#define HCR_SWIO __HCR(SWIO)
+#define HCR_VM __HCR(VM)
/*
* The bits we set in HCR:
- * RW: 64bit by default, can be overriden for 32bit VMs
- * TAC: Trap ACTLR
+ * TLOR: Trap LORegion register accesses
+ * RW: 64bit by default, can be overridden for 32bit VMs
+ * TACR: Trap ACTLR
* TSC: Trap SMC
* TSW: Trap cache operations by set/way
+ * TWE: Trap WFE
* TWI: Trap WFI
* TIDCP: Trap L2CTLR/L2ECTLR
* BSU_IS: Upgrade barriers to the inner shareable domain
- * FB: Force broadcast of all maintainance operations
+ * FB: Force broadcast of all maintenance operations
* AMO: Override CPSR.A and enable signaling with VA
* IMO: Override CPSR.I and enable signaling with VI
* FMO: Override CPSR.F and enable signaling with VF
* SWIO: Turn set/way invalidates into set/way clean+invalidate
+ * PTW: Take a stage2 fault if a stage1 walk steps in device memory
+ * TID3: Trap EL1 reads of group 3 ID registers
+ * TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1
*/
-#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
- HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
- HCR_SWIO | HCR_TIDCP | HCR_RW)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
-
-/* Hyp System Control Register (SCTLR_EL2) bits */
-#define SCTLR_EL2_EE (1 << 25)
-#define SCTLR_EL2_WXN (1 << 19)
-#define SCTLR_EL2_I (1 << 12)
-#define SCTLR_EL2_SA (1 << 3)
-#define SCTLR_EL2_C (1 << 2)
-#define SCTLR_EL2_A (1 << 1)
-#define SCTLR_EL2_M 1
-#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \
- SCTLR_EL2_SA | SCTLR_EL2_I)
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+ HCR_BSU_IS | HCR_FB | HCR_TACR | \
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
+#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
+
+#define MPAMHCR_HOST_FLAGS 0
/* TCR_EL2 Registers bits */
-#define TCR_EL2_TBI (1 << 20)
-#define TCR_EL2_PS (7 << 16)
-#define TCR_EL2_PS_40B (2 << 16)
-#define TCR_EL2_TG0 (1 << 14)
-#define TCR_EL2_SH0 (3 << 12)
-#define TCR_EL2_ORGN0 (3 << 10)
-#define TCR_EL2_IRGN0 (3 << 8)
-#define TCR_EL2_T0SZ 0x3f
-#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
- TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
-
-#define TCR_EL2_FLAGS (TCR_EL2_PS_40B)
+#define TCR_EL2_DS (1UL << 32)
+#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
+#define TCR_EL2_HPD (1 << 24)
+#define TCR_EL2_HA (1 << 21)
+#define TCR_EL2_TBI (1 << 20)
+#define TCR_EL2_PS_SHIFT 16
+#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_TG0_MASK TCR_TG0_MASK
+#define TCR_EL2_SH0_MASK TCR_SH0_MASK
+#define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define TCR_EL2_T0SZ_MASK 0x3f
+#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK)
/* VTCR_EL2 Registers bits */
-#define VTCR_EL2_PS_MASK (7 << 16)
-#define VTCR_EL2_PS_40B (2 << 16)
-#define VTCR_EL2_TG0_MASK (1 << 14)
-#define VTCR_EL2_TG0_4K (0 << 14)
-#define VTCR_EL2_TG0_64K (1 << 14)
-#define VTCR_EL2_SH0_MASK (3 << 12)
-#define VTCR_EL2_SH0_INNER (3 << 12)
-#define VTCR_EL2_ORGN0_MASK (3 << 10)
-#define VTCR_EL2_ORGN0_WBWA (1 << 10)
-#define VTCR_EL2_IRGN0_MASK (3 << 8)
-#define VTCR_EL2_IRGN0_WBWA (1 << 8)
-#define VTCR_EL2_SL0_MASK (3 << 6)
-#define VTCR_EL2_SL0_LVL1 (1 << 6)
+#define VTCR_EL2_DS TCR_EL2_DS
+#define VTCR_EL2_RES1 (1U << 31)
+#define VTCR_EL2_HD (1 << 22)
+#define VTCR_EL2_HA (1 << 21)
+#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
+#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
+#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
+#define VTCR_EL2_TG0_4K TCR_TG0_4K
+#define VTCR_EL2_TG0_16K TCR_TG0_16K
+#define VTCR_EL2_TG0_64K TCR_TG0_64K
+#define VTCR_EL2_SH0_MASK TCR_SH0_MASK
+#define VTCR_EL2_SH0_INNER TCR_SH0_INNER
+#define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK
+#define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA
+#define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
+#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
+#define VTCR_EL2_SL0_SHIFT 6
+#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
-#define VTCR_EL2_T0SZ_40B 24
+#define VTCR_EL2_VS_SHIFT 19
+#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
+#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
+
+#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
-#ifdef CONFIG_ARM64_64K_PAGES
/*
- * Stage2 translation configuration:
- * 40bits output (PS = 2)
- * 40bits input (T0SZ = 24)
- * 64kB pages (TG0 = 1)
- * 2 level page tables (SL = 1)
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together. With 16K pages, we concatenate 16 first level page tables.
+ *
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
-#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
-#else
+
+#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+ VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
+
/*
- * Stage2 translation configuration:
- * 40bits output (PS = 2)
- * 40bits input (T0SZ = 24)
- * 4kB pages (TG0 = 0)
- * 3 level page tables (SL = 1)
+ * VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
+ * Interestingly, it depends on the page size.
+ * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
+ *
+ * -----------------------------------------
+ * | Entry level | 4K | 16K/64K |
+ * ------------------------------------------
+ * | Level: 0 | 2 | - |
+ * ------------------------------------------
+ * | Level: 1 | 1 | 2 |
+ * ------------------------------------------
+ * | Level: 2 | 0 | 1 |
+ * ------------------------------------------
+ * | Level: 3 | - | 0 |
+ * ------------------------------------------
+ *
+ * The table roughly translates to :
+ *
+ * SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
+ *
+ * Where TGRAN_SL0_BASE is a magic number depending on the page size:
+ * TGRAN_SL0_BASE(4K) = 2
+ * TGRAN_SL0_BASE(16K) = 3
+ * TGRAN_SL0_BASE(64K) = 3
+ * provided we take care of ruling out the unsupported cases and
+ * Entry_Level = 4 - Number_of_levels.
+ *
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
-#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
+#ifdef CONFIG_ARM64_64K_PAGES
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K
+#define VTCR_EL2_TGRAN_SL0_BASE 3UL
+
+#elif defined(CONFIG_ARM64_16K_PAGES)
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K
+#define VTCR_EL2_TGRAN_SL0_BASE 3UL
+
+#else /* 4K */
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K
+#define VTCR_EL2_TGRAN_SL0_BASE 2UL
+
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
-#define VTTBR_VMID_SHIFT (48LLU)
-#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
+#define VTCR_EL2_LVLS_TO_SL0(levels) \
+ ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_TO_LVLS(sl0) \
+ ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
+#define VTCR_EL2_LVLS(vtcr) \
+ VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
+
+#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
+#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
+
+/*
+ * ARM VMSAv8-64 defines an algorithm for finding the translation table
+ * descriptors in section D4.2.8 in ARM DDI 0487C.a.
+ *
+ * The algorithm defines the expectations on the translation table
+ * addresses for each level, based on PAGE_SIZE, entry level
+ * and the translation table size (T0SZ). The variable "x" in the
+ * algorithm determines the alignment of a table base address at a given
+ * level and thus determines the alignment of VTTBR:BADDR for stage2
+ * page table entry level.
+ * Since the number of bits resolved at the entry level could vary
+ * depending on the T0SZ, the value of "x" is defined based on a
+ * Magic constant for a given PAGE_SIZE and Entry Level. The
+ * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
+ * x = PAGE_SHIFT).
+ *
+ * The value of "x" for entry level is calculated as :
+ * x = Magic_N - T0SZ
+ *
+ * where Magic_N is an integer depending on the page size and the entry
+ * level of the page table as below:
+ *
+ * --------------------------------------------
+ * | Entry level | 4K 16K 64K |
+ * --------------------------------------------
+ * | Level: 0 (4 levels) | 28 | - | - |
+ * --------------------------------------------
+ * | Level: 1 (3 levels) | 37 | 31 | 25 |
+ * --------------------------------------------
+ * | Level: 2 (2 levels) | 46 | 42 | 38 |
+ * --------------------------------------------
+ * | Level: 3 (1 level) | - | 53 | 51 |
+ * --------------------------------------------
+ *
+ * We have a magic formula for the Magic_N below:
+ *
+ * Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
+ *
+ * where Number_of_levels = (4 - Level). We are only interested in the
+ * value for Entry_Level for the stage2 page table.
+ *
+ * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
+ *
+ * x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
+ * = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels)
+ *
+ * Here is one way to explain the Magic Formula:
+ *
+ * x = log2(Size_of_Entry_Level_Table)
+ *
+ * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
+ * PAGE_SHIFT bits in the PTE, we have :
+ *
+ * Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
+ * = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
+ * where n = number of levels, and since each pointer is 8bytes, we have:
+ *
+ * x = Bits_Entry_Level + 3
+ * = IPA_SHIFT - (PAGE_SHIFT - 3) * n
+ *
+ * The only constraint here is that, we have to find the number of page table
+ * levels for a given IPA size (which we do, see stage2_pt_levels())
+ */
+#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3)))
+
+#define VTTBR_CNP_BIT (UL(1))
+#define VTTBR_VMID_SHIFT (UL(48))
+#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
/* Hyp System Trap Register */
-#define HSTR_EL2_TTEE (1 << 16)
#define HSTR_EL2_T(x) (1 << x)
+/* Hyp Coprocessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
/* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TCPAC (1U << 31)
+#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
-#define CPTR_EL2_TFP (1 << 10)
-
-/* Hyp Debug Configuration Register bits */
-#define MDCR_EL2_TDRA (1 << 11)
-#define MDCR_EL2_TDOSA (1 << 10)
-#define MDCR_EL2_TDA (1 << 9)
-#define MDCR_EL2_TDE (1 << 8)
-#define MDCR_EL2_HPME (1 << 7)
-#define MDCR_EL2_TPM (1 << 6)
-#define MDCR_EL2_TPMCR (1 << 5)
-#define MDCR_EL2_HPMN_MASK (0x1F)
-
-/* Exception Syndrome Register (ESR) bits */
-#define ESR_EL2_EC_SHIFT (26)
-#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT)
-#define ESR_EL2_IL (1U << 25)
-#define ESR_EL2_ISS (ESR_EL2_IL - 1)
-#define ESR_EL2_ISV_SHIFT (24)
-#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT)
-#define ESR_EL2_SAS_SHIFT (22)
-#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT)
-#define ESR_EL2_SSE (1 << 21)
-#define ESR_EL2_SRT_SHIFT (16)
-#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT)
-#define ESR_EL2_SF (1 << 15)
-#define ESR_EL2_AR (1 << 14)
-#define ESR_EL2_EA (1 << 9)
-#define ESR_EL2_CM (1 << 8)
-#define ESR_EL2_S1PTW (1 << 7)
-#define ESR_EL2_WNR (1 << 6)
-#define ESR_EL2_FSC (0x3f)
-#define ESR_EL2_FSC_TYPE (0x3c)
-
-#define ESR_EL2_CV_SHIFT (24)
-#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT)
-#define ESR_EL2_COND_SHIFT (20)
-#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT)
-
-
-#define FSC_FAULT (0x04)
-#define FSC_PERM (0x0c)
+#define CPTR_EL2_TSM (1 << 12)
+#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_TZ (1 << 8)
+#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0))
+#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(29, 21) | \
+ GENMASK(19, 14) | \
+ BIT(11))
+
+#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(27, 26) | \
+ GENMASK(23, 22) | \
+ GENMASK(19, 18) | \
+ GENMASK(15, 0))
+
+/*
+ * Polarity masks for HCRX_EL2, limited to the bits that we know about
+ * at this point in time. It doesn't mean that we actually *handle*
+ * them, but that at least those that are not advertised to a guest
+ * will be RES0 for that guest.
+ */
+#define __HCRX_EL2_MASK (BIT_ULL(6))
+#define __HCRX_EL2_nMASK (GENMASK_ULL(24, 14) | \
+ GENMASK_ULL(11, 7) | \
+ GENMASK_ULL(5, 0))
+#define __HCRX_EL2_RES0 ~(__HCRX_EL2_nMASK | __HCRX_EL2_MASK)
+#define __HCRX_EL2_RES1 ~(__HCRX_EL2_nMASK | \
+ __HCRX_EL2_MASK | \
+ __HCRX_EL2_RES0)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
-#define HPFAR_MASK (~0xFUL)
-
-#define ESR_EL2_EC_UNKNOWN (0x00)
-#define ESR_EL2_EC_WFI (0x01)
-#define ESR_EL2_EC_CP15_32 (0x03)
-#define ESR_EL2_EC_CP15_64 (0x04)
-#define ESR_EL2_EC_CP14_MR (0x05)
-#define ESR_EL2_EC_CP14_LS (0x06)
-#define ESR_EL2_EC_FP_ASIMD (0x07)
-#define ESR_EL2_EC_CP10_ID (0x08)
-#define ESR_EL2_EC_CP14_64 (0x0C)
-#define ESR_EL2_EC_ILL_ISS (0x0E)
-#define ESR_EL2_EC_SVC32 (0x11)
-#define ESR_EL2_EC_HVC32 (0x12)
-#define ESR_EL2_EC_SMC32 (0x13)
-#define ESR_EL2_EC_SVC64 (0x15)
-#define ESR_EL2_EC_HVC64 (0x16)
-#define ESR_EL2_EC_SMC64 (0x17)
-#define ESR_EL2_EC_SYS64 (0x18)
-#define ESR_EL2_EC_IABT (0x20)
-#define ESR_EL2_EC_IABT_HYP (0x21)
-#define ESR_EL2_EC_PC_ALIGN (0x22)
-#define ESR_EL2_EC_DABT (0x24)
-#define ESR_EL2_EC_DABT_HYP (0x25)
-#define ESR_EL2_EC_SP_ALIGN (0x26)
-#define ESR_EL2_EC_FP_EXC32 (0x28)
-#define ESR_EL2_EC_FP_EXC64 (0x2C)
-#define ESR_EL2_EC_SERRROR (0x2F)
-#define ESR_EL2_EC_BREAKPT (0x30)
-#define ESR_EL2_EC_BREAKPT_HYP (0x31)
-#define ESR_EL2_EC_SOFTSTP (0x32)
-#define ESR_EL2_EC_SOFTSTP_HYP (0x33)
-#define ESR_EL2_EC_WATCHPT (0x34)
-#define ESR_EL2_EC_WATCHPT_HYP (0x35)
-#define ESR_EL2_EC_BKPT32 (0x38)
-#define ESR_EL2_EC_VECTOR32 (0x3A)
-#define ESR_EL2_EC_BRK64 (0x3C)
-
-#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10
+#define HPFAR_MASK (~UL(0xf))
+/*
+ * We have
+ * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
+ * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
+ *
+ * Always assume 52 bit PA since at this point, we don't know how many PA bits
+ * the page table has been set up for. This should be safe since unused address
+ * bits in PAR are res0.
+ */
+#define PAR_TO_HPFAR(par) \
+ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+ ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+ ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
+ ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
+ ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
+ ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
+
+#define kvm_mode_names \
+ { PSR_MODE_EL0t, "EL0t" }, \
+ { PSR_MODE_EL1t, "EL1t" }, \
+ { PSR_MODE_EL1h, "EL1h" }, \
+ { PSR_MODE_EL2t, "EL2t" }, \
+ { PSR_MODE_EL2h, "EL2h" }, \
+ { PSR_MODE_EL3t, "EL3t" }, \
+ { PSR_MODE_EL3h, "EL3h" }, \
+ { PSR_AA32_MODE_USR, "32-bit USR" }, \
+ { PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \
+ { PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \
+ { PSR_AA32_MODE_SVC, "32-bit SVC" }, \
+ { PSR_AA32_MODE_ABT, "32-bit ABT" }, \
+ { PSR_AA32_MODE_HYP, "32-bit HYP" }, \
+ { PSR_AA32_MODE_UND, "32-bit UND" }, \
+ { PSR_AA32_MODE_SYS, "32-bit SYS" }
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de4163eba..a1ad12c72ebf 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -1,104 +1,385 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
+#include <asm/hyp_image.h>
+#include <asm/insn.h>
+#include <asm/virt.h>
+#include <asm/sysreg.h>
+
+#define ARM_EXIT_WITH_SERROR_BIT 31
+#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
+#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
+#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
+
+#define ARM_EXCEPTION_IRQ 0
+#define ARM_EXCEPTION_EL1_SERROR 1
+#define ARM_EXCEPTION_TRAP 2
+#define ARM_EXCEPTION_IL 3
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
+
+#define kvm_arm_exception_type \
+ {ARM_EXCEPTION_IRQ, "IRQ" }, \
+ {ARM_EXCEPTION_EL1_SERROR, "SERROR" }, \
+ {ARM_EXCEPTION_TRAP, "TRAP" }, \
+ {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
+
/*
- * 0 is reserved as an invalid value.
- * Order *must* be kept in sync with the hyp switch code.
+ * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
+ * that jumps over this.
*/
-#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
-#define CSSELR_EL1 2 /* Cache Size Selection Register */
-#define SCTLR_EL1 3 /* System Control Register */
-#define ACTLR_EL1 4 /* Auxilliary Control Register */
-#define CPACR_EL1 5 /* Coprocessor Access Control */
-#define TTBR0_EL1 6 /* Translation Table Base Register 0 */
-#define TTBR1_EL1 7 /* Translation Table Base Register 1 */
-#define TCR_EL1 8 /* Translation Control Register */
-#define ESR_EL1 9 /* Exception Syndrome Register */
-#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */
-#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */
-#define FAR_EL1 12 /* Fault Address Register */
-#define MAIR_EL1 13 /* Memory Attribute Indirection Register */
-#define VBAR_EL1 14 /* Vector Base Address Register */
-#define CONTEXTIDR_EL1 15 /* Context ID Register */
-#define TPIDR_EL0 16 /* Thread ID, User R/W */
-#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
-#define TPIDR_EL1 18 /* Thread ID, Privileged */
-#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
-#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
-/* 32bit specific registers. Keep them at the end of the range */
-#define DACR32_EL2 21 /* Domain Access Control Register */
-#define IFSR32_EL2 22 /* Instruction Fault Status Register */
-#define FPEXC32_EL2 23 /* Floating-Point Exception Control Register */
-#define DBGVCR32_EL2 24 /* Debug Vector Catch Register */
-#define TEECR32_EL1 25 /* ThumbEE Configuration Register */
-#define TEEHBR32_EL1 26 /* ThumbEE Handler Base Register */
-#define NR_SYS_REGS 27
-
-/* 32bit mapping */
-#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
-#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
-#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
-#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
-#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
-#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
-#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
-#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
-#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
-#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
-#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
-#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
-#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
-#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
-#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
-#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
-#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
-#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
-#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
-#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
-#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-#define NR_CP15_REGS (NR_SYS_REGS * 2)
+#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE)
-#define ARM_EXCEPTION_IRQ 0
-#define ARM_EXCEPTION_TRAP 1
+#define KVM_HOST_SMCCC_ID(id) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ (id))
+
+#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name)
+
+#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0
+
+#ifndef __ASSEMBLER__
+
+#include <linux/mm.h>
+
+enum __kvm_host_smccc_func {
+ /* Hypercalls available only prior to pKVM finalisation */
+ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
+ __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
+ __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
+ __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
+ __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
+ __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
+
+ /* Hypercalls available after pKVM finalisation */
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
+ __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
+ __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
+ __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
+};
+
+#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[]
+
+/*
+ * Define a pair of symbols sharing the same name but one defined in
+ * VHE and the other in nVHE hyp implementations.
+ */
+#define DECLARE_KVM_HYP_SYM(sym) \
+ DECLARE_KVM_VHE_SYM(sym); \
+ DECLARE_KVM_NVHE_SYM(sym)
+
+#define DECLARE_KVM_VHE_PER_CPU(type, sym) \
+ DECLARE_PER_CPU(type, sym)
+#define DECLARE_KVM_NVHE_PER_CPU(type, sym) \
+ DECLARE_PER_CPU(type, kvm_nvhe_sym(sym))
+
+#define DECLARE_KVM_HYP_PER_CPU(type, sym) \
+ DECLARE_KVM_VHE_PER_CPU(type, sym); \
+ DECLARE_KVM_NVHE_PER_CPU(type, sym)
+
+/*
+ * Compute pointer to a symbol defined in nVHE percpu region.
+ * Returns NULL if percpu memory has not been allocated yet.
+ */
+#define this_cpu_ptr_nvhe_sym(sym) per_cpu_ptr_nvhe_sym(sym, smp_processor_id())
+#define per_cpu_ptr_nvhe_sym(sym, cpu) \
+ ({ \
+ unsigned long base, off; \
+ base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
+ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
+ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
+ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
+ })
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+
+#define CHOOSE_NVHE_SYM(sym) sym
+#define CHOOSE_HYP_SYM(sym) CHOOSE_NVHE_SYM(sym)
+
+/* The nVHE hypervisor shouldn't even try to access VHE symbols */
+extern void *__nvhe_undefined_symbol;
+#define CHOOSE_VHE_SYM(sym) __nvhe_undefined_symbol
+#define this_cpu_ptr_hyp_sym(sym) (&__nvhe_undefined_symbol)
+#define per_cpu_ptr_hyp_sym(sym, cpu) (&__nvhe_undefined_symbol)
+
+#elif defined(__KVM_VHE_HYPERVISOR__)
+
+#define CHOOSE_VHE_SYM(sym) sym
+#define CHOOSE_HYP_SYM(sym) CHOOSE_VHE_SYM(sym)
+
+/* The VHE hypervisor shouldn't even try to access nVHE symbols */
+extern void *__vhe_undefined_symbol;
+#define CHOOSE_NVHE_SYM(sym) __vhe_undefined_symbol
+#define this_cpu_ptr_hyp_sym(sym) (&__vhe_undefined_symbol)
+#define per_cpu_ptr_hyp_sym(sym, cpu) (&__vhe_undefined_symbol)
+
+#else
+
+/*
+ * BIG FAT WARNINGS:
+ *
+ * - Don't be tempted to change the following is_kernel_in_hyp_mode()
+ * to has_vhe(). has_vhe() is implemented as a *final* capability,
+ * while this is used early at boot time, when the capabilities are
+ * not final yet....
+ *
+ * - Don't let the nVHE hypervisor have access to this, as it will
+ * pick the *wrong* symbol (yes, it runs at EL2...).
+ */
+#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() \
+ ? CHOOSE_VHE_SYM(sym) \
+ : CHOOSE_NVHE_SYM(sym))
+
+#define this_cpu_ptr_hyp_sym(sym) (is_kernel_in_hyp_mode() \
+ ? this_cpu_ptr(&sym) \
+ : this_cpu_ptr_nvhe_sym(sym))
+
+#define per_cpu_ptr_hyp_sym(sym, cpu) (is_kernel_in_hyp_mode() \
+ ? per_cpu_ptr(&sym, cpu) \
+ : per_cpu_ptr_nvhe_sym(sym, cpu))
+
+#define CHOOSE_VHE_SYM(sym) sym
+#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym)
+
+#endif
+
+struct kvm_nvhe_init_params {
+ unsigned long mair_el2;
+ unsigned long tcr_el2;
+ unsigned long tpidr_el2;
+ unsigned long stack_hyp_va;
+ unsigned long stack_pa;
+ phys_addr_t pgd_pa;
+ unsigned long hcr_el2;
+ unsigned long vttbr;
+ unsigned long vtcr;
+ unsigned long tmp;
+};
+
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base: hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp: hyp FP where the backtrace begins.
+ * @pc: hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+ unsigned long stack_base;
+ unsigned long overflow_stack_base;
+ unsigned long fp;
+ unsigned long pc;
+};
+
+/* Translate a kernel address @ptr into its equivalent linear mapping */
+#define kvm_ksym_ref(ptr) \
+ ({ \
+ void *val = (ptr); \
+ if (!is_kernel_in_hyp_mode()) \
+ val = lm_alias((ptr)); \
+ val; \
+ })
+#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym))
-#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
+struct kvm_s2_mmu;
-extern char __kvm_hyp_init[];
-extern char __kvm_hyp_init_end[];
+DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
+DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
+#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
+#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
-extern char __kvm_hyp_vector[];
+extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
+DECLARE_KVM_NVHE_SYM(__per_cpu_start);
+DECLARE_KVM_NVHE_SYM(__per_cpu_end);
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
+#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
+
+extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
+
+extern void __kvm_timer_set_cntvoff(u64 cntvoff);
+extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_gic_config(void);
+extern void __vgic_v3_init_lrs(void);
+
+#define __KVM_EXTABLE(from, to) \
+ " .pushsection __kvm_ex_table, \"a\"\n" \
+ " .align 3\n" \
+ " .long (" #from " - .), (" #to " - .)\n" \
+ " .popsection\n"
+
+
+#define __kvm_at(at_op, addr) \
+( { \
+ int __kvm_at_err = 0; \
+ u64 spsr, elr; \
+ asm volatile( \
+ " mrs %1, spsr_el2\n" \
+ " mrs %2, elr_el2\n" \
+ "1: " __msr_s(at_op, "%3") "\n" \
+ " isb\n" \
+ " b 9f\n" \
+ "2: msr spsr_el2, %1\n" \
+ " msr elr_el2, %2\n" \
+ " mov %w0, %4\n" \
+ "9:\n" \
+ __KVM_EXTABLE(1b, 2b) \
+ : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \
+ : "r" (addr), "i" (-EFAULT)); \
+ __kvm_at_err; \
+} )
+
+void __noreturn hyp_panic(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+asmlinkage void __noreturn hyp_panic(void);
+asmlinkage void __noreturn hyp_panic_bad_stack(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+struct kvm_cpu_context;
+void handle_trap(struct kvm_cpu_context *host_ctxt);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
+void __noreturn __pkvm_init_finalise(void);
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_final_ctr_el0(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
+ u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
+
+#else /* __ASSEMBLER__ */
+
+.macro get_host_ctxt reg, tmp
+ adr_this_cpu \reg, kvm_host_data, \tmp
+ add \reg, \reg, #HOST_DATA_CONTEXT
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+ get_host_ctxt \ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+.endm
+
+.macro get_loaded_vcpu vcpu, ctxt
+ adr_this_cpu \ctxt, kvm_hyp_ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+.endm
+
+.macro set_loaded_vcpu vcpu, ctxt, tmp
+ adr_this_cpu \ctxt, kvm_hyp_ctxt, \tmp
+ str \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+.endm
+
+/*
+ * KVM extable for unexpected exceptions.
+ * Create a struct kvm_exception_table_entry output to a section that can be
+ * mapped by EL2. The table is not sorted.
+ *
+ * The caller must ensure:
+ * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented
+ * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup.
+ */
+.macro _kvm_extable, from, to
+ .pushsection __kvm_ex_table, "a"
+ .align 3
+ .long (\from - .), (\to - .)
+ .popsection
+.endm
+
+#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
+#define CPU_LR_OFFSET CPU_XREG_OFFSET(30)
+#define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8)
+
+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
+.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
+ stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro restore_callee_saved_regs ctxt
+ // We require \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
+ ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro save_sp_el0 ctxt, tmp
+ mrs \tmp, sp_el0
+ str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+.endm
+
+.macro restore_sp_el0 ctxt, tmp
+ ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+ msr sp_el0, \tmp
+.endm
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
deleted file mode 100644
index 9a59301cd014..000000000000
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/include/asm/kvm_coproc.h
- * Copyright (C) 2012 Rusty Russell IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_COPROC_H__
-#define __ARM64_KVM_COPROC_H__
-
-#include <linux/kvm_host.h>
-
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-
-struct kvm_sys_reg_table {
- const struct sys_reg_desc *table;
- size_t num;
-};
-
-struct kvm_sys_reg_target_table {
- struct kvm_sys_reg_table table64;
- struct kvm_sys_reg_table table32;
-};
-
-void kvm_register_target_sys_reg_table(unsigned int target,
- struct kvm_sys_reg_target_table *table);
-
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
-
-#define kvm_coproc_table_init kvm_sys_reg_table_init
-void kvm_sys_reg_table_init(void);
-
-struct kvm_one_reg;
-int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index eec073875218..c9eab316398e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
@@ -5,60 +6,161 @@
* Derived from arch/arm/include/kvm_emulate.h
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_EMULATE_H__
#define __ARM64_KVM_EMULATE_H__
+#include <linux/bitfield.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/esr.h>
#include <asm/kvm_arm.h>
-#include <asm/kvm_mmio.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
-
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+#include <asm/cputype.h>
+#include <asm/virt.h>
+
+#define CURRENT_EL_SP_EL0_VECTOR 0x0
+#define CURRENT_EL_SP_ELx_VECTOR 0x200
+#define LOWER_EL_AArch64_VECTOR 0x400
+#define LOWER_EL_AArch32_VECTOR 0x600
+
+enum exception_type {
+ except_type_sync = 0,
+ except_type_irq = 0x80,
+ except_type_fiq = 0x100,
+ except_type_serror = 0x180,
+};
+
+#define kvm_exception_type_names \
+ { except_type_sync, "SYNC" }, \
+ { except_type_irq, "IRQ" }, \
+ { except_type_fiq, "FIQ" }, \
+ { except_type_serror, "SERROR" }
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
-static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+ return kvm_inject_sea(vcpu, true, addr);
}
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
}
-static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
+
+static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) |
+ ESR_ELx_IL;
+
+ kvm_inject_nested_sync(vcpu, esr);
}
-static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.hcr_el2 & HCR_RW);
+}
+#else
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+}
+#endif
+
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_run_once(vcpu))
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_TVM;
+}
+
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu->arch.hcr_el2;
+}
+
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 &= ~HCR_TWE;
+ if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
+ vcpu->kvm->arch.vgic.nassgireq)
+ vcpu->arch.hcr_el2 &= ~HCR_TWI;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWI;
+}
+
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 |= HCR_TWE;
+ vcpu->arch.hcr_el2 |= HCR_TWI;
+}
+
+static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vsesr_el2;
+}
+
+static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
+{
+ vcpu->arch.vsesr_el2 = vsesr;
+}
+
+static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pc;
+}
+
+static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate;
+}
+
+static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
{
return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
}
-static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return kvm_condition_valid32(vcpu);
@@ -66,115 +168,532 @@ static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
return true;
}
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
- kvm_skip_instr32(vcpu, is_wide_instr);
- else
- *vcpu_pc(vcpu) += 4;
+ *vcpu_cpsr(vcpu) |= PSR_AA32_T_BIT;
}
-static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+/*
+ * vcpu_get_reg and vcpu_set_reg should always be passed a register number
+ * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
+ * AArch32 with banked registers.
+ */
+static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+ u8 reg_num)
{
- *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
+ return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num];
}
-static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+ unsigned long val)
{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_reg32(vcpu, reg_num);
+ if (reg_num != 31)
+ vcpu_gp_regs(vcpu)->regs[reg_num] = val;
+}
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
+{
+ switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ return true;
+ default:
+ return false;
+ }
}
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_spsr32(vcpu);
+ return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
+}
+
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+ return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
+}
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * DDI0487L.b Known Issue D22105
+ *
+ * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is
+ * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO
+ * is the value programmed or 1.
+ *
+ * Make the implementation choice of treating the effective value as 1 as
+ * we cannot subsequently catch changes to TGE or AMO that would
+ * otherwise lead to the SError becoming deliverable.
+ */
+ if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu))
+ return true;
+
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+ bool e2h, tge;
+ u64 hcr;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ e2h = (hcr & HCR_E2H);
+ tge = (hcr & HCR_TGE);
+
+ /*
+ * We are in a hypervisor context if the vcpu mode is EL2 or
+ * E2H and TGE bits are set. The latter means we are in the user space
+ * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
+ *
+ * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+ * rest of the KVM code, and will result in a misbehaving guest.
+ */
+ return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
+}
+
+static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
+{
+ return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
+}
+
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
+/*
+ * The layout of SPSR for an AArch32 state is different when observed from an
+ * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
+ * view given an AArch64 view.
+ *
+ * In ARM DDI 0487E.a see:
+ *
+ * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426
+ * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256
+ * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280
+ *
+ * Which show the following differences:
+ *
+ * | Bit | AA64 | AA32 | Notes |
+ * +-----+------+------+-----------------------------|
+ * | 24 | DIT | J | J is RES0 in ARMv8 |
+ * | 21 | SS | DIT | SS doesn't exist in AArch32 |
+ *
+ * ... and all other bits are (currently) common.
+ */
+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ const unsigned long overlap = BIT(24) | BIT(21);
+ unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT);
+
+ spsr &= ~overlap;
+
+ spsr |= dit << 21;
+
+ return spsr;
}
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
{
- u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+ u32 mode;
- if (vcpu_mode_is_32bit(vcpu))
- return mode > COMPAT_PSR_MODE_USR;
+ if (vcpu_mode_is_32bit(vcpu)) {
+ mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
+ return mode > PSR_AA32_MODE_USR;
+ }
+
+ mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
return mode != PSR_MODE_EL0t;
}
-static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
-static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+ u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+ return false;
+
+ return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+ (!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
+static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+
+ if (esr & ESR_ELx_CV)
+ return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
+
+ return -1;
+}
+
+static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.far_el2;
}
-static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+ u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+ if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+ return INVALID_GPA;
+
+ return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
+}
+
+static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
{
- return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+ return vcpu->arch.fault.disr_el1;
}
-static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK;
}
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV);
+}
+
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
}
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE);
+}
+
+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF);
}
-static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
- return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+ return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW);
}
-static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+/* Always check for S1PTW *before* using this. */
+static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR;
}
-static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
{
- return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM);
+}
+
+static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+ return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
/* This one is not specific to Data Abort */
-static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL);
}
-static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+ return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
}
static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+ return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
+}
+
+static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu);
+}
+
+static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
+}
+
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
+{
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
+}
+
+static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
+{
+ switch (kvm_vcpu_trap_get_fault(vcpu)) {
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
+ return true;
+ default:
+ return false;
+ }
+}
+
+static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ return ESR_ELx_SYS64_ISS_RT(esr);
+}
+
+static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
+ }
+
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
+static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu)) {
+ *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
+ } else {
+ enum vcpu_sysreg r;
+ u64 sctlr;
+
+ r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+
+ sctlr = vcpu_read_sys_reg(vcpu, r);
+ sctlr |= SCTLR_ELx_EE;
+ vcpu_write_sys_reg(vcpu, sctlr, r);
+ }
}
-static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+ enum vcpu_sysreg r;
+ u64 bit;
+
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
+
+ r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+ bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
+
+ return vcpu_read_sys_reg(vcpu, r) & bit;
}
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return be16_to_cpu(data & 0xffff);
+ case 4:
+ return be32_to_cpu(data & 0xffffffff);
+ default:
+ return be64_to_cpu(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ case 4:
+ return le32_to_cpu(data & 0xffffffff);
+ default:
+ return le64_to_cpu(data);
+ }
+ }
+
+ return data; /* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_be16(data & 0xffff);
+ case 4:
+ return cpu_to_be32(data & 0xffffffff);
+ default:
+ return cpu_to_be64(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ case 4:
+ return cpu_to_le32(data & 0xffffffff);
+ default:
+ return cpu_to_le64(data);
+ }
+ }
+
+ return data; /* Leave LE untouched */
+}
+
+static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+ vcpu_set_flag(vcpu, INCREMENT_PC);
+}
+
+#define kvm_pend_exception(v, e) \
+ do { \
+ WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
+ vcpu_set_flag((v), PENDING_EXCEPTION); \
+ vcpu_set_flag((v), e); \
+ } while (0)
+
+/*
+ * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
+ * format if E2H isn't set.
+ */
+static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu)
+{
+ u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2);
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ cptr = translate_cptr_el2_to_cpacr_el1(cptr);
+
+ return cptr;
+}
+
+static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
+ unsigned int xen)
+{
+ switch (xen) {
+ case 0b00:
+ case 0b10:
+ return true;
+ case 0b01:
+ return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu);
+ case 0b11:
+ default:
+ return false;
+ }
+}
+
+#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
+ (!vcpu_has_nv(vcpu) ? false : \
+ ____cptr_xen_trap_enabled(vcpu, \
+ SYS_FIELD_GET(CPACR_EL1, xen, \
+ vcpu_sanitised_cptr_el2(vcpu))))
+
+static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN);
+}
+
+static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
+}
+
+static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ /*
+ * In general, all HCRX_EL2 bits are gated by a feature.
+ * The only reason we can set SMPME without checking any
+ * feature is that its effects are not directly observable
+ * from the guest.
+ */
+ vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
+
+ if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+ vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+
+ if (kvm_has_tcr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
+ }
+}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d73956864..ac7f970c7883 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
@@ -5,198 +6,1653 @@
* Derived from arch/arm/include/asm/kvm_host.h:
* Copyright (C) 2012 - Virtual Open Systems and Columbia University
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
+#include <linux/arm-smccc.h>
+#include <linux/bitmap.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kvm_types.h>
+#include <linux/maple_tree.h>
+#include <linux/percpu.h>
+#include <linux/psci.h>
+#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/cputype.h>
+#include <asm/daifflags.h>
+#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmio.h>
+#include <asm/vncr_mapping.h>
-#define KVM_MAX_VCPUS 4
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
+#define KVM_HALT_POLL_NS_DEFAULT 500000
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
+#include <kvm/arm_pmu.h>
+
+#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
+
+#define KVM_VCPU_MAX_FEATURES 9
+#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
+
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
+#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
+#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
+#define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11)
+
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
+#define KVM_HAVE_MMU_RWLOCK
+
+/*
+ * Mode of operation configurable with kvm-arm.mode early param.
+ * See Documentation/admin-guide/kernel-parameters.txt for more information.
+ */
+enum kvm_mode {
+ KVM_MODE_DEFAULT,
+ KVM_MODE_PROTECTED,
+ KVM_MODE_NV,
+ KVM_MODE_NONE,
+};
+#ifdef CONFIG_KVM
+enum kvm_mode kvm_get_mode(void);
+#else
+static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
+#endif
+
+extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
+int __init kvm_arm_init_sve(void);
+
+u32 __attribute_const__ kvm_target_cpu(void);
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
+
+struct kvm_hyp_memcache {
+ phys_addr_t head;
+ unsigned long nr_pages;
+ struct pkvm_mapping *mapping; /* only used from EL1 */
+
+#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(1)
+ unsigned long flags;
+};
+
+static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+ phys_addr_t *p,
+ phys_addr_t (*to_pa)(void *virt))
+{
+ *p = mc->head;
+ mc->head = to_pa(p);
+ mc->nr_pages++;
+}
+
+static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void *(*to_va)(phys_addr_t phys))
+{
+ phys_addr_t *p = to_va(mc->head & PAGE_MASK);
+
+ if (!mc->nr_pages)
+ return NULL;
+
+ mc->head = *p;
+ mc->nr_pages--;
+
+ return p;
+}
+
+static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
+ unsigned long min_pages,
+ void *(*alloc_fn)(void *arg),
+ phys_addr_t (*to_pa)(void *virt),
+ void *arg)
+{
+ while (mc->nr_pages < min_pages) {
+ phys_addr_t *p = alloc_fn(arg);
+
+ if (!p)
+ return -ENOMEM;
+ push_hyp_memcache(mc, p, to_pa);
+ }
+
+ return 0;
+}
+
+static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void (*free_fn)(void *virt, void *arg),
+ void *(*to_va)(phys_addr_t phys),
+ void *arg)
+{
+ while (mc->nr_pages)
+ free_fn(pop_hyp_memcache(mc, to_va), arg);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc);
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
+
+struct kvm_vmid {
+ atomic64_t id;
+};
+
+struct kvm_s2_mmu {
+ struct kvm_vmid vmid;
+
+ /*
+ * stage2 entry level table
+ *
+ * Two kvm_s2_mmu structures in the same VM can point to the same
+ * pgd here. This happens when running a guest using a
+ * translation regime that isn't affected by its own stage-2
+ * translation, such as a non-VHE hypervisor running at vEL2, or
+ * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the
+ * canonical stage-2 page tables.
+ */
+ phys_addr_t pgd_phys;
+ struct kvm_pgtable *pgt;
+
+ /*
+ * VTCR value used on the host. For a non-NV guest (or a NV
+ * guest that runs in a context where its own S2 doesn't
+ * apply), its T0SZ value reflects that of the IPA size.
+ *
+ * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+ * the guest.
+ */
+ u64 vtcr;
+
+ /* The last vcpu id that ran on each physical CPU */
+ int __percpu *last_vcpu_ran;
+
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+ /*
+ * Memory cache used to split
+ * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+ * is used to allocate stage2 page tables while splitting huge
+ * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+ * influences both the capacity of the split page cache, and
+ * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+ * too high.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_page_cache;
+ uint64_t split_page_chunk_size;
+
+ struct kvm_arch *arch;
-#define KVM_VCPU_MAX_FEATURES 2
+ /*
+ * For a shadow stage-2 MMU, the virtual vttbr used by the
+ * host to parse the guest S2.
+ * This either contains:
+ * - the virtual VTTBR programmed by the guest hypervisor with
+ * CnP cleared
+ * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
+ *
+ * We also cache the full VTCR which gets used for TLB invalidation,
+ * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
+ * to be cached in a TLB" to the letter.
+ */
+ u64 tlb_vttbr;
+ u64 tlb_vtcr;
+
+ /*
+ * true when this represents a nested context where virtual
+ * HCR_EL2.VM == 1
+ */
+ bool nested_stage2_enabled;
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+ /*
+ * true when this MMU needs to be unmapped before being used for a new
+ * purpose.
+ */
+ bool pending_unmap;
+
+ /*
+ * 0: Nobody is currently using this, check vttbr for validity
+ * >0: Somebody is actively using this.
+ */
+ atomic_t refcnt;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/**
+ * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests
+ *
+ * @std_bmap: Bitmap of standard secure service calls
+ * @std_hyp_bmap: Bitmap of standard hypervisor service calls
+ * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls
+ */
+struct kvm_smccc_features {
+ unsigned long std_bmap;
+ unsigned long std_hyp_bmap;
+ unsigned long vendor_hyp_bmap; /* Function numbers 0-63 */
+ unsigned long vendor_hyp_bmap_2; /* Function numbers 64-127 */
+};
-struct kvm_vcpu;
-int kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-int kvm_arch_dev_ioctl_check_extension(long ext);
+typedef unsigned int pkvm_handle_t;
+
+struct kvm_protected_vm {
+ pkvm_handle_t handle;
+ struct kvm_hyp_memcache teardown_mc;
+ struct kvm_hyp_memcache stage2_teardown_mc;
+ bool is_protected;
+ bool is_created;
+};
+
+struct kvm_mpidr_data {
+ u64 mpidr_mask;
+ DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx);
+};
+
+static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
+{
+ unsigned long index = 0, mask = data->mpidr_mask;
+ unsigned long aff = mpidr & MPIDR_HWID_BITMASK;
+
+ bitmap_gather(&index, &aff, &mask, fls(mask));
+
+ return index;
+}
+
+struct kvm_sysreg_masks;
+
+enum fgt_group_id {
+ __NO_FGT_GROUP__,
+ HFGRTR_GROUP,
+ HFGWTR_GROUP = HFGRTR_GROUP,
+ HDFGRTR_GROUP,
+ HDFGWTR_GROUP = HDFGRTR_GROUP,
+ HFGITR_GROUP,
+ HAFGRTR_GROUP,
+ HFGRTR2_GROUP,
+ HFGWTR2_GROUP = HFGRTR2_GROUP,
+ HDFGRTR2_GROUP,
+ HDFGWTR2_GROUP = HDFGRTR2_GROUP,
+ HFGITR2_GROUP,
+
+ /* Must be last */
+ __NR_FGT_GROUP_IDS__
+};
struct kvm_arch {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
- u32 vmid;
+ struct kvm_s2_mmu mmu;
- /* 1-level 2nd stage table and lock */
- spinlock_t pgd_lock;
- pgd_t *pgd;
+ /*
+ * Fine-Grained UNDEF, mimicking the FGT layout defined by the
+ * architecture. We track them globally, as we present the
+ * same feature-set to all vcpus.
+ *
+ * Index 0 is currently spare.
+ */
+ u64 fgu[__NR_FGT_GROUP_IDS__];
- /* VTTBR value associated with above pgd and vmid */
- u64 vttbr;
+ /*
+ * Stage 2 paging state for VMs with nested S2 using a virtual
+ * VMID.
+ */
+ struct kvm_s2_mmu *nested_mmus;
+ size_t nested_mmus_size;
+ int nested_mmus_next;
/* Interrupt controller */
struct vgic_dist vgic;
- /* Timer */
- struct arch_timer_kvm timer;
-};
+ /* Timers */
+ struct arch_timer_vm_data timer_data;
-#define KVM_NR_MEM_OBJS 40
+ /* Mandated version of PSCI */
+ u32 psci_version;
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
+ /*
+ * If we encounter a data abort without valid instruction syndrome
+ * information, report this to user space. User space can (and
+ * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+ * supported.
+ */
+#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
+ /* Memory Tagging Extension enabled for the guest */
+#define KVM_ARCH_FLAG_MTE_ENABLED 1
+ /* At least one vCPU has ran in the VM */
+#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
+ /* The vCPU feature set for the VM is configured */
+#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
+ /* PSCI SYSTEM_SUSPEND enabled for the guest */
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
+ /* Initial ID reg values loaded */
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
+ /* Fine-Grained UNDEF initialised */
+#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
+ /* SVE exposed to guest */
+#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
+ /* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
+#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
+ /* Unhandled SEAs are taken to userspace */
+#define KVM_ARCH_FLAG_EXIT_SEA 11
+ unsigned long flags;
+
+ /* VM-wide vCPU feature set */
+ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+
+ /* MPIDR to vcpu index mapping, optional */
+ struct kvm_mpidr_data *mpidr_data;
+
+ /*
+ * VM-wide PMU filter, implemented as a bitmap and big enough for
+ * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
+ */
+ unsigned long *pmu_filter;
+ struct arm_pmu *arm_pmu;
+
+ cpumask_var_t supported_cpus;
+
+ /* Maximum number of counters for the guest */
+ u8 nr_pmu_counters;
+
+ /* Iterator for idreg debugfs */
+ u8 idreg_debugfs_iter;
+
+ /* Hypercall features firmware registers' descriptor */
+ struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
+
+ /*
+ * Emulated CPU ID registers per VM
+ * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
+ * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ *
+ * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
+ * Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
+ */
+#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
+ u64 id_regs[KVM_ARM_ID_REG_NUM];
+
+ u64 midr_el1;
+ u64 revidr_el1;
+ u64 aidr_el1;
+ u64 ctr_el0;
+
+ /* Masks for VNCR-backed and general EL2 sysregs */
+ struct kvm_sysreg_masks *sysreg_masks;
+
+ /* Count the number of VNCR_EL2 currently mapped */
+ atomic_t vncr_map_count;
+
+ /*
+ * For an untrusted host VM, 'pkvm.handle' is used to lookup
+ * the associated pKVM instance in the hypervisor.
+ */
+ struct kvm_protected_vm pkvm;
};
struct kvm_vcpu_fault_info {
- u32 esr_el2; /* Hyp Syndrom Register */
+ u64 esr_el2; /* Hyp Syndrom Register */
u64 far_el2; /* Hyp Fault Address Register */
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
+ u64 disr_el1; /* Deferred [SError] Status Register */
+};
+
+/*
+ * VNCR() just places the VNCR_capable registers in the enum after
+ * __VNCR_START__, and the value (after correction) to be an 8-byte offset
+ * from the VNCR base. As we don't require the enum to be otherwise ordered,
+ * we need the terrible hack below to ensure that we correctly size the
+ * sys_regs array, no matter what.
+ *
+ * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
+ * treasure trove of bit hacks:
+ * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
+#define VNCR(r) \
+ __before_##r, \
+ r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
+ __after_##r = __MAX__(__before_##r - 1, r)
+
+#define MARKER(m) \
+ m, __after_##m = m - 1
+
+enum vcpu_sysreg {
+ __INVALID_SYSREG__, /* 0 is reserved as an invalid value */
+ MPIDR_EL1, /* MultiProcessor Affinity Register */
+ CLIDR_EL1, /* Cache Level ID Register */
+ CSSELR_EL1, /* Cache Size Selection Register */
+ TPIDR_EL0, /* Thread ID, User R/W */
+ TPIDRRO_EL0, /* Thread ID, User R/O */
+ TPIDR_EL1, /* Thread ID, Privileged */
+ CNTKCTL_EL1, /* Timer Control Register (EL1) */
+ PAR_EL1, /* Physical Address Register */
+ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ OSLSR_EL1, /* OS Lock Status Register */
+ DISR_EL1, /* Deferred Interrupt Status Register */
+
+ /* Performance Monitors Registers */
+ PMCR_EL0, /* Control Register */
+ PMSELR_EL0, /* Event Counter Selection Register */
+ PMEVCNTR0_EL0, /* Event Counter Register (0-30) */
+ PMEVCNTR30_EL0 = PMEVCNTR0_EL0 + 30,
+ PMCCNTR_EL0, /* Cycle Counter Register */
+ PMEVTYPER0_EL0, /* Event Type Register (0-30) */
+ PMEVTYPER30_EL0 = PMEVTYPER0_EL0 + 30,
+ PMCCFILTR_EL0, /* Cycle Count Filter Register */
+ PMCNTENSET_EL0, /* Count Enable Set Register */
+ PMINTENSET_EL1, /* Interrupt Enable Set Register */
+ PMOVSSET_EL0, /* Overflow Flag Status Set Register */
+ PMUSERENR_EL0, /* User Enable Register */
+
+ /* Pointer Authentication Registers in a strict increasing order. */
+ APIAKEYLO_EL1,
+ APIAKEYHI_EL1,
+ APIBKEYLO_EL1,
+ APIBKEYHI_EL1,
+ APDAKEYLO_EL1,
+ APDAKEYHI_EL1,
+ APDBKEYLO_EL1,
+ APDBKEYHI_EL1,
+ APGAKEYLO_EL1,
+ APGAKEYHI_EL1,
+
+ /* Memory Tagging Extension registers */
+ RGSR_EL1, /* Random Allocation Tag Seed Register */
+ GCR_EL1, /* Tag Control Register */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
+ POR_EL0, /* Permission Overlay Register 0 (EL0) */
+
+ /* FP/SIMD/SVE */
+ SVCR,
+ FPMR,
+
+ /* 32bit specific registers. */
+ DACR32_EL2, /* Domain Access Control Register */
+ IFSR32_EL2, /* Instruction Fault Status Register */
+ FPEXC32_EL2, /* Floating-Point Exception Control Register */
+ DBGVCR32_EL2, /* Debug Vector Catch Register */
+
+ /* EL2 registers */
+ SCTLR_EL2, /* System Control Register (EL2) */
+ ACTLR_EL2, /* Auxiliary Control Register (EL2) */
+ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
+ HACR_EL2, /* Hypervisor Auxiliary Control Register */
+ ZCR_EL2, /* SVE Control Register (EL2) */
+ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
+ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
+ TCR_EL2, /* Translation Control Register (EL2) */
+ PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */
+ PIR_EL2, /* Permission Indirection Register 1 (EL2) */
+ POR_EL2, /* Permission Overlay Register 2 (EL2) */
+ SPSR_EL2, /* EL2 saved program status register */
+ ELR_EL2, /* EL2 exception link register */
+ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
+ AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */
+ ESR_EL2, /* Exception Syndrome Register (EL2) */
+ FAR_EL2, /* Fault Address Register (EL2) */
+ HPFAR_EL2, /* Hypervisor IPA Fault Address Register */
+ MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */
+ AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */
+ VBAR_EL2, /* Vector Base Address Register (EL2) */
+ RVBAR_EL2, /* Reset Vector Base Address Register */
+ CONTEXTIDR_EL2, /* Context ID Register (EL2) */
+ SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
+
+ /* Anything from this can be RES0/RES1 sanitised */
+ MARKER(__SANITISED_REG_START__),
+ TCR2_EL2, /* Extended Translation Control Register (EL2) */
+ SCTLR2_EL2, /* System Control Register 2 (EL2) */
+ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
+
+ /* Any VNCR-capable reg goes after this point */
+ MARKER(__VNCR_START__),
+
+ VNCR(SCTLR_EL1),/* System Control Register */
+ VNCR(ACTLR_EL1),/* Auxiliary Control Register */
+ VNCR(CPACR_EL1),/* Coprocessor Access Control */
+ VNCR(ZCR_EL1), /* SVE Control */
+ VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
+ VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
+ VNCR(TCR_EL1), /* Translation Control Register */
+ VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(SCTLR2_EL1), /* System Control Register 2 */
+ VNCR(ESR_EL1), /* Exception Syndrome Register */
+ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
+ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
+ VNCR(FAR_EL1), /* Fault Address Register */
+ VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
+ VNCR(VBAR_EL1), /* Vector Base Address Register */
+ VNCR(CONTEXTIDR_EL1), /* Context ID Register */
+ VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
+ VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
+ VNCR(ELR_EL1),
+ VNCR(SP_EL1),
+ VNCR(SPSR_EL1),
+ VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
+ VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
+ VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
+ VNCR(HCR_EL2), /* Hypervisor Configuration Register */
+ VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
+ VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
+ VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
+ VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
+ VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
+
+ /* Permission Indirection Extension registers */
+ VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
+ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+
+ VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+
+ /* FEAT_RAS registers */
+ VNCR(VDISR_EL2),
+ VNCR(VSESR_EL2),
+
+ VNCR(HFGRTR_EL2),
+ VNCR(HFGWTR_EL2),
+ VNCR(HFGITR_EL2),
+ VNCR(HDFGRTR_EL2),
+ VNCR(HDFGWTR_EL2),
+ VNCR(HAFGRTR_EL2),
+ VNCR(HFGRTR2_EL2),
+ VNCR(HFGWTR2_EL2),
+ VNCR(HFGITR2_EL2),
+ VNCR(HDFGRTR2_EL2),
+ VNCR(HDFGWTR2_EL2),
+
+ VNCR(VNCR_EL2),
+
+ VNCR(CNTVOFF_EL2),
+ VNCR(CNTV_CVAL_EL0),
+ VNCR(CNTV_CTL_EL0),
+ VNCR(CNTP_CVAL_EL0),
+ VNCR(CNTP_CTL_EL0),
+
+ VNCR(ICH_LR0_EL2),
+ VNCR(ICH_LR1_EL2),
+ VNCR(ICH_LR2_EL2),
+ VNCR(ICH_LR3_EL2),
+ VNCR(ICH_LR4_EL2),
+ VNCR(ICH_LR5_EL2),
+ VNCR(ICH_LR6_EL2),
+ VNCR(ICH_LR7_EL2),
+ VNCR(ICH_LR8_EL2),
+ VNCR(ICH_LR9_EL2),
+ VNCR(ICH_LR10_EL2),
+ VNCR(ICH_LR11_EL2),
+ VNCR(ICH_LR12_EL2),
+ VNCR(ICH_LR13_EL2),
+ VNCR(ICH_LR14_EL2),
+ VNCR(ICH_LR15_EL2),
+
+ VNCR(ICH_AP0R0_EL2),
+ VNCR(ICH_AP0R1_EL2),
+ VNCR(ICH_AP0R2_EL2),
+ VNCR(ICH_AP0R3_EL2),
+ VNCR(ICH_AP1R0_EL2),
+ VNCR(ICH_AP1R1_EL2),
+ VNCR(ICH_AP1R2_EL2),
+ VNCR(ICH_AP1R3_EL2),
+ VNCR(ICH_HCR_EL2),
+ VNCR(ICH_VMCR_EL2),
+
+ NR_SYS_REGS /* Nothing after this line! */
};
+struct kvm_sysreg_masks {
+ struct {
+ u64 res0;
+ u64 res1;
+ } mask[NR_SYS_REGS - __SANITISED_REG_START__];
+};
+
+struct fgt_masks {
+ const char *str;
+ u64 mask;
+ u64 nmask;
+ u64 res0;
+};
+
+extern struct fgt_masks hfgrtr_masks;
+extern struct fgt_masks hfgwtr_masks;
+extern struct fgt_masks hfgitr_masks;
+extern struct fgt_masks hdfgrtr_masks;
+extern struct fgt_masks hdfgwtr_masks;
+extern struct fgt_masks hafgrtr_masks;
+extern struct fgt_masks hfgrtr2_masks;
+extern struct fgt_masks hfgwtr2_masks;
+extern struct fgt_masks hfgitr2_masks;
+extern struct fgt_masks hdfgrtr2_masks;
+extern struct fgt_masks hdfgwtr2_masks;
+
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hafgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks);
+
struct kvm_cpu_context {
- struct kvm_regs gp_regs;
- union {
- u64 sys_regs[NR_SYS_REGS];
- u32 cp15[NR_CP15_REGS];
- };
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ u64 spsr_abt;
+ u64 spsr_und;
+ u64 spsr_irq;
+ u64 spsr_fiq;
+
+ struct user_fpsimd_state fp_regs;
+
+ u64 sys_regs[NR_SYS_REGS];
+
+ struct kvm_vcpu *__hyp_running_vcpu;
+
+ /* This pointer has to be 4kB aligned. */
+ u64 *vncr_array;
+};
+
+struct cpu_sve_state {
+ __u64 zcr_el1;
+
+ /*
+ * Ordering is important since __sve_save_state/__sve_restore_state
+ * relies on it.
+ */
+ __u32 fpsr;
+ __u32 fpcr;
+
+ /* Must be SVE_VQ_BYTES (128 bit) aligned. */
+ __u8 sve_regs[];
+};
+
+/*
+ * This structure is instantiated on a per-CPU basis, and contains
+ * data that is:
+ *
+ * - tied to a single physical CPU, and
+ * - either have a lifetime that does not extend past vcpu_put()
+ * - or is an invariant for the lifetime of the system
+ *
+ * Use host_data_ptr(field) as a way to access a pointer to such a
+ * field.
+ */
+struct kvm_host_data {
+#define KVM_HOST_DATA_FLAG_HAS_SPE 0
+#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
+#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
+#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
+#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7
+#define KVM_HOST_DATA_FLAG_HAS_BRBE 8
+ unsigned long flags;
+
+ struct kvm_cpu_context host_ctxt;
+
+ /*
+ * Hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+ struct cpu_sve_state *sve_state;
+
+ /* Used by pKVM only. */
+ u64 fpmr;
+
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_owner;
+
+ /*
+ * host_debug_state contains the host registers which are
+ * saved and restored during world switches.
+ */
+ struct {
+ /* {Break,watch}point registers */
+ struct kvm_guest_debug_arch regs;
+ /* Statistical profiling extension */
+ u64 pmscr_el1;
+ /* Self-hosted trace */
+ u64 trfcr_el1;
+ /* Values of trap registers for the host before guest entry. */
+ u64 mdcr_el2;
+ u64 brbcr_el1;
+ } host_debug_state;
+
+ /* Guest trace filter value */
+ u64 trfcr_while_in_guest;
+
+ /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
+ unsigned int nr_event_counters;
+
+ /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
+ unsigned int debug_brps;
+ unsigned int debug_wrps;
+};
+
+struct kvm_host_psci_config {
+ /* PSCI version used by host. */
+ u32 version;
+ u32 smccc_version;
+
+ /* Function IDs used by host if version is v0.1. */
+ struct psci_0_1_function_ids function_ids_0_1;
+
+ bool psci_0_1_cpu_suspend_implemented;
+ bool psci_0_1_cpu_on_implemented;
+ bool psci_0_1_cpu_off_implemented;
+ bool psci_0_1_migrate_implemented;
+};
+
+extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
+#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
+
+extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
+#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset)
+
+extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
+#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)
+
+struct vcpu_reset_state {
+ unsigned long pc;
+ unsigned long r0;
+ bool be;
+ bool reset;
};
-typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct vncr_tlb;
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
- /* HYP configuration */
+ /*
+ * Guest floating point state
+ *
+ * The architecture has two main floating point extensions,
+ * the original FPSIMD and SVE. These have overlapping
+ * register views, with the FPSIMD V registers occupying the
+ * low 128 bits of the SVE Z registers. When the core
+ * floating point code saves the register state of a task it
+ * records which view it saved in fp_type.
+ */
+ void *sve_state;
+ enum fp_type fp_type;
+ unsigned int sve_max_vl;
+
+ /* Stage 2 paging state used by the hardware on next switch */
+ struct kvm_s2_mmu *hw_mmu;
+
+ /* Values of trap registers for the guest. */
u64 hcr_el2;
+ u64 hcrx_el2;
+ u64 mdcr_el2;
+
+ struct {
+ u64 r;
+ u64 w;
+ } fgt[__NR_FGT_GROUP_IDS__];
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Pointer to host CPU context */
- kvm_cpu_context_t *host_cpu_context;
+ /* Configuration flags, set once and for all before the vcpu can run */
+ u8 cflags;
- /* VGIC state */
- struct vgic_cpu vgic_cpu;
- struct arch_timer_cpu timer_cpu;
+ /* Input flags to the hypervisor code, potentially cleared after use */
+ u8 iflags;
+
+ /* State flags for kernel bookkeeping, unused by the hypervisor code */
+ u16 sflags;
/*
- * Anything that is not used directly from assembly code goes
- * here.
+ * Don't run the guest (internal implementation need).
+ *
+ * Contrary to the flags above, this is set/cleared outside of
+ * a vcpu context, and thus cannot be mixed with the flags
+ * themselves (or the flag accesses need to be made atomic).
*/
- /* dcache set/way operation pending */
- int last_pcpu;
- cpumask_t require_dcache_flush;
-
- /* Don't run the guest */
bool pause;
- /* IO related fields */
- struct kvm_decode mmio_decode;
+ /*
+ * We maintain more than a single set of debug registers to support
+ * debugging the guest from the host and to maintain separate host and
+ * guest state during world switches. vcpu_debug_state are the debug
+ * registers of the vcpu as the guest sees them.
+ *
+ * external_debug_state contains the debug values we want to debug the
+ * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
+ */
+ struct kvm_guest_debug_arch vcpu_debug_state;
+ struct kvm_guest_debug_arch external_debug_state;
+ u64 external_mdscr_el1;
- /* Interrupt related fields */
- u64 irq_lines; /* IRQ and FIQ levels */
+ enum {
+ VCPU_DEBUG_FREE,
+ VCPU_DEBUG_HOST_OWNED,
+ VCPU_DEBUG_GUEST_OWNED,
+ } debug_owner;
+
+ /* VGIC state */
+ struct vgic_cpu vgic_cpu;
+ struct arch_timer_cpu timer_cpu;
+ struct kvm_pmu pmu;
+
+ /* vcpu power state */
+ struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* Target CPU and feature flags */
- u32 target;
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+ /* Pages to top-up the pKVM/EL2 guest pool */
+ struct kvm_hyp_memcache pkvm_memcache;
+
+ /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
+ u64 vsesr_el2;
- /* Detect first run of a vcpu */
- bool has_run_once;
+ /* Additional reset state */
+ struct vcpu_reset_state reset_state;
+
+ /* Guest PV state */
+ struct {
+ u64 last_steal;
+ gpa_t base;
+ } steal;
+
+ /* Per-vcpu CCSIDR override or NULL */
+ u32 *ccsidr;
+
+ /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+ struct vncr_tlb *vncr_tlb;
};
-#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
-#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)])
+/*
+ * Each 'flag' is composed of a comma-separated triplet:
+ *
+ * - the flag-set it belongs to in the vcpu->arch structure
+ * - the value for that flag
+ * - the mask for that flag
+ *
+ * __vcpu_single_flag() builds such a triplet for a single-bit flag.
+ * unpack_vcpu_flag() extract the flag value from the triplet for
+ * direct use outside of the flag accessors.
+ */
+#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
+
+#define __unpack_flag(_set, _f, _m) _f
+#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
+
+#define __build_check_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *_fset; \
+ \
+ /* Check that the flags fit in the mask */ \
+ BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
+ /* Check that the flags fit in the type */ \
+ BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
+ } while (0)
+
+#define __vcpu_get_flag(v, flagset, f, m) \
+ ({ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ READ_ONCE(v->arch.flagset) & (m); \
+ })
+
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable() preempt_disable()
+#define __vcpu_flags_preempt_enable() preempt_enable()
+#endif
+
+#define __vcpu_set_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ if (HWEIGHT(m) > 1) \
+ *fset &= ~(m); \
+ *fset |= (f); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define __vcpu_clear_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ *fset &= ~(m); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define __vcpu_test_and_clear_flag(v, flagset, f, m) \
+ ({ \
+ typeof(v->arch.flagset) set; \
+ \
+ set = __vcpu_get_flag(v, flagset, f, m); \
+ __vcpu_clear_flag(v, flagset, f, m); \
+ \
+ set; \
+ })
+
+#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
+#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
+#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...) \
+ __vcpu_test_and_clear_flag((v), __VA_ARGS__)
+
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
+/* SVE config completed */
+#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
+/* pKVM VCPU setup completed */
+#define VCPU_PKVM_FINALIZED __vcpu_single_flag(cflags, BIT(2))
+
+/* Exception pending */
+#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
+/*
+ * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
+ * be set together with an exception...
+ */
+#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
+/* Target EL/MODE (not a single flag, but let's abuse the macro) */
+#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
+
+/* Helpers to encode exceptions with minimum fuss */
+#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
+#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
+#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
+
+/*
+ * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
+ * values:
+ *
+ * For AArch32 EL1:
+ */
+#define EXCEPT_AA32_UND __vcpu_except_flags(0)
+#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
+#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
+/* For AArch64: */
+#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
+#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
+#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
+#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
+/* For AArch64 with NV: */
+#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
+#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
+#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
+#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
+
+/* Physical CPU not in supported_cpus */
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0))
+/* WFIT instruction trapped */
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(1))
+/* vcpu system registers loaded on physical CPU */
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2))
+/* Software step state is Active-pending for external debug */
+#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3))
+/* Software step state is Active pending for guest debug */
+#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
+/* KVM is currently emulating a nested ERET */
+#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
+
+
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
+ sve_ffr_offset((vcpu)->arch.sve_max_vl))
+
+#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
+
+#define vcpu_sve_zcr_elx(vcpu) \
+ (unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)
+
+#define sve_state_size_from_vl(sve_max_vl) ({ \
+ size_t __size_ret; \
+ unsigned int __vq; \
+ \
+ if (WARN_ON(!sve_vl_valid(sve_max_vl))) { \
+ __size_ret = 0; \
+ } else { \
+ __vq = sve_vq_from_vl(sve_max_vl); \
+ __size_ret = SVE_SIG_REGS_SIZE(__vq); \
+ } \
+ \
+ __size_ret; \
+})
+
+#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl)
+
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW | \
+ KVM_GUESTDBG_SINGLESTEP)
+
+#define kvm_has_sve(kvm) (system_supports_sve() && \
+ test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags))
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm))
+#else
+#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm)
+#endif
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define vcpu_has_ptrauth(vcpu) \
+ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
+ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+#else
+#define vcpu_has_ptrauth(vcpu) false
+#endif
+
+#define vcpu_on_unsupported_cpu(vcpu) \
+ vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
+
+#define vcpu_set_on_unsupported_cpu(vcpu) \
+ vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
+
+#define vcpu_clear_on_unsupported_cpu(vcpu) \
+ vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
+
+#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
+
+/*
+ * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the
+ * memory backed version of a register, and not the one most recently
+ * accessed by a running VCPU. For example, for userspace access or
+ * for system registers that are never context switched, but only
+ * emulated.
+ *
+ * Don't bother with VNCR-based accesses in the nVHE code, it has no
+ * business dealing with NV.
+ */
+static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+{
+#if !defined (__KVM_NVHE_HYPERVISOR__)
+ if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ r >= __VNCR_START__ && ctxt->vncr_array))
+ return &ctxt->vncr_array[r - __VNCR_START__];
+#endif
+ return (u64 *)&ctxt->sys_regs[r];
+}
+
+#define __ctxt_sys_reg(c,r) \
+ ({ \
+ BUILD_BUG_ON(__builtin_constant_p(r) && \
+ (r) >= NR_SYS_REGS); \
+ ___ctxt_sys_reg(c, r); \
+ })
+
+#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
+
+u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
+
+#define __vcpu_assign_sys_reg(v, r, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
+#define __vcpu_rmw_sys_reg(v, r, op, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ __v op (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
+#define __vcpu_sys_reg(v,r) \
+ ({ \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ __v; \
+ })
+
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg);
struct kvm_vm_stat {
- u32 remote_tlb_flush;
+ struct kvm_vm_stat_generic generic;
};
struct kvm_vcpu_stat {
- u32 halt_wakeup;
+ struct kvm_vcpu_stat_generic generic;
+ u64 hvc_exit_stat;
+ u64 wfe_exit_stat;
+ u64 wfi_exit_stat;
+ u64 mmio_exit_user;
+ u64 mmio_exit_kernel;
+ u64 signal_exits;
+ u64 exits;
};
-struct kvm_vcpu_init;
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
- const struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
-int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
+
+void kvm_arm_halt_guest(struct kvm *kvm);
+void kvm_arm_resume_guest(struct kvm *kvm);
+
+#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid))
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+#define kvm_call_hyp_nvhe(f, ...) \
+ ({ \
+ struct arm_smccc_res res; \
+ \
+ arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \
+ ##__VA_ARGS__, &res); \
+ WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \
+ \
+ res.a1; \
+ })
+
+/*
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
+ * where the eret to EL1 acts as a context synchronization event.
+ */
+#define kvm_call_hyp(f, ...) \
+ do { \
+ if (has_vhe()) { \
+ f(__VA_ARGS__); \
+ isb(); \
+ } else { \
+ kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
+ } \
+ } while(0)
+
+#define kvm_call_hyp_ret(f, ...) \
+ ({ \
+ typeof(f(__VA_ARGS__)) ret; \
+ \
+ if (has_vhe()) { \
+ ret = f(__VA_ARGS__); \
+ } else { \
+ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
+ } \
+ \
+ ret; \
+ })
+#else /* __KVM_NVHE_HYPERVISOR__ */
+#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+
+int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
+void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
+
+void kvm_sys_regs_create_debugfs(struct kvm *kvm);
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+int __init kvm_sys_reg_table_init(void);
+struct sys_reg_desc;
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+ unsigned int idx);
+int __init populate_nv_trap_config(void);
-/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+void kvm_calculate_traps(struct kvm_vcpu *vcpu);
+
+/* MMIO helpers */
+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
+int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
+
+/*
+ * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
+ * arrived in guest context. For arm64, any event that arrives while a vCPU is
+ * loaded is considered to be "in guest".
+ */
+static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
{
- return 0;
+ return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+
+bool kvm_arm_pvtime_supported(void);
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+extern unsigned int __ro_after_init kvm_arm_vmid_bits;
+int __init kvm_arm_vmid_alloc_init(void);
+void __init kvm_arm_vmid_alloc_free(void);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_clear_active(void);
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
- return 0;
+ vcpu_arch->steal.base = INVALID_GPA;
}
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+ return (vcpu_arch->steal.base != INVALID_GPA);
+}
-u64 kvm_call_hyp(void *hypfn, ...);
+struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index);
+DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
-int kvm_perf_init(void);
-int kvm_perf_teardown(void);
+/*
+ * How we access per-CPU host data depends on the where we access it from,
+ * and the mode we're in:
+ *
+ * - VHE and nVHE hypervisor bits use their locally defined instance
+ *
+ * - the rest of the kernel use either the VHE or nVHE one, depending on
+ * the mode we're running in.
+ *
+ * Unless we're in protected mode, fully deprivileged, and the nVHE
+ * per-CPU stuff is exclusively accessible to the protected EL2 code.
+ * In this case, the EL1 code uses the *VHE* data as its private state
+ * (which makes sense in a way as there shouldn't be any shared state
+ * between the host and the hypervisor).
+ *
+ * Yes, this is all totally trivial. Shoot me now.
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f)
+#else
+#define host_data_ptr(f) \
+ (static_branch_unlikely(&kvm_protected_mode_initialized) ? \
+ &this_cpu_ptr(&kvm_host_data)->f : \
+ &this_cpu_ptr_hyp_sym(kvm_host_data)->f)
+#endif
+
+#define host_data_test_flag(flag) \
+ (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)))
+#define host_data_set_flag(flag) \
+ set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+#define host_data_clear_flag(flag) \
+ clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
-static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
- phys_addr_t pgd_ptr,
- unsigned long hyp_stack_ptr,
- unsigned long vector_ptr)
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(void)
{
- /*
- * Call initialization code, and switch to the full blown
- * HYP code.
- */
- kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
- hyp_stack_ptr, vector_ptr);
+ return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED;
}
+/* Check whether the FP regs are owned by the host */
+static inline bool host_owns_fp_regs(void)
+{
+ return *host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED;
+}
+
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
+{
+ /* The host's MPIDR is immutable, so let's set it up at boot time */
+ ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
+}
+
+static inline bool kvm_system_needs_idmapped_vectors(void)
+{
+ return cpus_have_final_cap(ARM64_SPECTRE_V3A);
+}
+
+void kvm_init_host_debug_data(void);
+void kvm_debug_init_vhe(void);
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val);
+
+#define kvm_vcpu_os_lock_enabled(vcpu) \
+ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+
+#define kvm_debug_regs_in_use(vcpu) \
+ ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE)
+#define kvm_host_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED)
+#define kvm_guest_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED)
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
+int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
+ struct reg_mask_range *range);
+
+/* Guest/host FPSIMD coordination helpers */
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+
+static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
+{
+ return (!has_vhe() && attr->exclude_host);
+}
+
+#ifdef CONFIG_KVM
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u64 clr);
+bool kvm_set_pmuserenr(u64 val);
+void kvm_enable_trbe(void);
+void kvm_disable_trbe(void);
+void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest);
+#else
+static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u64 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
+static inline void kvm_enable_trbe(void) {}
+static inline void kvm_disable_trbe(void) {}
+static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {}
+#endif
+
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
+
+int __init kvm_set_ipa_limit(void);
+u32 kvm_get_pa_bits(struct kvm *kvm);
+
+#define __KVM_HAVE_ARCH_VM_ALLOC
+struct kvm *kvm_arch_alloc_vm(void);
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
+#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected)
+
+#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)
+
+int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
+bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
+
+#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
+
+#define kvm_has_mte(kvm) \
+ (system_supports_mte() && \
+ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
+
+#define kvm_supports_32bit_el0() \
+ (system_supports_32bit_el0() && \
+ !static_branch_unlikely(&arm64_mismatched_32bit_el0))
+
+#define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+
+static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+{
+ return test_bit(feature, ka->vcpu_features);
+}
+
+#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f))
+#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
+#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
+
+int kvm_trng_call(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM
+extern phys_addr_t hyp_mem_base;
+extern phys_addr_t hyp_mem_size;
+void __init kvm_hyp_reserve(void);
+#else
+static inline void kvm_hyp_reserve(void) { }
+#endif
+
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
+
+static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg)
+{
+ switch (reg) {
+ case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7):
+ return &ka->id_regs[IDREG_IDX(reg)];
+ case SYS_CTR_EL0:
+ return &ka->ctr_el0;
+ case SYS_MIDR_EL1:
+ return &ka->midr_el1;
+ case SYS_REVIDR_EL1:
+ return &ka->revidr_el1;
+ case SYS_AIDR_EL1:
+ return &ka->aidr_el1;
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+}
+
+#define kvm_read_vm_id_reg(kvm, reg) \
+ ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; })
+
+void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
+
+#define __expand_field_sign_unsigned(id, fld, val) \
+ ((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val) \
+ ({ \
+ u64 __val = SYS_FIELD_VALUE(id, fld, val); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define get_idreg_field_unsigned(kvm, id, fld) \
+ ({ \
+ u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
+ FIELD_GET(id##_##fld##_MASK, __val); \
+ })
+
+#define get_idreg_field_signed(kvm, id, fld) \
+ ({ \
+ u64 __val = get_idreg_field_unsigned(kvm, id, fld); \
+ sign_extend64(__val, id##_##fld##_WIDTH - 1); \
+ })
+
+#define get_idreg_field_enum(kvm, id, fld) \
+ get_idreg_field_unsigned(kvm, id, fld)
+
+#define kvm_cmp_feat_signed(kvm, id, fld, op, limit) \
+ (get_idreg_field_signed((kvm), id, fld) op __expand_field_sign_signed(id, fld, limit))
+
+#define kvm_cmp_feat_unsigned(kvm, id, fld, op, limit) \
+ (get_idreg_field_unsigned((kvm), id, fld) op __expand_field_sign_unsigned(id, fld, limit))
+
+#define kvm_cmp_feat(kvm, id, fld, op, limit) \
+ (id##_##fld##_SIGNED ? \
+ kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
+ kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
+
+#define __kvm_has_feat(kvm, id, fld, limit) \
+ kvm_cmp_feat(kvm, id, fld, >=, limit)
+
+#define kvm_has_feat(kvm, ...) __kvm_has_feat(kvm, __VA_ARGS__)
+
+#define __kvm_has_feat_enum(kvm, id, fld, val) \
+ kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
+
+#define kvm_has_feat_enum(kvm, ...) __kvm_has_feat_enum(kvm, __VA_ARGS__)
+
+#define kvm_has_feat_range(kvm, id, fld, min, max) \
+ (kvm_cmp_feat(kvm, id, fld, >=, min) && \
+ kvm_cmp_feat(kvm, id, fld, <=, max))
+
+/* Check for a given level of PAuth support */
+#define kvm_has_pauth(k, l) \
+ ({ \
+ bool pa, pi, pa3; \
+ \
+ pa = kvm_has_feat((k), ID_AA64ISAR1_EL1, APA, l); \
+ pa &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPA, IMP); \
+ pi = kvm_has_feat((k), ID_AA64ISAR1_EL1, API, l); \
+ pi &= kvm_has_feat((k), ID_AA64ISAR1_EL1, GPI, IMP); \
+ pa3 = kvm_has_feat((k), ID_AA64ISAR2_EL1, APA3, l); \
+ pa3 &= kvm_has_feat((k), ID_AA64ISAR2_EL1, GPA3, IMP); \
+ \
+ (pa + pi + pa3) == 1; \
+ })
+
+#define kvm_has_fpmr(k) \
+ (system_supports_fpmr() && \
+ kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
+
+#define kvm_has_tcr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP))
+
+#define kvm_has_s1pie(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
+
+#define kvm_has_s1poe(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+
+#define kvm_has_ras(k) \
+ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
+
+#define kvm_has_sctlr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP))
+
+static inline bool kvm_arch_has_irq_bypass(void)
+{
+ return true;
+}
+
+void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
+void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
+void check_feature_map(void);
+void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu);
+
+static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg)
+{
+ switch (reg) {
+ case HFGRTR_EL2:
+ case HFGWTR_EL2:
+ return HFGRTR_GROUP;
+ case HFGITR_EL2:
+ return HFGITR_GROUP;
+ case HDFGRTR_EL2:
+ case HDFGWTR_EL2:
+ return HDFGRTR_GROUP;
+ case HAFGRTR_EL2:
+ return HAFGRTR_GROUP;
+ case HFGRTR2_EL2:
+ case HFGWTR2_EL2:
+ return HFGRTR2_GROUP;
+ case HFGITR2_EL2:
+ return HFGITR2_GROUP;
+ case HDFGRTR2_EL2:
+ case HDFGWTR2_EL2:
+ return HDFGRTR2_GROUP;
+ default:
+ BUILD_BUG_ON(1);
+ }
+}
+
+#define vcpu_fgt(vcpu, reg) \
+ ({ \
+ enum fgt_group_id id = __fgt_reg_to_group_id(reg); \
+ u64 *p; \
+ switch (reg) { \
+ case HFGWTR_EL2: \
+ case HDFGWTR_EL2: \
+ case HFGWTR2_EL2: \
+ case HDFGWTR2_EL2: \
+ p = &(vcpu)->arch.fgt[id].w; \
+ break; \
+ default: \
+ p = &(vcpu)->arch.fgt[id].r; \
+ break; \
+ } \
+ \
+ p; \
+ })
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
new file mode 100644
index 000000000000..76ce2b94bd97
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_H__
+#define __ARM64_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/alternative.h>
+#include <asm/sysreg.h>
+
+DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
+DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
+ */
+
+#if defined(__KVM_VHE_HYPERVISOR__)
+
+#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
+#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
+#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
+#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
+#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
+#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
+
+#else // !__KVM_VHE_HYPERVISOR__
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define VHE_ALT_KEY ARM64_KVM_HVHE
+#else
+#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
+#endif
+
+#define read_sysreg_elx(r,nvh,vh) \
+ ({ \
+ u64 reg; \
+ asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
+ __mrs_s("%0", r##vh), \
+ VHE_ALT_KEY) \
+ : "=r" (reg)); \
+ reg; \
+ })
+
+#define write_sysreg_elx(v,r,nvh,vh) \
+ do { \
+ u64 __val = (u64)(v); \
+ asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
+ __msr_s(r##vh, "%x0"), \
+ VHE_ALT_KEY) \
+ : : "rZ" (__val)); \
+ } while (0)
+
+#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
+#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
+#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
+#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
+#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
+#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+
+#endif // __KVM_VHE_HYPERVISOR__
+
+/*
+ * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
+ * static inline can allow the compiler to out-of-line this. KVM always wants
+ * the macro version as it's always inlined.
+ */
+#define __kvm_swab32(x) ___constant_swab32(x)
+
+int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
+
+u64 __gic_v3_get_lr(unsigned int lr);
+void __gic_v3_set_lr(u64 val, int lr);
+
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __timer_enable_traps(struct kvm_vcpu *vcpu);
+void __timer_disable_traps(struct kvm_vcpu *vcpu);
+#endif
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+#else
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu);
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
+#endif
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
+void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
+void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
+void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
+
+u64 __guest_enter(struct kvm_vcpu *vcpu);
+
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ u64 elr, u64 par);
+#endif
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp,
+ void (*fn)(void));
+int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
+ unsigned long *per_cpu_base, u32 hyp_va_bits);
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+#endif
+
+extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
+
+extern unsigned long kvm_nvhe_sym(__icache_flags);
+extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
+
+#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
deleted file mode 100644
index fc2f689c0694..000000000000
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_MMIO_H__
-#define __ARM64_KVM_MMIO_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-
-/*
- * This is annoying. The mmio code requires this, even if we don't
- * need any decoding. To be fixed.
- */
-struct kvm_decode {
- unsigned long rt;
- bool sign_extend;
-};
-
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
- phys_addr_t phys_addr;
- u8 data[8];
- u32 len;
- bool is_write;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- run->mmio.phys_addr = mmio->phys_addr;
- run->mmio.len = mmio->len;
- run->mmio.is_write = mmio->is_write;
- memcpy(run->mmio.data, mmio->data, mmio->len);
- run->exit_reason = KVM_EXIT_MMIO;
-}
-
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa);
-
-#endif /* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index efe609c6a3c9..2dc5e6e742bb 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012,2013 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_KVM_MMU_H__
@@ -20,116 +9,392 @@
#include <asm/page.h>
#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/cpufeature.h>
/*
- * As we only have the TTBR0_EL2 register, we cannot express
+ * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express
* "negative" addresses. This makes it impossible to directly share
* mappings with the kernel.
*
* Instead, give the HYP mode its own VA region at a fixed offset from
* the kernel by just masking the top bits (which are all ones for a
- * kernel address).
+ * kernel address). We need to find out how many bits to mask.
+ *
+ * We want to build a set of page tables that cover both parts of the
+ * idmap (the trampoline page used to initialize EL2), and our normal
+ * runtime VA space, at the same time.
+ *
+ * Given that the kernel uses VA_BITS for its entire address space,
+ * and that half of that space (VA_BITS - 1) is used for the linear
+ * mapping, we can also limit the EL2 space to (VA_BITS - 1).
+ *
+ * The main question is "Within the VA_BITS space, does EL2 use the
+ * top or the bottom half of that space to shadow the kernel's linear
+ * mapping?". As we need to idmap the trampoline page, this is
+ * determined by the range in which this page lives.
+ *
+ * If the page is in the bottom half, we have to use the top half. If
+ * the page is in the top half, we have to use the bottom half:
+ *
+ * T = __pa_symbol(__hyp_idmap_text_start)
+ * if (T & BIT(VA_BITS - 1))
+ * HYP_VA_MIN = 0 //idmap in upper half
+ * else
+ * HYP_VA_MIN = 1 << (VA_BITS - 1)
+ * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
+ *
+ * When using VHE, there are no separate hyp mappings and all KVM
+ * functionality is already mapped as part of the main kernel
+ * mappings, and none of this applies in that case.
*/
-#define HYP_PAGE_OFFSET_SHIFT VA_BITS
-#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
-#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+#ifdef __ASSEMBLER__
+
+#include <asm/alternative.h>
/*
- * Our virtual mapping for the idmap-ed MMU-enable code. Must be
- * shared across all the page-tables. Conveniently, we use the last
- * possible page, where no kernel mapping will ever exist.
+ * Convert a hypervisor VA to a PA
+ * reg: hypervisor address to be converted in place
+ * tmp: temporary register
*/
-#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
-
-#ifdef __ASSEMBLY__
+.macro hyp_pa reg, tmp
+ ldr_l \tmp, hyp_physvirt_offset
+ add \reg, \reg, \tmp
+.endm
/*
- * Convert a kernel VA into a HYP VA.
- * reg: VA to be converted.
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_get_kimage_voffset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_get_kimage_voffset uses the
+ * specific registers encoded in the instructions).
*/
-.macro kern_hyp_va reg
- and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+.macro hyp_kimg_va reg, tmp
+ /* Convert hyp VA -> PA. */
+ hyp_pa \reg, \tmp
+
+ /* Load kimage_voffset. */
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
+alternative_cb_end
+
+ /* Convert PA -> kimg VA. */
+ add \reg, \reg, \tmp
.endm
#else
-#include <asm/cachetype.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/cache.h>
#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_nested.h>
+
+void kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_layout(void);
+void kvm_apply_hyp_relocations(void);
-#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
+#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
/*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * Convert a kernel VA into a HYP VA.
+ *
+ * Can be called from hyp or non-hyp context.
+ *
+ * The actual code generation takes place in kvm_update_va_mask(), and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask() uses the
+ * specific registers encoded in the instructions).
+ */
+static __always_inline unsigned long __kern_hyp_va(unsigned long v)
+{
+/*
+ * This #ifndef is an optimisation for when this is called from VHE hyp
+ * context. When called from a VHE non-hyp context, kvm_update_va_mask() will
+ * replace the instructions with `nop`s.
*/
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
-#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+#ifndef __KVM_VHE_HYPERVISOR__
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" /* mask with va_mask */
+ "ror %0, %0, #1\n" /* rotate to the first tag bit */
+ "add %0, %0, #0\n" /* insert the low 12 bits of the tag */
+ "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */
+ "ror %0, %0, #63\n", /* rotate back */
+ ARM64_ALWAYS_SYSTEM,
+ kvm_update_va_mask)
+ : "+r" (v));
+#endif
+ return v;
+}
-/* Make sure we get the right size, and thus the right alignment */
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
-#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
-int create_hyp_mappings(void *from, void *to);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_boot_hyp_pgd(void);
-void free_hyp_pgds(void);
+extern u32 __hyp_va_bits;
-int kvm_alloc_stage2_pgd(struct kvm *kvm);
-void kvm_free_stage2_pgd(struct kvm *kvm);
-int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
- phys_addr_t pa, unsigned long size);
+/*
+ * We currently support using a VM-specified IPA size. For backward
+ * compatibility, the default IPA size is fixed to 40bits.
+ */
+#define KVM_PHYS_SHIFT (40)
+
+#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL))
+
+#include <asm/kvm_pgtable.h>
+#include <asm/stage2_pgtable.h>
+
+int kvm_share_hyp(void *from, void *to);
+void kvm_unshare_hyp(void *from, void *to);
+int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
+int __create_hyp_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot);
+int hyp_alloc_private_va_range(size_t size, unsigned long *haddr);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
+void __init free_hyp_pgds(void);
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
+ u64 size, bool may_block);
+void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
+void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
-void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+void stage2_unmap_vm(struct kvm *kvm);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+ phys_addr_t pa, unsigned long size, bool writable);
+
+int kvm_handle_guest_sea(struct kvm_vcpu *vcpu);
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
-phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
-int kvm_mmu_init(void);
-void kvm_clear_hyp_idmap(void);
+int __init kvm_mmu_init(u32 *hyp_va_bits);
+
+static inline void *__kvm_vector_slot2addr(void *base,
+ enum arm64_hyp_spectre_vector slot)
+{
+ int idx = slot - (slot != HYP_VECTOR_DIRECT);
+
+ return base + (idx * SZ_2K);
+}
-#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
+struct kvm;
+
+#define kvm_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
-static inline bool kvm_is_write_fault(unsigned long esr)
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
+ u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
+ int reg;
+
+ if (vcpu_is_el2(vcpu))
+ reg = SCTLR_EL2;
+ else
+ reg = SCTLR_EL1;
- if (esr_ec == ESR_EL2_EC_IABT)
- return false;
+ return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
+}
- if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
- return false;
+static inline void __clean_dcache_guest_page(void *va, size_t size)
+{
+ /*
+ * With FWB, we ensure that the guest always accesses memory using
+ * cacheable attributes, and we don't have to clean to PoC when
+ * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+ * PoU is not required either in this case.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ return;
- return true;
+ kvm_flush_dcache_to_poc(va, size);
}
-static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
-static inline void kvm_clean_pgd(pgd_t *pgd) {}
-static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
-static inline void kvm_clean_pte(pte_t *pte) {}
-static inline void kvm_clean_pte_entry(pte_t *pte) {}
+static inline size_t __invalidate_icache_max_range(void)
+{
+ u8 iminline;
+ u64 ctr;
+
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ ARM64_ALWAYS_SYSTEM,
+ kvm_compute_final_ctr_el0)
+ : "=r" (ctr));
+
+ iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2;
+ return MAX_DVM_OPS << iminline;
+}
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
- pte_val(*pte) |= PTE_S2_RDWR;
+ /*
+ * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
+ * invalidation range exceeds our arbitrary limit on invadations by
+ * cache line.
+ */
+ if (icache_is_aliasing() || size > __invalidate_icache_max_range())
+ icache_inval_all_pou();
+ else
+ icache_inval_pou((unsigned long)va, (unsigned long)va + size);
}
-struct kvm;
+void kvm_set_way_flush(struct kvm_vcpu *vcpu);
+void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
+
+static inline unsigned int kvm_get_vmid_bits(void)
+{
+ int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+ return get_vmid_bits(reg);
+}
+
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
{
- if (!icache_is_aliasing()) { /* PIPT */
- unsigned long hva = gfn_to_hva(kvm, gfn);
- flush_icache_range(hva, hva + PAGE_SIZE);
- } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
- /* any kind of VIPT cache */
- __flush_icache_all();
- }
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
}
-#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
+ const void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_write_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
+#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
+
+/*
+ * When this is (directly or indirectly) used on the TLB invalidation
+ * path, we rely on a previously issued DSB so that page table updates
+ * and VMID reads are correctly ordered.
+ */
+static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
+{
+ struct kvm_vmid *vmid = &mmu->vmid;
+ u64 vmid_field, baddr;
+ u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
+
+ baddr = mmu->pgd_phys;
+ vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
+ vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits);
+ return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
+}
+
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
+ struct kvm_arch *arch)
+{
+ write_sysreg(mmu->vtcr, vtcr_el2);
+ write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL1/EL0 translation regime used by
+ * the guest.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+}
+
+static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
+{
+ return container_of(mmu->arch, struct kvm, arch);
+}
+
+static inline u64 get_vmid(u64 vttbr)
+{
+ return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
+ VTTBR_VMID_SHIFT;
+}
+
+static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
+{
+ return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
+}
+
+static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
+{
+ /*
+ * Be careful, mmu may not be fully initialised so do look at
+ * *any* of its fields.
+ */
+ return &kvm->arch.mmu != mmu;
+}
+
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_lock(&kvm->mmu_lock);
+ else
+ read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_unlock(&kvm->mmu_lock);
+ else
+ read_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * ARM64 KVM relies on a simple conversion from physaddr to a kernel
+ * virtual address (KVA) when it does cache maintenance as the CMO
+ * instructions work on virtual addresses. This is incompatible with
+ * VM_PFNMAP VMAs which may not have a kernel direct mapping to a
+ * virtual address.
+ *
+ * With S2FWB and CACHE DIC features, KVM need not do cache flushing
+ * and CMOs are NOP'd. This has the effect of no longer requiring a
+ * KVA for addresses mapped into the S2. The presence of these features
+ * are thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */
+static inline bool kvm_supports_cacheable_pfnmap(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
+ cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
+}
+
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
+#else
+static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..3171963ad25c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLER__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+ isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
new file mode 100644
index 000000000000..905c658057a4
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_NESTED_H
+#define __ARM64_KVM_NESTED_H
+
+#include <linux/bitfield.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_pgtable.h>
+
+static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
+{
+ return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
+ cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
+}
+
+/* Translation helpers from non-VHE EL2 to EL1 */
+static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
+{
+ return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
+}
+
+static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
+{
+ return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+ tcr_el2_ps_to_tcr_el1_ips(tcr) |
+ (tcr & TCR_EL2_TG0_MASK) |
+ (tcr & TCR_EL2_ORGN0_MASK) |
+ (tcr & TCR_EL2_IRGN0_MASK) |
+ (tcr & TCR_EL2_T0SZ_MASK);
+}
+
+static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
+{
+ u64 cpacr_el1 = CPACR_EL1_RES1;
+
+ if (cptr_el2 & CPTR_EL2_TTA)
+ cpacr_el1 |= CPACR_EL1_TTA;
+ if (!(cptr_el2 & CPTR_EL2_TFP))
+ cpacr_el1 |= CPACR_EL1_FPEN;
+ if (!(cptr_el2 & CPTR_EL2_TZ))
+ cpacr_el1 |= CPACR_EL1_ZEN;
+
+ cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM);
+
+ return cpacr_el1;
+}
+
+static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
+{
+ /* Only preserve the minimal set of bits we support */
+ val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
+ SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
+ val |= SCTLR_EL1_RES1;
+
+ return val;
+}
+
+static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
+{
+ /* Clear the ASID field */
+ return ttbr0 & ~GENMASK_ULL(63, 48);
+}
+
+extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern bool forward_debug_exception(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested(struct kvm *kvm);
+extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
+extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+
+union tlbi_info;
+
+extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
+ const union tlbi_info *info,
+ void (*)(struct kvm_s2_mmu *,
+ const union tlbi_info *));
+extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
+extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+
+extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
+
+extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu);
+
+struct kvm_s2_trans {
+ phys_addr_t output;
+ unsigned long block_size;
+ bool writable;
+ bool readable;
+ int level;
+ u32 esr;
+ u64 desc;
+};
+
+static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
+{
+ return trans->output;
+}
+
+static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans)
+{
+ return trans->block_size;
+}
+
+static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans)
+{
+ return trans->esr;
+}
+
+static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans)
+{
+ return trans->readable;
+}
+
+static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
+{
+ return trans->writable;
+}
+
+static inline bool kvm_has_xnx(struct kvm *kvm)
+{
+ return cpus_have_final_cap(ARM64_HAS_XNX) &&
+ kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
+}
+
+static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
+
+ if (!kvm_has_xnx(kvm))
+ xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+
+ switch (xn) {
+ case 0b00:
+ case 0b01:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
+{
+ u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
+
+ if (!kvm_has_xnx(kvm))
+ xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
+
+ switch (xn) {
+ case 0b00:
+ case 0b11:
+ return true;
+ default:
+ return false;
+ }
+}
+
+extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+ struct kvm_s2_trans *result);
+extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+ struct kvm_s2_trans *trans);
+extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+extern void kvm_nested_s2_wp(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm, bool may_block);
+extern void kvm_nested_s2_flush(struct kvm *kvm);
+
+unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
+
+static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr)
+{
+ struct kvm *kvm = vpcu->kvm;
+ u8 CRm = sys_reg_CRm(instr);
+
+ if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+ sys_reg_Op1(instr) == TLBI_Op1_EL1))
+ return false;
+
+ if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+ (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+ kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+ return false;
+
+ if (CRm == TLBI_CRm_nROS &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+ return false;
+
+ if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+ CRm == TLBI_CRm_RNS) &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+ return false;
+
+ return true;
+}
+
+static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
+{
+ struct kvm *kvm = vpcu->kvm;
+ u8 CRm = sys_reg_CRm(instr);
+
+ if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+ sys_reg_Op1(instr) == TLBI_Op1_EL2))
+ return false;
+
+ if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+ (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+ kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+ return false;
+
+ if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS)
+ return false;
+
+ if (CRm == TLBI_CRm_nROS &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+ return false;
+
+ if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+ CRm == TLBI_CRm_RNS) &&
+ !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+ return false;
+
+ return true;
+}
+
+int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
+u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val);
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+#else
+static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ /* We really should never execute this... */
+ WARN_ON_ONCE(1);
+ *elr = 0xbad9acc0debadbad;
+ return false;
+}
+#endif
+
+#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0)
+
+static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
+{
+ return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
+}
+
+/* Adjust alignment for the contiguous bit as per StageOA() */
+#define contiguous_bit_shift(d, wi, l) \
+ ({ \
+ u8 shift = 0; \
+ \
+ if ((d) & PTE_CONT) { \
+ switch (BIT((wi)->pgshift)) { \
+ case SZ_4K: \
+ shift = 4; \
+ break; \
+ case SZ_16K: \
+ shift = (l) == 2 ? 5 : 7; \
+ break; \
+ case SZ_64K: \
+ shift = 5; \
+ break; \
+ } \
+ } \
+ \
+ shift; \
+ })
+
+static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
+{
+ u64 base, tg, num, scale;
+ int shift;
+
+ tg = FIELD_GET(GENMASK(47, 46), val);
+
+ switch(tg) {
+ case 1:
+ shift = 12;
+ break;
+ case 2:
+ shift = 14;
+ break;
+ case 3:
+ default: /* IMPDEF: handle tg==0 as 64k */
+ shift = 16;
+ break;
+ }
+
+ base = (val & GENMASK(36, 0)) << shift;
+
+ if (asid)
+ *asid = FIELD_GET(TLBIR_ASID_MASK, val);
+
+ scale = FIELD_GET(GENMASK(45, 44), val);
+ num = FIELD_GET(GENMASK(43, 39), val);
+ *range = __TLBI_RANGE_PAGES(num, scale) << shift;
+
+ return base;
+}
+
+static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit)
+{
+ switch (ps) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5: return 48;
+ case 6: if (pa52bit)
+ return 52;
+ fallthrough;
+ default:
+ return 48;
+ }
+}
+
+enum trans_regime {
+ TR_EL10,
+ TR_EL20,
+ TR_EL2,
+};
+
+struct s1_walk_info;
+
+struct s1_walk_context {
+ struct s1_walk_info *wi;
+ u64 table_ipa;
+ int level;
+};
+
+struct s1_walk_filter {
+ int (*fn)(struct s1_walk_context *, void *);
+ void *priv;
+};
+
+struct s1_walk_info {
+ struct s1_walk_filter *filter;
+ u64 baddr;
+ enum trans_regime regime;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int txsz;
+ int sl;
+ u8 sh;
+ bool as_el0;
+ bool hpd;
+ bool e0poe;
+ bool poe;
+ bool pan;
+ bool be;
+ bool s2;
+ bool pa52bit;
+ bool ha;
+};
+
+struct s1_walk_result {
+ union {
+ struct {
+ u64 desc;
+ u64 pa;
+ s8 level;
+ u8 APTable;
+ bool nG;
+ u16 asid;
+ bool UXNTable;
+ bool PXNTable;
+ bool uwxn;
+ bool uov;
+ bool ur;
+ bool uw;
+ bool ux;
+ bool pwxn;
+ bool pov;
+ bool pr;
+ bool pw;
+ bool px;
+ };
+ struct {
+ u8 fst;
+ bool ptw;
+ bool s2;
+ };
+ };
+ bool failed;
+};
+
+int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va);
+int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa,
+ int *level);
+
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
+void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
+
+#define vncr_fixmap(c) \
+ ({ \
+ u32 __c = (c); \
+ BUG_ON(__c >= NR_CPUS); \
+ (FIX_VNCR - __c); \
+ })
+
+int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
+
+#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
new file mode 100644
index 000000000000..fc02de43c68d
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -0,0 +1,881 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_PGTABLE_H__
+#define __ARM64_KVM_PGTABLE_H__
+
+#include <linux/bits.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#define KVM_PGTABLE_FIRST_LEVEL -1
+#define KVM_PGTABLE_LAST_LEVEL 3
+
+/*
+ * The largest supported block sizes for KVM (no 52-bit PA support):
+ * - 4K (level 1): 1GB
+ * - 16K (level 2): 32MB
+ * - 64K (level 2): 512MB
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
+#else
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
+#endif
+
+#define kvm_lpa2_is_enabled() system_supports_lpa2()
+
+static inline u64 kvm_get_parange_max(void)
+{
+ if (kvm_lpa2_is_enabled() ||
+ (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
+ return ID_AA64MMFR0_EL1_PARANGE_52;
+ else
+ return ID_AA64MMFR0_EL1_PARANGE_48;
+}
+
+static inline u64 kvm_get_parange(u64 mmfr0)
+{
+ u64 parange_max = kvm_get_parange_max();
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+ if (parange > parange_max)
+ parange = parange_max;
+
+ return parange;
+}
+
+typedef u64 kvm_pte_t;
+
+#define KVM_PTE_VALID BIT(0)
+
+#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+
+#define KVM_PHYS_INVALID (-1ULL)
+
+#define KVM_PTE_TYPE BIT(1)
+#define KVM_PTE_TYPE_BLOCK 0
+#define KVM_PTE_TYPE_PAGE 1
+#define KVM_PTE_TYPE_TABLE 1
+
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
+
+static inline bool kvm_pte_valid(kvm_pte_t pte)
+{
+ return pte & KVM_PTE_VALID;
+}
+
+static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
+{
+ u64 pa;
+
+ if (kvm_lpa2_is_enabled()) {
+ pa = pte & KVM_PTE_ADDR_MASK_LPA2;
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16)
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ }
+
+ return pa;
+}
+
+static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
+{
+ kvm_pte_t pte;
+
+ if (kvm_lpa2_is_enabled()) {
+ pte = pa & KVM_PTE_ADDR_MASK_LPA2;
+ pa &= GENMASK(51, 50);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
+ } else {
+ pte = pa & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
+ }
+
+ return pte;
+}
+
+static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
+{
+ return __phys_to_pfn(kvm_pte_to_phys(pte));
+}
+
+static inline u64 kvm_granule_shift(s8 level)
+{
+ /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
+ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
+}
+
+static inline u64 kvm_granule_size(s8 level)
+{
+ return BIT(kvm_granule_shift(level));
+}
+
+static inline bool kvm_level_supports_block_mapping(s8 level)
+{
+ return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
+}
+
+static inline u32 kvm_supported_block_sizes(void)
+{
+ s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ u32 r = 0;
+
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
+ r |= BIT(kvm_granule_shift(level));
+
+ return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+ bool is_power_of_two = IS_ALIGNED(size, size);
+
+ return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
+/**
+ * struct kvm_pgtable_mm_ops - Memory management callbacks.
+ * @zalloc_page: Allocate a single zeroed memory page.
+ * The @arg parameter can be used by the walker
+ * to pass a memcache. The initial refcount of
+ * the page is 1.
+ * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
+ * The @size parameter is in bytes, and is rounded
+ * up to the next page boundary. The resulting
+ * allocation is physically contiguous.
+ * @free_pages_exact: Free an exact number of memory pages previously
+ * allocated by zalloc_pages_exact.
+ * @free_unlinked_table: Free an unlinked paging structure by unlinking and
+ * dropping references.
+ * @get_page: Increment the refcount on a page.
+ * @put_page: Decrement the refcount on a page. When the
+ * refcount reaches 0 the page is automatically
+ * freed.
+ * @page_count: Return the refcount of a page.
+ * @phys_to_virt: Convert a physical address into a virtual
+ * address mapped in the current context.
+ * @virt_to_phys: Convert a virtual address mapped in the current
+ * context into a physical address.
+ * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
+ * for the specified memory address range.
+ * @icache_inval_pou: Invalidate the instruction cache to the PoU
+ * for the specified memory address range.
+ */
+struct kvm_pgtable_mm_ops {
+ void* (*zalloc_page)(void *arg);
+ void* (*zalloc_pages_exact)(size_t size);
+ void (*free_pages_exact)(void *addr, size_t size);
+ void (*free_unlinked_table)(void *addr, s8 level);
+ void (*get_page)(void *addr);
+ void (*put_page)(void *addr);
+ int (*page_count)(void *addr);
+ void* (*phys_to_virt)(phys_addr_t phys);
+ phys_addr_t (*virt_to_phys)(void *addr);
+ void (*dcache_clean_inval_poc)(void *addr, size_t size);
+ void (*icache_inval_pou)(void *addr, size_t size);
+};
+
+/**
+ * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
+ * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
+ * ARM64_HAS_STAGE2_FWB.
+ * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings.
+ */
+enum kvm_pgtable_stage2_flags {
+ KVM_PGTABLE_S2_NOFWB = BIT(0),
+ KVM_PGTABLE_S2_IDMAP = BIT(1),
+};
+
+/**
+ * enum kvm_pgtable_prot - Page-table permissions and attributes.
+ * @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
+ * @KVM_PGTABLE_PROT_PX: Privileged execute permission.
+ * @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
+ * @KVM_PGTABLE_PROT_W: Write permission.
+ * @KVM_PGTABLE_PROT_R: Read permission.
+ * @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
+ * @KVM_PGTABLE_PROT_SW0: Software bit 0.
+ * @KVM_PGTABLE_PROT_SW1: Software bit 1.
+ * @KVM_PGTABLE_PROT_SW2: Software bit 2.
+ * @KVM_PGTABLE_PROT_SW3: Software bit 3.
+ */
+enum kvm_pgtable_prot {
+ KVM_PGTABLE_PROT_PX = BIT(0),
+ KVM_PGTABLE_PROT_UX = BIT(1),
+ KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
+ KVM_PGTABLE_PROT_UX,
+ KVM_PGTABLE_PROT_W = BIT(2),
+ KVM_PGTABLE_PROT_R = BIT(3),
+
+ KVM_PGTABLE_PROT_DEVICE = BIT(4),
+ KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
+
+ KVM_PGTABLE_PROT_SW0 = BIT(55),
+ KVM_PGTABLE_PROT_SW1 = BIT(56),
+ KVM_PGTABLE_PROT_SW2 = BIT(57),
+ KVM_PGTABLE_PROT_SW3 = BIT(58),
+};
+
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
+
+#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX
+#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW
+
+#define PAGE_HYP KVM_PGTABLE_PROT_RW
+#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
+#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
+
+typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
+ enum kvm_pgtable_prot prot);
+
+/**
+ * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
+ * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
+ * entries.
+ * @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
+ * children.
+ * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
+ * children.
+ * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
+ * with other software walkers.
+ * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
+ * invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
+ * without Break-before-make's
+ * TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
+ * without Cache maintenance
+ * operations required.
+ */
+enum kvm_pgtable_walk_flags {
+ KVM_PGTABLE_WALK_LEAF = BIT(0),
+ KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
+ KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
+ KVM_PGTABLE_WALK_SHARED = BIT(3),
+ KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
+ KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
+ KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
+};
+
+struct kvm_pgtable_visit_ctx {
+ kvm_pte_t *ptep;
+ kvm_pte_t old;
+ void *arg;
+ struct kvm_pgtable_mm_ops *mm_ops;
+ u64 start;
+ u64 addr;
+ u64 end;
+ s8 level;
+ enum kvm_pgtable_walk_flags flags;
+};
+
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit);
+
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+ return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
+
+/**
+ * struct kvm_pgtable_walker - Hook into a page-table walk.
+ * @cb: Callback function to invoke during the walk.
+ * @arg: Argument passed to the callback function.
+ * @flags: Bitwise-OR of flags to identify the entry types on which to
+ * invoke the callback function.
+ */
+struct kvm_pgtable_walker {
+ const kvm_pgtable_visitor_fn_t cb;
+ void * const arg;
+ const enum kvm_pgtable_walk_flags flags;
+};
+
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ /*
+ * Due to the lack of RCU (or a similar protection scheme), only
+ * non-shared table walkers are allowed in the hypervisor.
+ */
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ return -EPERM;
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return rcu_dereference_raw(pteref);
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_lock();
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits: Maximum input address size, in bits.
+ * @start_level: Level at which the page-table walk starts.
+ * @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
+ * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+ union {
+ struct rb_root_cached pkvm_mappings;
+ struct {
+ u32 ia_bits;
+ s8 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+ };
+ };
+ struct kvm_s2_mmu *mmu;
+};
+
+/**
+ * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
+ * @pgt: Uninitialised page-table structure to initialise.
+ * @va_bits: Maximum virtual address bits.
+ * @mm_ops: Memory management callbacks.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
+ struct kvm_pgtable_mm_ops *mm_ops);
+
+/**
+ * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address at which to place the mapping.
+ * @size: Size of the mapping.
+ * @phys: Physical address of the memory to map.
+ * @prot: Permissions and attributes for the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable. Attempts to install a new mapping
+ * for a virtual address that is already mapped will be rejected with an
+ * error and a WARN().
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot);
+
+/**
+ * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. The unmapping operation will stop early if it encounters either an
+ * invalid page-table entry or a valid block mapping which maps beyond the range
+ * being unmapped.
+ *
+ * Return: Number of bytes unmapped, which may be 0.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_get_vtcr() - Helper to construct VTCR_EL2
+ * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
+ * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
+ * @phys_shfit: Value to set in VTCR_EL2.T0SZ.
+ *
+ * The VTCR value is common across all the physical CPUs on the system.
+ * We use system wide sanitised values to fill in different fields,
+ * except for Hardware Management of Access Flags. HA Flag is set
+ * unconditionally on all CPUs, as it is safe to run with or without
+ * the feature and the bit is RES0 on CPUs that don't support it.
+ *
+ * Return: VTCR_EL2 value
+ */
+u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
+
+/**
+ * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
+ * @vtcr: Content of the VTCR register.
+ *
+ * Return: the size (in bytes) of the stage-2 PGD
+ */
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
+
+/**
+ * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
+ * @pgt: Uninitialised page-table structure to initialise.
+ * @mmu: S2 MMU context for this S2 translation
+ * @mm_ops: Memory management callbacks.
+ * @flags: Stage-2 configuration flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops,
+ enum kvm_pgtable_stage2_flags flags,
+ kvm_pgtable_force_pte_cb_t force_pte_cb);
+
+static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
+}
+
+/**
+ * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address at which to place the mapping.
+ * @size: Size of the mapping.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * It is assumed that the rest of the page-table is freed before this operation.
+ */
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
+ * @mm_ops: Memory management callbacks.
+ * @pgtable: Unlinked stage-2 paging structure to be freed.
+ * @level: Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys: Physical address of the memory to map.
+ * @level: Starting level of the stage-2 paging structure to be created.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @force_pte: Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachableby the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+ u64 phys, s8 level,
+ enum kvm_pgtable_prot prot,
+ void *mc, bool force_pte);
+
+/**
+ * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address at which to place the mapping.
+ * @size: Size of the mapping.
+ * @phys: Physical address of the memory to map.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Note that the update of a valid leaf PTE in this function will be aborted,
+ * if it's trying to recreate the exact same mapping or only change the access
+ * permissions. Instead, the vCPU will exit one more time from guest if still
+ * needed and then go through the path of relaxing permissions.
+ *
+ * Note that this function will both coalesce existing table entries and split
+ * existing block mappings, relying on page-faults to fault back areas outside
+ * of the new mapping lazily.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ u64 phys, enum kvm_pgtable_prot prot,
+ void *mc, enum kvm_pgtable_walk_flags flags);
+
+/**
+ * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
+ * track ownership.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Base intermediate physical address to annotate.
+ * @size: Size of the annotated range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @owner_id: Unique identifier for the owner of the page.
+ *
+ * By default, all page-tables are owned by identifier 0. This function can be
+ * used to mark portions of the IPA space as owned by other entities. When a
+ * stage 2 is used with identity-mappings, these annotations allow to use the
+ * page-table data structure as a simple rmap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ void *mc, u8 owner_id);
+
+/**
+ * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
+ * FWB is not supported by the CPU.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
+ * without TLB invalidation.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to write-protect,
+ * @size: Size of the range.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * Note that it is the caller's responsibility to invalidate the TLB after
+ * calling this function to ensure that the updated permissions are visible
+ * to the CPUs.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * If there is a valid, leaf page-table entry used to translate @addr, then
+ * set the access flag in that entry.
+ */
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
+
+/**
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ * flag in a page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ * @size: Size of the address range to visit.
+ * @mkold: True if the access flag should be cleared.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
+ *
+ * Note that it is the caller's responsibility to invalidate the TLB after
+ * calling this function to ensure that the updated permissions are visible
+ * to the CPUs.
+ *
+ * Return: True if any of the visited PTEs had the access flag set.
+ */
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+ u64 size, bool mkold);
+
+/**
+ * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
+ * page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ * @prot: Additional permissions to grant for the mapping.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * If there is a valid, leaf page-table entry used to translate @addr, then
+ * relax the permissions in that entry according to the read, write and
+ * execute permissions specified by @prot. No permissions are removed, and
+ * TLB invalidation is performed after updating the entry. Software bits cannot
+ * be set or cleared using kvm_pgtable_stage2_relax_perms().
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
+
+/**
+ * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
+ * of Coherency for guest stage-2 address
+ * range.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to flush.
+ * @size: Size of the range.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ * to PAGE_SIZE guest pages.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr: Intermediate physical address from which to split.
+ * @size: Size of the range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+
+/**
+ * kvm_pgtable_walk() - Walk a page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_*_init().
+ * @addr: Input address for the start of the walk.
+ * @size: Size of the range to walk.
+ * @walker: Walker callback description.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address range specified, visiting entries according to the walker flags.
+ * Invalid entries are treated as leaf entries. The visited page table entry is
+ * reloaded after invoking the walker callback, allowing the walker to descend
+ * into a newly installed table.
+ *
+ * Returning a negative error code from the walker callback function will
+ * terminate the walk immediately with the same error code.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_pgtable_walker *walker);
+
+/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ * with its level.
+ * @pgt: Page-table structure initialised by kvm_pgtable_*_init()
+ * or a similar initialiser.
+ * @addr: Input address for the start of the walk.
+ * @ptep: Pointer to storage for the retrieved PTE.
+ * @level: Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+ kvm_pte_t *ptep, s8 *level);
+
+/**
+ * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
+ * stage-2 Page-Table Entry.
+ * @pte: Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ * kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
+ * Page-Table Entry.
+ * @pte: Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ * kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu: Stage-2 KVM MMU struct
+ * @addr: The base Intermediate physical address from which to invalidate
+ * @size: Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size);
+#endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
new file mode 100644
index 000000000000..0aecd4ac5f45
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+#ifndef __ARM64_KVM_PKVM_H__
+#define __ARM64_KVM_PKVM_H__
+
+#include <linux/arm_ffa.h>
+#include <linux/memblock.h>
+#include <linux/scatterlist.h>
+#include <asm/kvm_pgtable.h>
+
+/* Maximum number of VMs that can co-exist under pKVM. */
+#define KVM_MAX_PVMS 255
+
+#define HYP_MEMBLOCK_REGIONS 128
+
+int pkvm_init_host_vm(struct kvm *kvm);
+int pkvm_create_hyp_vm(struct kvm *kvm);
+bool pkvm_hyp_vm_is_created(struct kvm *kvm);
+void pkvm_destroy_hyp_vm(struct kvm *kvm);
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
+
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static inline bool kvm_pvm_ext_allowed(long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
+extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
+
+static inline unsigned long
+hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
+{
+ unsigned long nr_pages = reg->size >> PAGE_SHIFT;
+ unsigned long start, end;
+
+ start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
+ end = start + nr_pages * vmemmap_entry_size;
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = ALIGN(end, PAGE_SIZE);
+
+ return end - start;
+}
+
+static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
+{
+ unsigned long res = 0, i;
+
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
+ vmemmap_entry_size);
+ }
+
+ return res >> PAGE_SHIFT;
+}
+
+static inline unsigned long hyp_vm_table_pages(void)
+{
+ return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
+}
+
+static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
+{
+ unsigned long total = 0;
+ int i;
+
+ /* Provision the worst case scenario */
+ for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
+ nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
+ total += nr_pages;
+ }
+
+ return total;
+}
+
+static inline unsigned long __hyp_pgtable_total_pages(void)
+{
+ unsigned long res = 0, i;
+
+ /* Cover all of memory with page-granularity */
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
+ res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
+ }
+
+ return res;
+}
+
+static inline unsigned long hyp_s1_pgtable_pages(void)
+{
+ unsigned long res;
+
+ res = __hyp_pgtable_total_pages();
+
+ /* Allow 1 GiB for private mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+static inline unsigned long host_s2_pgtable_pages(void)
+{
+ unsigned long res;
+
+ /*
+ * Include an extra 16 pages to safely upper-bound the worst case of
+ * concatenated pgds.
+ */
+ res = __hyp_pgtable_total_pages() + 16;
+
+ /* Allow 1 GiB for MMIO mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline unsigned long pkvm_selftest_pages(void) { return 32; }
+#else
+static inline unsigned long pkvm_selftest_pages(void) { return 0; }
+#endif
+
+#define KVM_FFA_MBOX_NR_PAGES 1
+
+static inline unsigned long hyp_ffa_proxy_pages(void)
+{
+ size_t desc_max;
+
+ /*
+ * The hypervisor FFA proxy needs enough memory to buffer a fragmented
+ * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
+ */
+ desc_max = sizeof(struct ffa_mem_region) +
+ sizeof(struct ffa_mem_region_attributes) +
+ sizeof(struct ffa_composite_mem_region) +
+ SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
+
+ /* Plus a page each for the hypervisor's RX and TX mailboxes. */
+ return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
+}
+
+static inline size_t pkvm_host_sve_state_size(void)
+{
+ if (!system_supports_sve())
+ return 0;
+
+ return size_add(sizeof(struct cpu_sve_state),
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
+struct pkvm_mapping {
+ struct rb_node node;
+ u64 gfn;
+ u64 pfn;
+ u64 nr_pages;
+ u64 __subtree_last; /* Internal member for interval tree */
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot, void *mc,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc,
+ bool force_pte);
+#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
deleted file mode 100644
index e301a4816355..000000000000
--- a/arch/arm64/include/asm/kvm_psci.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_PSCI_H__
-#define __ARM64_KVM_PSCI_H__
-
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
new file mode 100644
index 000000000000..e50987b32483
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* arch/arm64/include/asm/kvm_ptrauth.h: Guest/host ptrauth save/restore
+ * Copyright 2019 Arm Limited
+ * Authors: Mark Rutland <mark.rutland@arm.com>
+ * Amit Daniel Kachhap <amit.kachhap@arm.com>
+ */
+
+#ifndef __ASM_KVM_PTRAUTH_H
+#define __ASM_KVM_PTRAUTH_H
+
+#ifdef __ASSEMBLER__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+
+#define PTRAUTH_REG_OFFSET(x) (x - CPU_APIAKEYLO_EL1)
+
+/*
+ * CPU_AP*_EL1 values exceed immediate offset range (512) for stp
+ * instruction so below macros takes CPU_APIAKEYLO_EL1 as base and
+ * calculates the offset of the keys from this base to avoid an extra add
+ * instruction. These macros assumes the keys offsets follow the order of
+ * the sysreg enum in kvm_host.h.
+ */
+.macro ptrauth_save_state base, reg1, reg2
+ mrs_s \reg1, SYS_APIAKEYLO_EL1
+ mrs_s \reg2, SYS_APIAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+ mrs_s \reg1, SYS_APIBKEYLO_EL1
+ mrs_s \reg2, SYS_APIBKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+ mrs_s \reg1, SYS_APDAKEYLO_EL1
+ mrs_s \reg2, SYS_APDAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+ mrs_s \reg1, SYS_APDBKEYLO_EL1
+ mrs_s \reg2, SYS_APDBKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+ mrs_s \reg1, SYS_APGAKEYLO_EL1
+ mrs_s \reg2, SYS_APGAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+.endm
+
+.macro ptrauth_restore_state base, reg1, reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+ msr_s SYS_APIAKEYLO_EL1, \reg1
+ msr_s SYS_APIAKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+ msr_s SYS_APIBKEYLO_EL1, \reg1
+ msr_s SYS_APIBKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+ msr_s SYS_APDAKEYLO_EL1, \reg1
+ msr_s SYS_APDAKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+ msr_s SYS_APDBKEYLO_EL1, \reg1
+ msr_s SYS_APDBKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+ msr_s SYS_APGAKEYLO_EL1, \reg1
+ msr_s SYS_APGAKEYHI_EL1, \reg2
+.endm
+
+/*
+ * Both ptrauth_switch_to_guest and ptrauth_switch_to_hyp macros will
+ * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as
+ * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and
+ * then proceed ahead with the save/restore of Pointer Authentication
+ * key registers if enabled for the guest.
+ */
+.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ and \reg1, \reg1, #(HCR_API | HCR_APK)
+ cbz \reg1, .L__skip_switch\@
+ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_restore_state \reg1, \reg2, \reg3
+.L__skip_switch\@:
+.endm
+
+.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ and \reg1, \reg1, #(HCR_API | HCR_APK)
+ cbz \reg1, .L__skip_switch\@
+ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_save_state \reg1, \reg2, \reg3
+ add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_restore_state \reg1, \reg2, \reg3
+ isb
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_PTR_AUTH */
+.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+.endm
+.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
+.endm
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#else /* !__ASSEMBLER__ */
+
+#define __ptrauth_save_key(ctxt, key) \
+ do { \
+ u64 __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
+ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+ ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
+ } while(0)
+
+#define ptrauth_save_keys(ctxt) \
+ do { \
+ __ptrauth_save_key(ctxt, APIA); \
+ __ptrauth_save_key(ctxt, APIB); \
+ __ptrauth_save_key(ctxt, APDA); \
+ __ptrauth_save_key(ctxt, APDB); \
+ __ptrauth_save_key(ctxt, APGA); \
+ } while(0)
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_KVM_PTRAUTH_H */
diff --git a/arch/arm64/include/asm/kvm_types.h b/arch/arm64/include/asm/kvm_types.h
new file mode 100644
index 000000000000..9a126b9e2d7c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_KVM_TYPES_H
+#define _ASM_ARM64_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_ARM64_KVM_TYPES_H */
+
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 636c1bced7d4..40bd17add539 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -1,7 +1,46 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-#define __ALIGN .align 4
-#define __ALIGN_STR ".align 4"
+#ifdef __ASSEMBLER__
+#include <asm/assembler.h>
+#endif
+
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT
+
+/*
+ * When using in-kernel BTI we need to ensure that PCS-conformant
+ * assembly functions have suitable annotations. Override
+ * SYM_FUNC_START to insert a BTI landing pad at the start of
+ * everything, the override is done unconditionally so we're more
+ * likely to notice any drift from the overridden definitions.
+ */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_NOALIGN(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_WEAK_NOALIGN(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ bti c ;
#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
new file mode 100644
index 000000000000..3129a5819d0e
--- /dev/null
+++ b/arch/arm64/include/asm/lse.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LSE_H
+#define __ASM_LSE_H
+
+#include <asm/atomic_ll_sc.h>
+
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+
+#define __LSE_PREAMBLE ".arch_extension lse\n"
+
+#include <linux/compiler_types.h>
+#include <linux/export.h>
+#include <linux/stringify.h>
+#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
+#include <asm/atomic_lse.h>
+#include <asm/cpucaps.h>
+
+#define __lse_ll_sc_body(op, ...) \
+({ \
+ alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
+ __lse_##op(__VA_ARGS__) : \
+ __ll_sc_##op(__VA_ARGS__); \
+})
+
+/* In-line patching at runtime */
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
+ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS)
+
+#else /* CONFIG_ARM64_LSE_ATOMICS */
+
+#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
+
+#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
+
+#endif /* CONFIG_ARM64_LSE_ATOMICS */
+#endif /* __ASM_LSE_H */
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..314b2b52025f
--- /dev/null
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_MEM_ENCRYPT_H
+#define __ASM_MEM_ENCRYPT_H
+
+#include <asm/rsi.h>
+
+struct device;
+
+struct arm64_mem_crypt_ops {
+ int (*encrypt)(unsigned long addr, int numpages);
+ int (*decrypt)(unsigned long addr, int numpages);
+};
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+int realm_register_memory_enc_ops(void);
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return is_realm_world();
+}
+
+/*
+ * For Arm CCA guests, canonical addresses are "encrypted", so no changes
+ * required for dma_addr_encrypted().
+ * The unencrypted DMA buffers must be accessed via the unprotected IPA,
+ * "top IPA bit" set.
+ */
+#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED)
+
+/* Clear the "top" IPA bit while converting back */
+#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED)
+
+#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h
deleted file mode 100644
index 6afeed2467f1..000000000000
--- a/arch/arm64/include/asm/memblock.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_MEMBLOCK_H
-#define __ASM_MEMBLOCK_H
-
-extern void arm64_memblock_init(void);
-
-#endif
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 20925bcf4e2a..9d54b2ea49d6 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -1,106 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/memory.h
*
* Copyright (C) 2000-2002 Russell King
* Copyright (C) 2012 ARM Ltd.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Note: this file should not be included by non-asm/.h files
*/
#ifndef __ASM_MEMORY_H
#define __ASM_MEMORY_H
-#include <linux/compiler.h>
#include <linux/const.h>
-#include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
+#include <asm/page-def.h>
/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
+ * Size of the PCI I/O space. This must remain a power of two so that
+ * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
*/
-#define UL(x) _AC(x, UL)
+#define PCI_IO_SIZE SZ_16M
/*
- * PAGE_OFFSET - the virtual address of the start of the kernel image.
+ * VMEMMAP_SIZE - allows the whole linear region to be covered by
+ * a struct page array
+ *
+ * If we are configured with a 52-bit kernel VA then our VMEMMAP_SIZE
+ * needs to cover the memory region from the beginning of the 52-bit
+ * PAGE_OFFSET all the way to PAGE_END for 48-bit. This allows us to
+ * keep a constant PAGE_OFFSET and "fallback" to using the higher end
+ * of the VMEMMAP where 52-bit support is not available in hardware.
+ */
+#define VMEMMAP_RANGE (_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET)
+#define VMEMMAP_SIZE ((VMEMMAP_RANGE >> PAGE_SHIFT) * sizeof(struct page))
+
+/*
+ * PAGE_OFFSET - the virtual address of the start of the linear map, at the
+ * start of the TTBR1 address space.
+ * PAGE_END - the end of the linear map, where all other kernel mappings begin.
+ * KIMAGE_VADDR - the virtual address of the start of the kernel image.
* VA_BITS - the maximum number of bits for virtual addresses.
- * TASK_SIZE - the maximum size of a user space task.
- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
- */
-#define PAGE_OFFSET UL(0xffffffc000000000)
-#define MODULES_END (PAGE_OFFSET)
-#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M)
-#define VA_BITS (39)
-#define TASK_SIZE_64 (UL(1) << VA_BITS)
-
-#ifdef CONFIG_COMPAT
-#define TASK_SIZE_32 UL(0x100000000)
-#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
- TASK_SIZE_32 : TASK_SIZE_64)
+ */
+#define VA_BITS (CONFIG_ARM64_VA_BITS)
+#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
+#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
+#define KIMAGE_VADDR (MODULES_END)
+#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
+#define MODULES_VSIZE (SZ_2G)
+#define VMEMMAP_START (VMEMMAP_END - VMEMMAP_SIZE)
+#define VMEMMAP_END (-UL(SZ_1G))
+#define PCI_IO_START (VMEMMAP_END + SZ_8M)
+#define PCI_IO_END (PCI_IO_START + PCI_IO_SIZE)
+#define FIXADDR_TOP (-UL(SZ_8M))
+
+#if VA_BITS > 48
+#ifdef CONFIG_ARM64_16K_PAGES
+#define VA_BITS_MIN (47)
+#else
+#define VA_BITS_MIN (48)
+#endif
+#else
+#define VA_BITS_MIN (VA_BITS)
+#endif
+
+#define _PAGE_END(va) (-(UL(1) << ((va) - 1)))
+
+#define KERNEL_START _text
+#define KERNEL_END _end
+
+/*
+ * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the
+ * kernel virtual address space for storing the shadow memory respectively.
+ *
+ * The mapping between a virtual memory address and its corresponding shadow
+ * memory address is defined based on the formula:
+ *
+ * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map
+ * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets
+ * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of
+ * the shadow memory region.
+ *
+ * Based on this mapping, we define two constants:
+ *
+ * KASAN_SHADOW_START: the start of the shadow memory region;
+ * KASAN_SHADOW_END: the end of the shadow memory region.
+ *
+ * KASAN_SHADOW_END is defined first as the shadow address that corresponds to
+ * the upper bound of possible virtual kernel memory addresses UL(1) << 64
+ * according to the mapping formula.
+ *
+ * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow
+ * memory start must map to the lowest possible kernel virtual memory address
+ * and thus it depends on the actual bitness of the address space.
+ *
+ * As KASAN inserts redzones between stack variables, this increases the stack
+ * memory usage significantly. Thus, we double the (minimum) stack size.
+ */
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET)
+#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
+#define PAGE_END KASAN_SHADOW_START
+#define KASAN_THREAD_SHIFT 1
#else
-#define TASK_SIZE TASK_SIZE_64
-#endif /* CONFIG_COMPAT */
+#define KASAN_THREAD_SHIFT 0
+#define PAGE_END (_PAGE_END(VA_BITS_MIN))
+#endif /* CONFIG_KASAN */
+
+#define DIRECT_MAP_PHYSMEM_END __pa(PAGE_END - 1)
+
+#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+/*
+ * VMAP'd stacks are allocated at page granularity, so we must ensure that such
+ * stacks are a multiple of page size.
+ */
+#if (MIN_THREAD_SHIFT < PAGE_SHIFT)
+#define THREAD_SHIFT PAGE_SHIFT
+#else
+#define THREAD_SHIFT MIN_THREAD_SHIFT
+#endif
-#if TASK_SIZE_64 > MODULES_VADDR
-#error Top of 64-bit user space clashes with start of module space
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
#endif
+#define THREAD_SIZE (UL(1) << THREAD_SHIFT)
+
/*
- * Physical vs virtual RAM address space conversion. These are
- * private definitions which should NOT be used outside memory.h
- * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
*/
-#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
-#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define THREAD_ALIGN (2 * THREAD_SIZE)
+
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+#define OVERFLOW_STACK_SIZE SZ_4K
+
+#define NVHE_STACK_SHIFT PAGE_SHIFT
+#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT)
/*
- * Convert a physical address to a Page Frame Number and back
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwinded stacktrace; plus an additional entry to delimit the
+ * end.
*/
-#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT))
-#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT)
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long))
/*
- * Convert a page to/from a physical address
+ * Alignment of kernel segments (e.g. .text, .data).
+ *
+ * 4 KB granule: 16 level 3 entries, with contiguous bit
+ * 16 KB granule: 4 level 3 entries, without contiguous bit
+ * 64 KB granule: 1 level 3 entry
*/
-#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page)))
-#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
+#define SEGMENT_ALIGN SZ_64K
/*
* Memory types available.
+ *
+ * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in
+ * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note
+ * that protection_map[] only contains MT_NORMAL attributes.
*/
-#define MT_DEVICE_nGnRnE 0
-#define MT_DEVICE_nGnRE 1
-#define MT_DEVICE_GRE 2
-#define MT_NORMAL_NC 3
-#define MT_NORMAL 4
+#define MT_NORMAL 0
+#define MT_NORMAL_TAGGED 1
+#define MT_NORMAL_NC 2
+#define MT_DEVICE_nGnRnE 3
+#define MT_DEVICE_nGnRE 4
/*
* Memory types for Stage-2 translation
*/
#define MT_S2_NORMAL 0xf
+#define MT_S2_NORMAL_NC 0x5
#define MT_S2_DEVICE_nGnRE 0x1
-#ifndef __ASSEMBLY__
+/*
+ * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
+ * Stage-2 enforces Normal-WB and Device-nGnRE
+ */
+#define MT_S2_FWB_NORMAL 6
+#define MT_S2_FWB_NORMAL_NC 5
+#define MT_S2_FWB_DEVICE_nGnRE 1
+
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER (PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER (PMD_SHIFT)
+#endif
+
+/*
+ * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE)
+
+/*
+ * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <asm/boot.h>
+#include <asm/bug.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
+
+static inline u64 __pure read_tcr(void)
+{
+ u64 tcr;
+
+ // read_sysreg() uses asm volatile, so avoid it here
+ asm("mrs %0, tcr_el1" : "=r"(tcr));
+ return tcr;
+}
+
+#if VA_BITS > 48
+// For reasons of #include hell, we can't use TCR_T1SZ_OFFSET/TCR_T1SZ_MASK here
+#define vabits_actual (64 - ((read_tcr() >> 16) & 63))
+#else
+#define vabits_actual ((u64)VA_BITS)
+#endif
-extern phys_addr_t memstart_addr;
+extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET ({ memstart_addr; })
+#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64 kimage_voffset;
+
+static inline unsigned long kaslr_offset(void)
+{
+ return (u64)&_text - KIMAGE_VADDR;
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_init(void);
+static inline bool kaslr_enabled(void)
+{
+ extern bool __kaslr_is_enabled;
+ return __kaslr_is_enabled;
+}
+#else
+static inline void kaslr_init(void) { }
+static inline bool kaslr_enabled(void) { return false; }
+#endif
+
+/*
+ * Allow all memory at the discovery stage. We will clip it later.
+ */
+#define MIN_MEMBLOCK_ADDR 0
+#define MAX_MEMBLOCK_ADDR U64_MAX
/*
* PFNs are used to describe any physical page; this means
@@ -113,39 +275,173 @@ extern phys_addr_t memstart_addr;
#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT)
/*
+ * When dealing with data aborts, watchpoints, or instruction traps we may end
+ * up with a tagged userland pointer. Clear the tag to get a sane pointer to
+ * pass on to access_ok(), for instance.
+ */
+#define __untagged_addr(addr) \
+ ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
+
+#define untagged_addr(addr) ({ \
+ u64 __addr = (__force u64)(addr); \
+ __addr &= __untagged_addr(__addr); \
+ (__force __typeof__(addr))__addr; \
+})
+
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+#define __tag_shifted(tag) ((u64)(tag) << 56)
+#define __tag_reset(addr) __untagged_addr(addr)
+#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
+#else
+#define __tag_shifted(tag) 0UL
+#define __tag_reset(addr) (addr)
+#define __tag_get(addr) 0
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
+
+static inline const void *__tag_set(const void *addr, u8 tag)
+{
+ u64 __addr = (u64)addr & ~__tag_shifted(0xff);
+ return (const void *)(__addr | __tag_shifted(tag));
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
+#define arch_enable_tag_checks_async() mte_enable_kernel_async()
+#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
+#define arch_enable_tag_checks_write_only() mte_enable_kernel_store_only()
+#define arch_suppress_tag_checks_start() mte_enable_tco()
+#define arch_suppress_tag_checks_stop() mte_disable_tco()
+#define arch_force_async_tag_fault() mte_check_tfsr_exit()
+#define arch_get_random_tag() mte_get_random_tag()
+#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
+#define arch_set_mem_tag_range(addr, size, tag, init) \
+ mte_set_mem_tag_range((addr), (size), (tag), (init))
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+/*
+ * Physical vs virtual RAM address space conversion. These are
+ * private definitions which should NOT be used outside memory.h
+ * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+
+
+/*
+ * Check whether an arbitrary address is within the linear map, which
+ * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the
+ * kernel's TTBR1 address range.
+ */
+#define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
+
+#define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET)
+#define __kimg_to_phys(addr) ((addr) - kimage_voffset)
+
+#define __virt_to_phys_nodebug(x) ({ \
+ phys_addr_t __x = (phys_addr_t)(__tag_reset(x)); \
+ __is_lm_address(__x) ? __lm_to_phys(__x) : __kimg_to_phys(__x); \
+})
+
+#define __pa_symbol_nodebug(x) __kimg_to_phys((phys_addr_t)(x))
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x) __virt_to_phys_nodebug(x)
+#define __phys_addr_symbol(x) __pa_symbol_nodebug(x)
+#endif /* CONFIG_DEBUG_VIRTUAL */
+
+#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
+#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
+
+/*
* Note: Drivers should NOT use these. They are the wrong
* translation for translating DMA addresses. Use the driver
* DMA support - see dma-mapping.h.
*/
+#define virt_to_phys virt_to_phys
static inline phys_addr_t virt_to_phys(const volatile void *x)
{
return __virt_to_phys((unsigned long)(x));
}
+#define phys_to_virt phys_to_virt
static inline void *phys_to_virt(phys_addr_t x)
{
return (void *)(__phys_to_virt(x));
}
+/* Needed already here for resolving __phys_to_pfn() in virt_to_pfn() */
+#include <asm-generic/memory_model.h>
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __phys_to_pfn(virt_to_phys(kaddr));
+}
+
/*
* Drivers should NOT use these either.
*/
#define __pa(x) __virt_to_phys((unsigned long)(x))
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
+#define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
- * virt_to_page(k) convert a _valid_ virtual address to struct page *
- * virt_addr_valid(k) indicates whether a virtual address is valid
+ * virt_to_page(x) convert a _valid_ virtual address to struct page *
+ * virt_addr_valid(x) indicates whether a virtual address is valid
*/
-#define ARCH_PFN_OFFSET PHYS_PFN_OFFSET
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
- ((void *)(kaddr) < (void *)high_memory))
+#if defined(CONFIG_DEBUG_VIRTUAL)
+#define page_to_virt(x) ({ \
+ __typeof__(x) __page = x; \
+ void *__addr = __va(page_to_phys(__page)); \
+ (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
+})
+#define virt_to_page(x) pfn_to_page(virt_to_pfn(x))
+#else
+#define page_to_virt(x) ({ \
+ __typeof__(x) __page = x; \
+ u64 __idx = ((u64)__page - VMEMMAP_START) / sizeof(struct page);\
+ u64 __addr = PAGE_OFFSET + (__idx * PAGE_SIZE); \
+ (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
+})
+
+#define virt_to_page(x) ({ \
+ u64 __idx = (__tag_reset((u64)x) - PAGE_OFFSET) / PAGE_SIZE; \
+ u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \
+ (struct page *)__addr; \
+})
+#endif /* CONFIG_DEBUG_VIRTUAL */
-#endif
+#define virt_addr_valid(addr) ({ \
+ __typeof__(addr) __addr = __tag_reset(addr); \
+ __is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr)); \
+})
-#include <asm-generic/memory_model.h>
+void dump_mem_limit(void);
+#endif /* !__ASSEMBLER__ */
+/*
+ * Given that the GIC architecture permits ITS implementations that can only be
+ * configured with a LPI table address once, GICv3 systems with many CPUs may
+ * end up reserving a lot of different regions after a kexec for their LPI
+ * tables (one per CPU), as we are forced to reuse the same memory after kexec
+ * (and thus reserve it persistently with EFI beforehand)
+ */
+#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS)
+# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1)
#endif
+
+/*
+ * memory regions which marked with flag MEMBLOCK_NOMAP(for example, the memory
+ * of the EFI_UNUSABLE_MEMORY type) may divide a continuous memory block into
+ * multiple parts. As a result, the number of memory regions is large.
+ */
+#ifdef CONFIG_EFI
+#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8)
+#endif
+
+
+#endif /* __ASM_MEMORY_H */
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
new file mode 100644
index 000000000000..8770c7ee759f
--- /dev/null
+++ b/arch/arm64/include/asm/mman.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef BUILD_VDSO
+#include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/hugetlb.h>
+#include <linux/shmem_fs.h>
+#include <linux/types.h>
+
+static inline vm_flags_t arch_calc_vm_prot_bits(unsigned long prot,
+ unsigned long pkey)
+{
+ vm_flags_t ret = 0;
+
+ if (system_supports_bti() && (prot & PROT_BTI))
+ ret |= VM_ARM64_BTI;
+
+ if (system_supports_mte() && (prot & PROT_MTE))
+ ret |= VM_MTE;
+
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0;
+ ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0;
+ ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0;
+ }
+#endif
+
+ return ret;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+static inline vm_flags_t arch_calc_vm_flag_bits(struct file *file,
+ unsigned long flags)
+{
+ /*
+ * Only allow MTE on anonymous mappings as these are guaranteed to be
+ * backed by tags-capable memory. The vm_flags may be overridden by a
+ * filesystem supporting MTE (RAM-based).
+ */
+ if (system_supports_mte()) {
+ if (flags & (MAP_ANONYMOUS | MAP_HUGETLB))
+ return VM_MTE_ALLOWED;
+ if (shmem_file(file) || is_file_hugepages(file))
+ return VM_MTE_ALLOWED;
+ }
+
+ return 0;
+}
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)
+
+static inline bool arch_validate_prot(unsigned long prot,
+ unsigned long addr __always_unused)
+{
+ unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM;
+
+ if (system_supports_bti())
+ supported |= PROT_BTI;
+
+ if (system_supports_mte())
+ supported |= PROT_MTE;
+
+ return (prot & ~supported) == 0;
+}
+#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
+
+static inline bool arch_validate_flags(vm_flags_t vm_flags)
+{
+ if (system_supports_mte()) {
+ /*
+ * only allow VM_MTE if VM_MTE_ALLOWED has been set
+ * previously
+ */
+ if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED))
+ return false;
+ }
+
+ if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) {
+ /* An executable GCS isn't a good idea. */
+ if (vm_flags & VM_EXEC)
+ return false;
+
+ /* The memory management core should prevent this */
+ VM_WARN_ON(vm_flags & VM_SHARED);
+ }
+
+ return true;
+
+}
+#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+
+#endif /* !BUILD_VDSO */
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2494fc01896a..137a173df1ff 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -1,31 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_MMU_H
#define __ASM_MMU_H
+#include <asm/cputype.h>
+
+#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
+#define USER_ASID_BIT 48
+#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
+#define TTBR_ASID_MASK (UL(0xffff) << 48)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/refcount.h>
+#include <asm/cpufeature.h>
+
+enum pgtable_type {
+ TABLE_PTE,
+ TABLE_PMD,
+ TABLE_PUD,
+ TABLE_P4D,
+};
+
typedef struct {
- unsigned int id;
- raw_spinlock_t id_lock;
- void *vdso;
+ atomic64_t id;
+#ifdef CONFIG_COMPAT
+ void *sigpage;
+#endif
+ refcount_t pinned;
+ void *vdso;
+ unsigned long flags;
+ u8 pkey_allocation_map;
} mm_context_t;
-#define ASID(mm) ((mm)->context.id & 0xffff)
+/*
+ * We use atomic64_read() here because the ASID for an 'mm_struct' can
+ * be reallocated when scheduling one of its threads following a
+ * rollover event (see new_context() and flush_context()). In this case,
+ * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
+ * may use a stale ASID. This is fine in principle as the new ASID is
+ * guaranteed to be clean in the TLB, but the TLBI routines have to take
+ * care to handle the following race:
+ *
+ * CPU 0 CPU 1 CPU 2
+ *
+ * // ptep_clear_flush(mm)
+ * xchg_relaxed(pte, 0)
+ * DSB ISHST
+ * old = ASID(mm)
+ * | <rollover>
+ * | new = new_context(mm)
+ * \-----------------> atomic_set(mm->context.id, new)
+ * cpu_switch_mm(mm)
+ * // Hardware walk of pte using new ASID
+ * TLBI(old)
+ *
+ * In this scenario, the barrier on CPU 0 and the dependency on CPU 1
+ * ensure that the page-table walker on CPU 1 *must* see the invalid PTE
+ * written by CPU 0.
+ */
+#define ASID(mm) (atomic64_read(&(mm)->context.id) & 0xffff)
+static inline bool arm64_kernel_unmapped_at_el0(void)
+{
+ return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0);
+}
+
+extern void arm64_memblock_init(void);
extern void paging_init(void);
-extern void setup_mm_for_reboot(void);
-extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void bootmem_init(void);
+extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot);
+extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot, bool page_mappings_only);
+extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
+extern void mark_linear_text_alias_ro(void);
+extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
+extern void linear_map_maybe_split_to_ptes(void);
+
+/*
+ * This check is triggered during the early boot before the cpufeature
+ * is initialised. Checking the status on the local CPU allows the boot
+ * CPU to detect the need for non-global mappings and thus avoiding a
+ * pagetable re-write after all the CPUs are booted. This check will be
+ * anyway run on individual CPUs, allowing us to get the consistent
+ * state once the SMP CPUs are up and thus make the switch to non-global
+ * mappings if required.
+ */
+static inline bool kaslr_requires_kpti(void)
+{
+ /*
+ * E0PD does a similar job to KPTI so can be used instead
+ * where available.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
+ u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ if (cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+void kpti_install_ng_mappings(void);
+#else
+static inline void kpti_install_ng_mappings(void) {}
+#endif
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..cc80af59c69e 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -1,161 +1,327 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/mmu_context.h
*
* Copyright (C) 1996 Russell King.
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H
+#ifndef __ASSEMBLER__
+
#include <linux/compiler.h>
#include <linux/sched.h>
+#include <linux/sched/hotplug.h>
+#include <linux/mm_types.h>
+#include <linux/pgtable.h>
+#include <linux/pkeys.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/gcs.h>
#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
-#include <asm/pgtable.h>
+#include <asm/sysreg.h>
+#include <asm/tlbflush.h>
-#define MAX_ASID_BITS 16
+extern bool rodata_full;
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-
-#ifdef CONFIG_PID_IN_CONTEXTIDR
-static inline void contextidr_thread_switch(struct task_struct *next)
-{
- asm(
- " msr contextidr_el1, %0\n"
- " isb"
- :
- : "r" (task_pid_nr(next)));
-}
-#else
static inline void contextidr_thread_switch(struct task_struct *next)
{
+ if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
+ return;
+
+ write_sysreg(task_pid_nr(next), contextidr_el1);
+ isb();
}
-#endif
/*
- * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
+ * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
*/
+static inline void cpu_set_reserved_ttbr0_nosync(void)
+{
+ unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
+
+ write_sysreg(ttbr, ttbr0_el1);
+}
+
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = page_to_phys(empty_zero_page);
+ cpu_set_reserved_ttbr0_nosync();
+ isb();
+}
- asm(
- " msr ttbr0_el1, %0 // set TTBR0\n"
- " isb"
- :
- : "r" (ttbr));
+void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
+static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUG_ON(pgd == swapper_pg_dir);
+ cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
-static inline void switch_new_context(struct mm_struct *mm)
+/*
+ * Ensure TCR.T0SZ is set to the provided value.
+ */
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
- unsigned long flags;
+ unsigned long tcr = read_sysreg(tcr_el1);
- __new_context(mm);
+ if ((tcr & TCR_EL1_T0SZ_MASK) == t0sz)
+ return;
- local_irq_save(flags);
- cpu_switch_mm(mm->pgd, mm);
- local_irq_restore(flags);
+ tcr &= ~TCR_EL1_T0SZ_MASK;
+ tcr |= t0sz;
+ write_sysreg(tcr, tcr_el1);
+ isb();
}
-static inline void check_and_switch_context(struct mm_struct *mm,
- struct task_struct *tsk)
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is a not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
{
- /*
- * Required during context switch to avoid speculative page table
- * walking with the wrong TTBR.
- */
+ struct mm_struct *mm = current->active_mm;
+
cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual));
- if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
- /*
- * The ASID is from the current generation, just switch to the
- * new pgd. This condition is only true for calls from
- * context_switch() and interrupts are already disabled.
- */
+ if (mm != &init_mm && !system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
- else if (irqs_disabled())
- /*
- * Defer the new ASID allocation until after the context
- * switch critical region since __new_context() cannot be
- * called with interrupts disabled.
- */
- set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
- else
- /*
- * That is a direct call to switch_mm() or activate_mm() with
- * interrupts enabled and a new context.
- */
- switch_new_context(mm);
}
-#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0)
-#define destroy_context(mm) do { } while(0)
-
-#define finish_arch_post_lock_switch \
- finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
+static inline void cpu_install_idmap(void)
{
- if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
- struct mm_struct *mm = current->mm;
- unsigned long flags;
-
- __new_context(mm);
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ __cpu_set_tcr_t0sz(TCR_T0SZ(IDMAP_VA_BITS));
- local_irq_save(flags);
- cpu_switch_mm(mm->pgd, mm);
- local_irq_restore(flags);
- }
+ cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
}
/*
- * This is called when "tsk" is about to enter lazy TLB mode.
+ * Load our new page tables. A strict BBM approach requires that we ensure that
+ * TLBs are free of any entries that may overlap with the global mappings we are
+ * about to install.
*
- * mm: describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
+ * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero
+ * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime
+ * services), while for a userspace-driven test_resume cycle it points to
+ * userspace page tables (and we must point it at a zero page ourselves).
*
- * tsk->mm will be NULL
+ * We change T0SZ as part of installing the idmap. This is undone by
+ * cpu_uninstall_idmap() in __cpu_suspend_exit().
*/
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
+{
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ __cpu_set_tcr_t0sz(t0sz);
+
+ /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
+ write_sysreg(ttbr0, ttbr0_el1);
+ isb();
+}
+
+void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
+
+static inline void cpu_enable_swapper_cnp(void)
+{
+ __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
+}
+
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
{
+ /*
+ * Only for early TTBR1 replacement before cpucaps are finalized and
+ * before we've decided whether to use CNP.
+ */
+ WARN_ON(system_capabilities_finalized());
+ __cpu_replace_ttbr1(pgdp, false);
}
/*
- * This is the actual mm switch as far as the scheduler
- * is concerned. No registers are touched. We avoid
- * calling the CPU specific function when the mm hasn't
- * actually changed.
+ * It would be nice to return ASIDs back to the allocator, but unfortunately
+ * that introduces a race with a generation rollover where we could erroneously
+ * free an ASID allocated in a future generation. We could workaround this by
+ * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
+ * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
+ * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
+ * take CPU migration into account.
*/
+void check_and_switch_context(struct mm_struct *mm);
+
+#define init_new_context(tsk, mm) init_new_context(tsk, mm)
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ atomic64_set(&mm->context.id, 0);
+ refcount_set(&mm->context.pinned, 0);
+
+ /* pkey 0 is the default, so always reserve it. */
+ mm->context.pkey_allocation_map = BIT(0);
+
+ return 0;
+}
+
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+}
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+
+ return 0;
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ u64 ttbr;
+
+ if (!system_uses_ttbr0_pan())
+ return;
+
+ if (mm == &init_mm)
+ ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
+ else
+ ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
+
+ WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
+}
+#else
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+}
+#endif
+
+#define enter_lazy_tlb enter_lazy_tlb
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ /*
+ * We don't actually care about the ttbr0 mapping, so point it at the
+ * zero page.
+ */
+ update_saved_ttbr0(tsk, &init_mm);
+}
+
+static inline void __switch_mm(struct mm_struct *next)
+{
+ /*
+ * init_mm.pgd does not contain any user mappings and it is always
+ * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+ */
+ if (next == &init_mm) {
+ cpu_set_reserved_ttbr0();
+ return;
+ }
+
+ check_and_switch_context(next);
+}
+
static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
- unsigned int cpu = smp_processor_id();
+ if (prev != next)
+ __switch_mm(next);
- if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
- check_and_switch_context(next, tsk);
+ /*
+ * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
+ * value may have not been initialised yet (activate_mm caller) or the
+ * ASID has changed since the last run (following the context switch
+ * of another thread of the same process).
+ */
+ update_saved_ttbr0(tsk, next);
}
-#define deactivate_mm(tsk,mm) do { } while (0)
-#define activate_mm(prev,next) switch_mm(prev, next, NULL)
+static inline const struct cpumask *
+__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask)
+{
+ if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ return mask;
+
+ if (!is_compat_thread(task_thread_info(p)))
+ return mask;
-#endif
+ return system_32bit_el0_cpumask();
+}
+
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, cpu_possible_mask);
+}
+#define task_cpu_possible_mask task_cpu_possible_mask
+
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p);
+
+void verify_cpu_asid_bits(void);
+void post_ttbr_update_workaround(void);
+
+unsigned long arm64_mm_context_get(struct mm_struct *mm);
+void arm64_mm_context_put(struct mm_struct *mm);
+
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> 8;
+}
+
+/*
+ * Only enforce protection keys on the current process, because there is no
+ * user context to access POR_EL0 for another address space.
+ */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ if (!system_supports_poe())
+ return true;
+
+ /* allow access if the VMA is not one from this process */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return por_el0_allows_pkey(vma_pkey(vma), write, execute);
+}
+
+#define deactivate_mm deactivate_mm
+static inline void deactivate_mm(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ gcs_free(tsk);
+}
+
+
+#include <asm-generic/mmu_context.h>
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !__ASM_MMU_CONTEXT_H */
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e80e232b730e..fb9b88eebeb1 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -1,23 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_MODULE_H
#define __ASM_MODULE_H
#include <asm-generic/module.h>
-#define MODULE_ARCH_VERMAGIC "aarch64"
+struct mod_plt_sec {
+ int plt_shndx;
+ int plt_num_entries;
+ int plt_max_entries;
+};
+
+struct mod_arch_specific {
+ struct mod_plt_sec core;
+ struct mod_plt_sec init;
+
+ /* for CONFIG_DYNAMIC_FTRACE */
+ struct plt_entry *ftrace_trampolines;
+ struct plt_entry *init_ftrace_trampolines;
+};
+
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, const Elf64_Rela *rela,
+ Elf64_Sym *sym);
+
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, u64 val);
+
+struct plt_entry {
+ /*
+ * A program that conforms to the AArch64 Procedure Call Standard
+ * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+ * IP1 (x17) may be inserted at any branch instruction that is
+ * exposed to a relocation that supports long branches. Since that
+ * is exactly what we are dealing with here, we are free to use x16
+ * as a scratch register in the PLT veneers.
+ */
+ __le32 adrp; /* adrp x16, .... */
+ __le32 add; /* add x16, x16, #0x.... */
+ __le32 br; /* br x16 */
+};
+
+static inline bool is_forbidden_offset_for_adrp(void *place)
+{
+ return cpus_have_final_cap(ARM64_WORKAROUND_843419) &&
+ ((u64)place & 0xfff) >= 0xff8;
+}
+
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ const Elf_Shdr *s, *se;
+ const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
+ }
+
+ return NULL;
+}
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
new file mode 100644
index 000000000000..fb944b46846d
--- /dev/null
+++ b/arch/arm64/include/asm/module.lds.h
@@ -0,0 +1,27 @@
+SECTIONS {
+ .plt 0 : { BYTE(0) }
+ .init.plt 0 : { BYTE(0) }
+ .text.ftrace_trampoline 0 : { BYTE(0) }
+ .init.text.ftrace_trampoline 0 : { BYTE(0) }
+
+#ifdef CONFIG_KASAN_SW_TAGS
+ /*
+ * Outlined checks go into comdat-deduplicated sections named .text.hot.
+ * Because they are in comdats they are not combined by the linker and
+ * we otherwise end up with multiple sections with the same .text.hot
+ * name in the .ko file. The kernel module loader warns if it sees
+ * multiple sections with the same name so we use this sections
+ * directive to force them into a single section and silence the
+ * warning.
+ */
+ .text.hot : { *(.text.hot) }
+#endif
+
+#ifdef CONFIG_UNWIND_TABLES
+ /*
+ * Currently, we only use unwind info at module load time, so we can
+ * put it into the .init allocation.
+ */
+ .init.eh_frame : { *(.eh_frame) }
+#endif
+}
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
new file mode 100644
index 000000000000..b721d3134ab6
--- /dev/null
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Linux-specific definitions for managing interactions with Microsoft's
+ * Hyper-V hypervisor. The definitions in this file are specific to
+ * the ARM64 architecture. See include/asm-generic/mshyperv.h for
+ * definitions are that architecture independent.
+ *
+ * Definitions that are derived from Hyper-V code or headers should not go in
+ * this file, but should instead go in the relevant files in include/hyperv.
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#ifndef _ASM_MSHYPERV_H
+#define _ASM_MSHYPERV_H
+
+#include <linux/types.h>
+#include <linux/arm-smccc.h>
+#include <hyperv/hvhdk.h>
+
+/*
+ * Declare calls to get and set Hyper-V VP register values on ARM64, which
+ * requires a hypercall.
+ */
+
+void hv_set_vpreg(u32 reg, u64 value);
+u64 hv_get_vpreg(u32 reg);
+void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result);
+
+static inline void hv_set_msr(unsigned int reg, u64 value)
+{
+ hv_set_vpreg(reg, value);
+}
+
+static inline u64 hv_get_msr(unsigned int reg)
+{
+ return hv_get_vpreg(reg);
+}
+
+/*
+ * Nested is not supported on arm64
+ */
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value)
+{
+ hv_set_msr(reg, value);
+}
+
+static inline u64 hv_get_non_nested_msr(unsigned int reg)
+{
+ return hv_get_msr(reg);
+}
+
+/* SMCCC hypercall parameters */
+#define HV_SMCCC_FUNC_NUMBER 1
+#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \
+ ARM_SMCCC_STD_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ HV_SMCCC_FUNC_NUMBER)
+
+#include <asm-generic/mshyperv.h>
+
+#endif
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
new file mode 100644
index 000000000000..14ee86b019c2
--- /dev/null
+++ b/arch/arm64/include/asm/mte-def.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_DEF_H
+#define __ASM_MTE_DEF_H
+
+#define MTE_GRANULE_SIZE UL(16)
+#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
+#define MTE_TAG_SHIFT 56
+#define MTE_TAG_SIZE 4
+#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8)
+
+#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
+
+#endif /* __ASM_MTE_DEF_H */
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
new file mode 100644
index 000000000000..352139271918
--- /dev/null
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_KASAN_H
+#define __ASM_MTE_KASAN_H
+
+#include <asm/compiler.h>
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+
+#ifndef __ASSEMBLER__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_KASAN_HW_TAGS
+
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return static_branch_unlikely(&mte_async_or_asymm_mode);
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#ifdef CONFIG_ARM64_MTE
+
+/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
+ * affects EL0 and TCF affects EL1 irrespective of which TTBR is
+ * used.
+ * The kernel accesses TTBR0 usually with LDTR/STTR instructions
+ * when UAO is available, so these would act as EL0 accesses using
+ * TCF0.
+ * However futex.h code uses exclusives which would be executed as
+ * EL1, this can potentially cause a tag check fault even if the
+ * user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag check override (TCO) bit disables temporarily the tag checking
+ * preventing the issue.
+ */
+static inline void mte_disable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void mte_enable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+/*
+ * These functions disable tag checking only if in MTE async mode
+ * since the sync mode generates exceptions synchronously and the
+ * nofault or load_unaligned_zeropad can handle them.
+ */
+static inline void __mte_disable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_disable_tco();
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_enable_tco();
+}
+
+/*
+ * These functions are meant to be only used from KASAN runtime through
+ * the arch_*() interface defined in asm/memory.h.
+ * These functions don't include system_supports_mte() checks,
+ * as KASAN only calls them when MTE is supported and enabled.
+ */
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+ /* Note: The format of KASAN tags is 0xF<x> */
+ u8 tag = 0xF0 | (u8)(((u64)(ptr)) >> MTE_TAG_SHIFT);
+
+ return tag;
+}
+
+/* Get allocation tag for the address. */
+static inline u8 mte_get_mem_tag(void *addr)
+{
+ asm(__MTE_PREAMBLE "ldg %0, [%0]"
+ : "+r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+/* Generate a random tag. */
+static inline u8 mte_get_random_tag(void)
+{
+ void *addr;
+
+ asm(__MTE_PREAMBLE "irg %0, %0"
+ : "=r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+static inline u64 __stg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag.
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
+ * size must be MTE_GRANULE_SIZE aligned.
+ */
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+ bool init)
+{
+ u64 curr, mask, dczid, dczid_bs, dczid_dzp, end1, end2, end3;
+
+ /* Read DC G(Z)VA block size from the system register. */
+ dczid = read_cpuid(DCZID_EL0);
+ dczid_bs = 4ul << (dczid & 0xf);
+ dczid_dzp = (dczid >> 4) & 1;
+
+ curr = (u64)__tag_set(addr, tag);
+ mask = dczid_bs - 1;
+ /* STG/STZG up to the end of the first block. */
+ end1 = curr | mask;
+ end3 = curr + size;
+ /* DC GVA / GZVA in [end1, end2) */
+ end2 = end3 & ~mask;
+
+ /*
+ * The following code uses STG on the first DC GVA block even if the
+ * start address is aligned - it appears to be faster than an alignment
+ * check + conditional branch. Also, if the range size is at least 2 DC
+ * GVA blocks, the first two loops can use post-condition to save one
+ * branch each.
+ */
+#define SET_MEMTAG_RANGE(stg_post, dc_gva) \
+ do { \
+ if (!dczid_dzp && size >= 2 * dczid_bs) {\
+ do { \
+ curr = stg_post(curr); \
+ } while (curr < end1); \
+ \
+ do { \
+ dc_gva(curr); \
+ curr += dczid_bs; \
+ } while (curr < end2); \
+ } \
+ \
+ while (curr < end3) \
+ curr = stg_post(curr); \
+ } while (0)
+
+ if (init)
+ SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+ else
+ SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
+}
+
+void mte_enable_kernel_sync(void);
+void mte_enable_kernel_async(void);
+void mte_enable_kernel_asymm(void);
+int mte_enable_kernel_store_only(void);
+
+#else /* CONFIG_ARM64_MTE */
+
+static inline void mte_disable_tco(void)
+{
+}
+
+static inline void mte_enable_tco(void)
+{
+}
+
+static inline void __mte_disable_tco_async(void)
+{
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+}
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+ return 0xFF;
+}
+
+static inline u8 mte_get_mem_tag(void *addr)
+{
+ return 0xFF;
+}
+
+static inline u8 mte_get_random_tag(void)
+{
+ return 0xFF;
+}
+
+static inline void mte_set_mem_tag_range(void *addr, size_t size,
+ u8 tag, bool init)
+{
+}
+
+static inline void mte_enable_kernel_sync(void)
+{
+}
+
+static inline void mte_enable_kernel_async(void)
+{
+}
+
+static inline void mte_enable_kernel_asymm(void)
+{
+}
+
+static inline int mte_enable_kernel_store_only(void)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_MTE_KASAN_H */
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
new file mode 100644
index 000000000000..6d4a78b9dc3e
--- /dev/null
+++ b/arch/arm64/include/asm/mte.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_H
+#define __ASM_MTE_H
+
+#include <asm/compiler.h>
+#include <asm/mte-def.h>
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bitfield.h>
+#include <linux/kasan-enabled.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/pgtable-types.h>
+
+void mte_clear_page_tags(void *addr);
+unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
+ unsigned long n);
+unsigned long mte_copy_tags_to_user(void __user *to, void *from,
+ unsigned long n);
+int mte_save_tags(struct page *page);
+void mte_save_page_tags(const void *page_addr, void *tag_storage);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_page_tags(void *page_addr, const void *tag_storage);
+void mte_invalidate_tags(int type, pgoff_t offset);
+void mte_invalidate_tags_area(int type);
+void *mte_allocate_tag_storage(void);
+void mte_free_tag_storage(char *storage);
+
+#ifdef CONFIG_ARM64_MTE
+
+/* track which pages have valid allocation tags */
+#define PG_mte_tagged PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the page flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &page->flags.f);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+ bool ret = test_bit(PG_mte_tagged, &page->flags.f);
+
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ /*
+ * If the page is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ * Locked: PG_mte_lock && !PG_mte_tagged
+ * Unlocked: !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page)));
+
+ if (!test_and_set_bit(PG_mte_lock, &page->flags.f))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&page->flags.f, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
+
+void mte_zero_clear_page_tags(void *addr);
+void mte_sync_tags(pte_t pte, unsigned int nr_pages);
+void mte_copy_page_tags(void *kto, const void *kfrom);
+void mte_thread_init_user(void);
+void mte_thread_switch(struct task_struct *next);
+void mte_cpu_setup(void);
+void mte_suspend_enter(void);
+void mte_suspend_exit(void);
+long set_mte_ctrl(struct task_struct *task, unsigned long arg);
+long get_mte_ctrl(struct task_struct *task);
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data);
+size_t mte_probe_user_range(const char __user *uaddr, size_t size);
+
+#else /* CONFIG_ARM64_MTE */
+
+/* unused if !CONFIG_ARM64_MTE, silence the compiler */
+#define PG_mte_tagged 0
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+ return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ return false;
+}
+static inline void mte_zero_clear_page_tags(void *addr)
+{
+}
+static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
+{
+}
+static inline void mte_copy_page_tags(void *kto, const void *kfrom)
+{
+}
+static inline void mte_thread_init_user(void)
+{
+}
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline void mte_suspend_enter(void)
+{
+}
+static inline void mte_suspend_exit(void)
+{
+}
+static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+{
+ return 0;
+}
+static inline long get_mte_ctrl(struct task_struct *task)
+{
+ return 0;
+}
+static inline int mte_ptrace_copy_tags(struct task_struct *child,
+ long request, unsigned long addr,
+ unsigned long data)
+{
+ return -EIO;
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE)
+static inline void folio_set_hugetlb_mte_tagged(struct folio *folio)
+{
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the folio flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &folio->flags.f);
+
+}
+
+static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
+{
+ bool ret = test_bit(PG_mte_tagged, &folio->flags.f);
+
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ /*
+ * If the folio is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
+{
+ VM_WARN_ON_ONCE(!folio_test_hugetlb(folio));
+
+ if (!test_and_set_bit(PG_mte_lock, &folio->flags.f))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&folio->flags.f, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
+#else
+static inline void folio_set_hugetlb_mte_tagged(struct folio *folio)
+{
+}
+
+static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio)
+{
+ return false;
+}
+
+static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio)
+{
+ return false;
+}
+#endif
+
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+ if (!system_supports_mte())
+ return;
+
+ /*
+ * Re-enable tag checking (TCO set on exception entry). This is only
+ * necessary if MTE is enabled in either the kernel or the userspace
+ * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
+ * for both). With MTE disabled in the kernel and disabled or
+ * asynchronous in userspace, tag check faults (including in uaccesses)
+ * are not reported, therefore there is no need to re-enable checking.
+ * This is beneficial on microarchitectures where re-enabling TCO is
+ * expensive.
+ */
+ if (kasan_hw_tags_enabled() ||
+ (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
+ asm volatile(SET_PSTATE_TCO(0));
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_check_tfsr_el1(void);
+
+static inline void mte_check_tfsr_entry(void)
+{
+ if (!kasan_hw_tags_enabled())
+ return;
+
+ mte_check_tfsr_el1();
+}
+
+static inline void mte_check_tfsr_exit(void)
+{
+ if (!kasan_hw_tags_enabled())
+ return;
+
+ /*
+ * The asynchronous faults are sync'ed automatically with
+ * TFSR_EL1 on kernel entry but for exit an explicit dsb()
+ * is required.
+ */
+ dsb(nsh);
+ isb();
+
+ mte_check_tfsr_el1();
+}
+#else
+static inline void mte_check_tfsr_el1(void)
+{
+}
+static inline void mte_check_tfsr_entry(void)
+{
+}
+static inline void mte_check_tfsr_exit(void)
+{
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_MTE_H */
diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h
new file mode 100644
index 000000000000..5f13505d39fc
--- /dev/null
+++ b/arch/arm64/include/asm/neon-intrinsics.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#ifndef __ASM_NEON_INTRINSICS_H
+#define __ASM_NEON_INTRINSICS_H
+
+#include <asm-generic/int-ll64.h>
+
+/*
+ * In the kernel, u64/s64 are [un]signed long long, not [un]signed long.
+ * So by redefining these macros to the former, we can force gcc-stdint.h
+ * to define uint64_t / in64_t in a compatible manner.
+ */
+
+#ifdef __INT64_TYPE__
+#undef __INT64_TYPE__
+#define __INT64_TYPE__ long long
+#endif
+
+#ifdef __UINT64_TYPE__
+#undef __UINT64_TYPE__
+#define __UINT64_TYPE__ unsigned long long
+#endif
+
+/*
+ * genksyms chokes on the ARM NEON instrinsics system header, but we
+ * don't export anything it defines anyway, so just disregard when
+ * genksyms execute.
+ */
+#ifndef __GENKSYMS__
+#include <arm_neon.h>
+#endif
+
+#ifdef CONFIG_CC_IS_CLANG
+#pragma clang diagnostic ignored "-Wincompatible-pointer-types"
+#endif
+
+#endif /* __ASM_NEON_INTRINSICS_H */
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
new file mode 100644
index 000000000000..acebee4605b5
--- /dev/null
+++ b/arch/arm64/include/asm/neon.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm64/include/asm/neon.h
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#ifndef __ASM_NEON_H
+#define __ASM_NEON_H
+
+#include <linux/types.h>
+#include <asm/fpsimd.h>
+
+#define cpu_has_neon() system_supports_fpsimd()
+
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
+
+#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
new file mode 100644
index 000000000000..8c8cf4297cc3
--- /dev/null
+++ b/arch/arm64/include/asm/numa.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+#include <asm-generic/numa.h>
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
new file mode 100644
index 000000000000..d402e08442ee
--- /dev/null
+++ b/arch/arm64/include/asm/page-def.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Based on arch/arm/include/asm/page.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ * Copyright (C) 2017 ARM Ltd.
+ */
+#ifndef __ASM_PAGE_DEF_H
+#define __ASM_PAGE_DEF_H
+
+#include <linux/const.h>
+
+#include <vdso/page.h>
+
+#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 46bf66628b6a..00f117ff4f7a 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -1,66 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/page.h
*
* Copyright (C) 1995-2003 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
-/* PAGE_SHIFT determines the page size */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define PAGE_SHIFT 16
-#else
-#define PAGE_SHIFT 12
-#endif
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <asm/page-def.h>
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA 1
+#ifndef __ASSEMBLER__
-#ifndef __ASSEMBLY__
+#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/types.h> /* for gfp_t */
+#include <asm/pgtable-types.h>
-#ifdef CONFIG_ARM64_64K_PAGES
-#include <asm/pgtable-2level-types.h>
-#else
-#include <asm/pgtable-3level-types.h>
-#endif
+struct page;
+struct vm_area_struct;
-extern void __cpu_clear_user_page(void *p, unsigned long user);
-extern void __cpu_copy_user_page(void *to, const void *from,
- unsigned long user);
extern void copy_page(void *to, const void *from);
extern void clear_page(void *to);
-#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr)
-#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr)
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+
+void copy_highpage(struct page *to, struct page *from);
+#define __HAVE_ARCH_COPY_HIGHPAGE
+
+struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
+ unsigned long vaddr);
+#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
+
+bool tag_clear_highpages(struct page *to, int numpages);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
typedef struct page *pgtable_t;
-#ifdef CONFIG_HAVE_ARCH_PFN_VALID
-extern int pfn_valid(unsigned long);
-#endif
+int pfn_is_map_memory(unsigned long pfn);
#include <asm/memory.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#define VM_DATA_DEFAULT_FLAGS \
- (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
#include <asm-generic/getorder.h>
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
new file mode 100644
index 000000000000..9aa193e0e8f2
--- /dev/null
+++ b/arch/arm64/include/asm/paravirt.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_PARAVIRT_H
+#define _ASM_ARM64_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif // CONFIG_PARAVIRT
+
+#endif
diff --git a/arch/arm64/include/asm/paravirt_api_clock.h b/arch/arm64/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/arm64/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
new file mode 100644
index 000000000000..016eb6b46dc0
--- /dev/null
+++ b/arch/arm64/include/asm/pci.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PCI_H
+#define __ASM_PCI_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+
+#define PCIBIOS_MIN_IO 0x1000
+
+/*
+ * Set to 1 if the kernel should re-assign all PCI bus numbers
+ */
+#define pcibios_assign_all_busses() \
+ (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+
+#define arch_can_pci_mmap_wc() 1
+
+/* Generic PCI */
+#include <asm-generic/pci.h>
+
+#endif /* __ASM_PCI_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
new file mode 100644
index 000000000000..b57b2bb00967
--- /dev/null
+++ b/arch/arm64/include/asm/percpu.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ */
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/alternative.h>
+#include <asm/cmpxchg.h>
+#include <asm/stack_pointer.h>
+#include <asm/sysreg.h>
+
+static inline void set_my_cpu_offset(unsigned long off)
+{
+ asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
+ "msr tpidr_el2, %0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ :: "r" (off) : "memory");
+}
+
+static inline unsigned long __hyp_my_cpu_offset(void)
+{
+ /*
+ * Non-VHE hyp code runs with preemption disabled. No need to hazard
+ * the register access against barrier() as in __kern_my_cpu_offset.
+ */
+ return read_sysreg(tpidr_el2);
+}
+
+static inline unsigned long __kern_my_cpu_offset(void)
+{
+ unsigned long off;
+
+ /*
+ * We want to allow caching the value, so avoid using volatile and
+ * instead use a fake stack read to hazard against barrier().
+ */
+ asm(ALTERNATIVE("mrs %0, tpidr_el1",
+ "mrs %0, tpidr_el2",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (off) :
+ "Q" (*(const unsigned long *)current_stack_pointer));
+
+ return off;
+}
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define __my_cpu_offset __hyp_my_cpu_offset()
+#else
+#define __my_cpu_offset __kern_my_cpu_offset()
+#endif
+
+#define PERCPU_RW_OPS(sz) \
+static inline unsigned long __percpu_read_##sz(void *ptr) \
+{ \
+ return READ_ONCE(*(u##sz *)ptr); \
+} \
+ \
+static inline void __percpu_write_##sz(void *ptr, unsigned long val) \
+{ \
+ WRITE_ONCE(*(u##sz *)ptr, (u##sz)val); \
+}
+
+#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline void \
+__percpu_##name##_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz tmp; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n" \
+ #op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \
+ __nops(3)) \
+ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
+}
+
+#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline u##sz \
+__percpu_##name##_return_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz ret; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[ret], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ __nops(2)) \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
+ \
+ return ret; \
+}
+
+#define PERCPU_OP(name, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+#define PERCPU_RET_OP(name, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+
+/*
+ * Use value-returning atomics for CPU-local ops as they are more likely
+ * to execute "near" to the CPU (e.g. in L1$).
+ *
+ * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop
+ */
+PERCPU_OP(add, add, ldadd)
+PERCPU_OP(andnot, bic, ldclr)
+PERCPU_OP(or, orr, ldset)
+PERCPU_RET_OP(add, add, ldadd)
+
+#undef PERCPU_RW_OPS
+#undef __PERCPU_OP_CASE
+#undef __PERCPU_RET_OP_CASE
+#undef PERCPU_OP
+#undef PERCPU_RET_OP
+
+/*
+ * It would be nice to avoid the conditional call into the scheduler when
+ * re-enabling preemption for preemptible kernels, but doing that in a way
+ * which builds inside a module would mean messing directly with the preempt
+ * count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model for missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
+ */
+
+#define _pcp_protect(op, pcp, ...) \
+({ \
+ preempt_disable_notrace(); \
+ op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
+ preempt_enable_notrace(); \
+})
+
+#define _pcp_protect_return(op, pcp, args...) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable_notrace(); \
+ __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
+ preempt_enable_notrace(); \
+ __retval; \
+})
+
+#define this_cpu_read_1(pcp) \
+ _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) \
+ _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) \
+ _pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp) \
+ _pcp_protect_return(__percpu_read_64, pcp)
+
+#define this_cpu_write_1(pcp, val) \
+ _pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
+#define this_cpu_write_2(pcp, val) \
+ _pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
+#define this_cpu_write_4(pcp, val) \
+ _pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
+#define this_cpu_write_8(pcp, val) \
+ _pcp_protect(__percpu_write_64, pcp, (unsigned long)val)
+
+#define this_cpu_add_1(pcp, val) \
+ _pcp_protect(__percpu_add_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val) \
+ _pcp_protect(__percpu_add_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val) \
+ _pcp_protect(__percpu_add_case_32, pcp, val)
+#define this_cpu_add_8(pcp, val) \
+ _pcp_protect(__percpu_add_case_64, pcp, val)
+
+#define this_cpu_add_return_1(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_8, pcp, val)
+#define this_cpu_add_return_2(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_16, pcp, val)
+#define this_cpu_add_return_4(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_32, pcp, val)
+#define this_cpu_add_return_8(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_64, pcp, val)
+
+#define this_cpu_and_1(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_8, pcp, ~val)
+#define this_cpu_and_2(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_16, pcp, ~val)
+#define this_cpu_and_4(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_32, pcp, ~val)
+#define this_cpu_and_8(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_64, pcp, ~val)
+
+#define this_cpu_or_1(pcp, val) \
+ _pcp_protect(__percpu_or_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) \
+ _pcp_protect(__percpu_or_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) \
+ _pcp_protect(__percpu_or_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val) \
+ _pcp_protect(__percpu_or_case_64, pcp, val)
+
+#define this_cpu_xchg_1(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+
+#define this_cpu_cmpxchg_1(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = o; \
+ new__ = n; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
+#define __per_cpu_offset
+#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu))
+#endif
+
+#include <asm-generic/percpu.h>
+
+/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
+#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT)
+#undef this_cpu_ptr
+#define this_cpu_ptr raw_cpu_ptr
+#undef __this_cpu_read
+#define __this_cpu_read raw_cpu_read
+#undef __this_cpu_write
+#define __this_cpu_write raw_cpu_write
+#endif
+
+#endif /* __ASM_PERCPU_H */
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index d26d1d53c0d7..ee45b4e77347 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -1,27 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H
-#ifdef CONFIG_HW_PERF_EVENTS
-struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+#include <asm/stack_pointer.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->pc = (__ip); \
+ (regs)->regs[29] = (unsigned long) __builtin_frame_address(0); \
+ (regs)->sp = current_stack_pointer; \
+ (regs)->pstate = PSR_MODE_EL1h; \
+}
+
#endif
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index f214069ec5d5..1b4509d3382c 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/pgalloc.h
*
* Copyright (C) 2000-2001 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PGALLOC_H
#define __ASM_PGALLOC_H
@@ -24,70 +13,92 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#define check_pgt_cache() do { } while (0)
+#define __HAVE_ARCH_PGD_FREE
+#define __HAVE_ARCH_PUD_FREE
+#include <asm-generic/pgalloc.h>
-#ifndef CONFIG_ARM64_64K_PAGES
+#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+#if CONFIG_PGTABLE_LEVELS > 2
+
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
- return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+ set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
}
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- free_page((unsigned long)pmd);
-}
+ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF;
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+ pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
+ __pud_populate(pudp, __pa(pmdp), pudval);
+}
+#else
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
- set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+ BUILD_BUG();
}
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
-#endif /* CONFIG_ARM64_64K_PAGES */
-
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+#if CONFIG_PGTABLE_LEVELS > 3
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
+{
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
+}
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- return (pte_t *)__get_free_page(PGALLOC_GFP);
+ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF;
+
+ p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
+ __p4d_populate(p4dp, __pa(pudp), p4dval);
}
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!pgtable_l4_enabled())
+ return;
+ __pud_free(mm, pud);
+}
+#else
+static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
- struct page *pte;
+ BUILD_BUG();
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
- pte = alloc_pages(PGALLOC_GFP, 0);
- if (pte)
- pgtable_page_ctor(pte);
+#if CONFIG_PGTABLE_LEVELS > 4
- return pte;
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(p4dp) | prot));
}
-/*
- * Free a PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
{
- if (pte)
- free_page((unsigned long)pte);
+ pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_AF;
+
+ pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN;
+ __pgd_populate(pgdp, __pa(p4dp), pgdval);
}
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
{
- pgtable_page_dtor(pte);
- __free_page(pte);
+ BUILD_BUG();
}
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
+extern pgd_t *pgd_alloc(struct mm_struct *mm);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{
- set_pmd(pmdp, __pmd(pte | prot));
+ set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
}
/*
@@ -97,17 +108,17 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
- /*
- * The pmd must be loaded with the physical address of the PTE table
- */
- __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm && mm != &init_mm);
+ __pmd_populate(pmdp, __pa(ptep),
+ PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN);
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
- __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm == &init_mm);
+ __pmd_populate(pmdp, page_to_phys(ptep),
+ PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN);
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
#endif
diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h
deleted file mode 100644
index 0a8ed3f94e93..000000000000
--- a/arch/arm64/include/asm/pgtable-2level-hwdef.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PGTABLE_2LEVEL_HWDEF_H
-#define __ASM_PGTABLE_2LEVEL_HWDEF_H
-
-/*
- * With LPAE and 64KB pages, there are 2 levels of page tables. Each level has
- * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not
- * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each
- * entry representing 512MB. The user and kernel address spaces are limited to
- * 512GB and therefore we only use 1024 entries in the PGD.
- */
-#define PTRS_PER_PTE 8192
-#define PTRS_PER_PGD 1024
-
-/*
- * PGDIR_SHIFT determines the size a top-level page table entry can map.
- */
-#define PGDIR_SHIFT 29
-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/*
- * section address mask and size definitions.
- */
-#define SECTION_SHIFT 29
-#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK (~(SECTION_SIZE-1))
-
-#endif
diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h
deleted file mode 100644
index 3c3ca7d361e4..000000000000
--- a/arch/arm64/include/asm/pgtable-2level-types.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PGTABLE_2LEVEL_TYPES_H
-#define __ASM_PGTABLE_2LEVEL_TYPES_H
-
-typedef u64 pteval_t;
-typedef u64 pgdval_t;
-typedef pgdval_t pmdval_t;
-
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { pteval_t pte; } pte_t;
-typedef struct { pgdval_t pgd; } pgd_t;
-typedef struct { pteval_t pgprot; } pgprot_t;
-
-#define pte_val(x) ((x).pte)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
-
-#define __pte(x) ((pte_t) { (x) } )
-#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#else /* !STRICT_MM_TYPECHECKS */
-
-typedef pteval_t pte_t;
-typedef pgdval_t pgd_t;
-typedef pteval_t pgprot_t;
-
-#define pte_val(x) (x)
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-
-#define __pte(x) (x)
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-#include <asm-generic/pgtable-nopmd.h>
-
-#endif /* __ASM_PGTABLE_2LEVEL_TYPES_H */
diff --git a/arch/arm64/include/asm/pgtable-3level-hwdef.h b/arch/arm64/include/asm/pgtable-3level-hwdef.h
deleted file mode 100644
index 3dbf941d7767..000000000000
--- a/arch/arm64/include/asm/pgtable-3level-hwdef.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PGTABLE_3LEVEL_HWDEF_H
-#define __ASM_PGTABLE_3LEVEL_HWDEF_H
-
-/*
- * With LPAE and 4KB pages, there are 3 levels of page tables. Each level has
- * 512 entries of 8 bytes each, occupying a 4K page. The first level table
- * covers a range of 512GB, each entry representing 1GB. The user and kernel
- * address spaces are limited to 512GB each.
- */
-#define PTRS_PER_PTE 512
-#define PTRS_PER_PMD 512
-#define PTRS_PER_PGD 512
-
-/*
- * PGDIR_SHIFT determines the size a top-level page table entry can map.
- */
-#define PGDIR_SHIFT 30
-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/*
- * PMD_SHIFT determines the size a middle-level page table entry can map.
- */
-#define PMD_SHIFT 21
-#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
-/*
- * section address mask and size definitions.
- */
-#define SECTION_SHIFT 21
-#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK (~(SECTION_SIZE-1))
-
-#endif
diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h
deleted file mode 100644
index 4489615f14a9..000000000000
--- a/arch/arm64/include/asm/pgtable-3level-types.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H
-#define __ASM_PGTABLE_3LEVEL_TYPES_H
-
-typedef u64 pteval_t;
-typedef u64 pmdval_t;
-typedef u64 pgdval_t;
-
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { pteval_t pte; } pte_t;
-typedef struct { pmdval_t pmd; } pmd_t;
-typedef struct { pgdval_t pgd; } pgd_t;
-typedef struct { pteval_t pgprot; } pgprot_t;
-
-#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((x).pmd)
-#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
-
-#define __pte(x) ((pte_t) { (x) } )
-#define __pmd(x) ((pmd_t) { (x) } )
-#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
-
-#else /* !STRICT_MM_TYPECHECKS */
-
-typedef pteval_t pte_t;
-typedef pmdval_t pmd_t;
-typedef pgdval_t pgd_t;
-typedef pteval_t pgprot_t;
-
-#define pte_val(x) (x)
-#define pmd_val(x) (x)
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-
-#define __pte(x) (x)
-#define __pmd(x) (x)
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
-#include <asm-generic/pgtable-nopud.h>
-
-#endif /* __ASM_PGTABLE_3LEVEL_TYPES_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e182a356c979..d49180bb7cb3 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -1,56 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_PGTABLE_HWDEF_H
+#define __ASM_PGTABLE_HWDEF_H
+
+#include <asm/memory.h>
+
+#define PTDESC_ORDER 3
+
+/* Number of VA bits resolved by a single translation table level */
+#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER)
+
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence:
+ *
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
*
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT)
+ *
+ * which gets simplified as :
*/
-#ifndef __ASM_PGTABLE_HWDEF_H
-#define __ASM_PGTABLE_HWDEF_H
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) \
+ (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT)
-#ifdef CONFIG_ARM64_64K_PAGES
-#include <asm/pgtable-2level-hwdef.h>
-#else
-#include <asm/pgtable-3level-hwdef.h>
+/*
+ * Size mapped by an entry at level n ( -1 <= n <= 3)
+ * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 5. Hence, starting at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER)
+
+#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT)
+
+/*
+ * PMD_SHIFT determines the size a level 2 page table entry can map.
+ */
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT)
#endif
/*
+ * PUD_SHIFT determines the size a level 1 page table entry can map.
+ */
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 4
+#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE-1))
+#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT)
+#endif
+
+/*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * (depending on the configuration, this level can be -1, 0, 1 or 2).
+ */
+#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
+#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
+
+/*
+ * Contiguous page definitions.
+ */
+#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
+#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
+#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE)
+#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1))
+
+#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT)
+#define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT))
+#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE)
+#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1))
+
+/*
* Hardware page table definitions.
*
- * Level 1 descriptor (PUD).
+ * Level -1 descriptor (PGD).
*/
+#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
+#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
+#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
+#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
+#define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60)
-#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
+/*
+ * Level 0 descriptor (P4D).
+ */
+#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
+#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
+#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
+#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
+#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
+#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59)
+#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60)
+
+/*
+ * Level 1 descriptor (PUD).
+ */
+#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
+#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
+#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
+#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
+#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
+#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59)
+#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60)
/*
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
-#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
-#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
+#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
/*
* Section
*/
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
+#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59)
+#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60)
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
@@ -61,17 +161,33 @@
/*
* Level 3 descriptor (PTE).
*/
+#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
-#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
-#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
+#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */
+#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
+#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_SWBITS_MASK _AT(pteval_t, (BIT(63) | GENMASK(58, 55)))
+
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (50 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#ifdef CONFIG_ARM64_PA_BITS_52
+#ifdef CONFIG_ARM64_64K_PAGES
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
+#define PTE_ADDR_HIGH_SHIFT 36
+#define PHYS_TO_PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
+#else
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0x3) << 8)
+#define PTE_ADDR_HIGH_SHIFT 42
+#define PHYS_TO_PTE_ADDR_MASK GENMASK_ULL(49, 8)
+#endif
+#endif
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
@@ -80,47 +196,100 @@
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
- * 2nd stage PTE definitions
+ * PIIndex[3:0] encoding (Permission Indirection Extension)
+ */
+#define PTE_PI_IDX_0 6 /* AP[1], USER */
+#define PTE_PI_IDX_1 51 /* DBM */
+#define PTE_PI_IDX_2 53 /* PXN */
+#define PTE_PI_IDX_3 54 /* UXN */
+
+/*
+ * POIndex[2:0] encoding (Permission Overlay Extension)
*/
-#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
-#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60)
+#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61)
+#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62)
+
+#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60)
+
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
-#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2)
/*
- * EL2/HYP PTE/PMD definitions
+ * Hierarchical permission for Stage-1 tables
*/
-#define PMD_HYP PMD_SECT_USER
-#define PTE_HYP PTE_USER
+#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
+
+#define TTBR_CNP_BIT (UL(1) << 0)
/*
- * 40-bit physical address supported.
+ * TCR flags.
*/
-#define PHYS_MASK_SHIFT (40)
-#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_EL1_T0SZ_SHIFT)
+#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_EL1_T1SZ_SHIFT)
+
+#define TCR_T0SZ_MASK TCR_EL1_T0SZ_MASK
+#define TCR_T1SZ_MASK TCR_EL1_T1SZ_MASK
+
+#define TCR_EPD0_MASK TCR_EL1_EPD0_MASK
+#define TCR_EPD1_MASK TCR_EL1_EPD1_MASK
+
+#define TCR_IRGN0_MASK TCR_EL1_IRGN0_MASK
+#define TCR_IRGN0_WBWA (TCR_EL1_IRGN0_WBWA << TCR_EL1_IRGN0_SHIFT)
+
+#define TCR_ORGN0_MASK TCR_EL1_ORGN0_MASK
+#define TCR_ORGN0_WBWA (TCR_EL1_ORGN0_WBWA << TCR_EL1_ORGN0_SHIFT)
+
+#define TCR_SH0_MASK TCR_EL1_SH0_MASK
+#define TCR_SH0_INNER (TCR_EL1_SH0_INNER << TCR_EL1_SH0_SHIFT)
+
+#define TCR_SH1_MASK TCR_EL1_SH1_MASK
+
+#define TCR_TG0_SHIFT TCR_EL1_TG0_SHIFT
+#define TCR_TG0_MASK TCR_EL1_TG0_MASK
+#define TCR_TG0_4K (TCR_EL1_TG0_4K << TCR_EL1_TG0_SHIFT)
+#define TCR_TG0_64K (TCR_EL1_TG0_64K << TCR_EL1_TG0_SHIFT)
+#define TCR_TG0_16K (TCR_EL1_TG0_16K << TCR_EL1_TG0_SHIFT)
+
+#define TCR_TG1_SHIFT TCR_EL1_TG1_SHIFT
+#define TCR_TG1_MASK TCR_EL1_TG1_MASK
+#define TCR_TG1_16K (TCR_EL1_TG1_16K << TCR_EL1_TG1_SHIFT)
+#define TCR_TG1_4K (TCR_EL1_TG1_4K << TCR_EL1_TG1_SHIFT)
+#define TCR_TG1_64K (TCR_EL1_TG1_64K << TCR_EL1_TG1_SHIFT)
+
+#define TCR_IPS_SHIFT TCR_EL1_IPS_SHIFT
+#define TCR_IPS_MASK TCR_EL1_IPS_MASK
+#define TCR_A1 TCR_EL1_A1
+#define TCR_ASID16 TCR_EL1_AS
+#define TCR_TBI0 TCR_EL1_TBI0
+#define TCR_TBI1 TCR_EL1_TBI1
+#define TCR_HA TCR_EL1_HA
+#define TCR_HD TCR_EL1_HD
+#define TCR_HPD0 TCR_EL1_HPD0
+#define TCR_HPD1 TCR_EL1_HPD1
+#define TCR_TBID0 TCR_EL1_TBID0
+#define TCR_TBID1 TCR_EL1_TBID1
+#define TCR_E0PD0 TCR_EL1_E0PD0
+#define TCR_E0PD1 TCR_EL1_E0PD1
+#define TCR_DS TCR_EL1_DS
/*
- * TCR flags.
+ * TTBR.
+ */
+#ifdef CONFIG_ARM64_PA_BITS_52
+/*
+ * TTBR_ELx[1] is RES0 in this configuration.
*/
-#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
-#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
-#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
-#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
-#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24))
-#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26))
-#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26))
-#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26))
-#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26))
-#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
-#define TCR_TG0_64K (UL(1) << 14)
-#define TCR_TG1_64K (UL(1) << 30)
-#define TCR_IPS_40BIT (UL(2) << 32)
-#define TCR_ASID16 (UL(1) << 36)
+#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2)
+#endif
+
+#ifdef CONFIG_ARM64_VA_BITS_52
+/* Must be at least 64-byte aligned to prevent corruption of the TTBR */
+#define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \
+ (UL(1) << (48 - PGDIR_SHIFT))) * 8)
+#endif
#endif
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
new file mode 100644
index 000000000000..161e8660eddd
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 ARM Ltd.
+ */
+#ifndef __ASM_PGTABLE_PROT_H
+#define __ASM_PGTABLE_PROT_H
+
+#include <asm/memory.h>
+#include <asm/pgtable-hwdef.h>
+
+#include <linux/const.h>
+
+/*
+ * Software defined PTE bits definition.
+ */
+#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
+#define PTE_SWP_EXCLUSIVE (_AT(pteval_t, 1) << 2) /* only for swp ptes */
+#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
+#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+
+/*
+ * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
+ * interpreted according to the HW layout by SW but any attempted HW access to
+ * the address will result in a fault. pte_present() returns true.
+ */
+#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */
+#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */
+#else
+#define PTE_UFFD_WP (_AT(pteval_t, 0))
+#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0))
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF)
+
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_KERNEL (PROT_NORMAL)
+#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
+#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
+#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN)
+#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+
+#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
+#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
+
+#ifndef __ASSEMBLER__
+
+#include <asm/cpufeature.h>
+#include <asm/pgtable-types.h>
+#include <asm/rsi.h>
+
+extern bool arm64_use_ng_mappings;
+extern unsigned long prot_ns_shared;
+
+#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0)
+
+#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
+#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+
+#ifndef CONFIG_ARM64_LPA2
+#define lpa2_is_enabled() false
+#define PTE_MAYBE_SHARED PTE_SHARED
+#define PMD_MAYBE_SHARED PMD_SECT_S
+#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
+#else
+static inline bool __pure lpa2_is_enabled(void)
+{
+ return read_tcr() & TCR_EL1_DS;
+}
+
+#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
+#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48)
+#endif
+
+/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+
+/*
+ * If we have userspace only BTI we don't want to mark kernel pages
+ * guarded even if the system does support BTI.
+ */
+#define PTE_MAYBE_GP (system_supports_bti_kernel() ? PTE_GP : 0)
+
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT)
+
+#define PAGE_S2_MEMATTR(attr, has_fwb) \
+ ({ \
+ u64 __val; \
+ if (has_fwb) \
+ __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
+ else \
+ __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
+ __val; \
+ })
+
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
+#define PAGE_SHARED __pgprot(_PAGE_SHARED)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_READONLY)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC)
+#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY)
+
+#endif /* __ASSEMBLER__ */
+
+#define pte_pi_index(pte) ( \
+ ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
+ ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \
+ ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \
+ ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0)))
+
+/*
+ * Page types used via Permission Indirection Extension (PIE). PIE uses
+ * the USER, DBM, PXN and UXN bits to to generate an index which is used
+ * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define
+ * combinations we use on non-PIE systems with the same encoding, for
+ * convenience these are listed here as comments as are the unallocated
+ * encodings.
+ */
+
+/* 0: PAGE_DEFAULT */
+/* 1: PTE_USER */
+/* 2: PTE_WRITE */
+/* 3: PTE_WRITE | PTE_USER */
+/* 4: PAGE_EXECONLY PTE_PXN */
+/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */
+/* 6: PTE_PXN | PTE_WRITE */
+/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+/* 8: PAGE_KERNEL_ROX PTE_UXN */
+/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */
+/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */
+/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
+
+#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER)
+#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER)
+
+#define PAGE_GCS __pgprot(_PAGE_GCS)
+#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
+
+#define PIE_E0 ( \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
+
+#define PIE_E1 ( \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+
+#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
new file mode 100644
index 000000000000..265e8301d7ba
--- /dev/null
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Page table types definitions.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#ifndef __ASM_PGTABLE_TYPES_H
+#define __ASM_PGTABLE_TYPES_H
+
+#include <asm/types.h>
+
+/*
+ * Page Table Descriptor
+ *
+ * Generic page table descriptor format from which
+ * all level specific descriptors can be derived.
+ */
+typedef u64 ptdesc_t;
+
+typedef ptdesc_t pteval_t;
+typedef ptdesc_t pmdval_t;
+typedef ptdesc_t pudval_t;
+typedef ptdesc_t p4dval_t;
+typedef ptdesc_t pgdval_t;
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { pteval_t pte; } pte_t;
+#define pte_val(x) ((x).pte)
+#define __pte(x) ((pte_t) { (x) } )
+
+#if CONFIG_PGTABLE_LEVELS > 2
+typedef struct { pmdval_t pmd; } pmd_t;
+#define pmd_val(x) ((x).pmd)
+#define __pmd(x) ((pmd_t) { (x) } )
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
+typedef struct { pudval_t pud; } pud_t;
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) } )
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 4
+typedef struct { p4dval_t p4d; } p4d_t;
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) } )
+#endif
+
+typedef struct { pgdval_t pgd; } pgd_t;
+#define pgd_val(x) ((x).pgd)
+#define __pgd(x) ((pgd_t) { (x) } )
+
+typedef struct { ptdesc_t pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+#if CONFIG_PGTABLE_LEVELS == 2
+#include <asm-generic/pgtable-nopmd.h>
+#elif CONFIG_PGTABLE_LEVELS == 3
+#include <asm-generic/pgtable-nopud.h>
+#elif CONFIG_PGTABLE_LEVELS == 4
+#include <asm-generic/pgtable-nop4d.h>
+#endif
+
+#endif /* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index f0bebc5e22cd..64d5f1d9cce9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1,282 +1,877 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PGTABLE_H
#define __ASM_PGTABLE_H
+#include <asm/bug.h>
#include <asm/proc-fns.h>
#include <asm/memory.h>
+#include <asm/mte.h>
#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable-prot.h>
+#include <asm/tlbflush.h>
/*
- * Software defined PTE bits definition.
+ * VMALLOC range.
+ *
+ * VMALLOC_START: beginning of the kernel vmalloc space
+ * VMALLOC_END: extends to the available space below vmemmap
*/
-#define PTE_VALID (_AT(pteval_t, 1) << 0)
-#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */
-#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */
-#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
-#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
+#define VMALLOC_START (MODULES_END)
+#if VA_BITS == VA_BITS_MIN
+#define VMALLOC_END (VMEMMAP_START - SZ_8M)
+#else
+#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
+#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
+#endif
-/*
- * VMALLOC and SPARSEMEM_VMEMMAP ranges.
- */
-#define VMALLOC_START UL(0xffffff8000000000)
-#define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K)
+#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
-#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
+#ifndef __ASSEMBLER__
-#define FIRST_USER_ADDRESS 0
+#include <asm/cmpxchg.h>
+#include <asm/fixmap.h>
+#include <asm/por.h>
+#include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/page_table_check.h>
-#ifndef __ASSEMBLY__
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
+static inline void emit_pte_barriers(void)
+{
+ /*
+ * These barriers are emitted under certain conditions after a pte entry
+ * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+ * visible to the table walker. The isb ensures that any previous
+ * speculative "invalid translation" marker that is in the CPU's
+ * pipeline gets cleared, so that any access to that address after
+ * setting the pte to valid won't cause a spurious fault. If the thread
+ * gets preempted after storing to the pgtable but before emitting these
+ * barriers, __switch_to() emits a dsb which ensure the walker gets to
+ * see the store. There is no guarantee of an isb being issued though.
+ * This is safe because it will still get issued (albeit on a
+ * potentially different CPU) when the thread starts running again,
+ * before any access to the address.
+ */
+ dsb(ishst);
+ isb();
+}
-#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
-#ifndef CONFIG_ARM64_64K_PAGES
-#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
-#endif
-#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
+static inline void queue_pte_barriers(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt()) {
+ emit_pte_barriers();
+ return;
+ }
+
+ flags = read_thread_flags();
+
+ if (flags & BIT(TIF_LAZY_MMU)) {
+ /* Avoid the atomic op if already set. */
+ if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+ set_thread_flag(TIF_LAZY_MMU_PENDING);
+ } else {
+ emit_pte_barriers();
+ }
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ /*
+ * lazy_mmu_mode is not supposed to permit nesting. But in practice this
+ * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation
+ * inside a lazy_mmu_mode section (such as zap_pte_range()) will change
+ * permissions on the linear map with apply_to_page_range(), which
+ * re-enters lazy_mmu_mode. So we tolerate nesting in our
+ * implementation. The first call to arch_leave_lazy_mmu_mode() will
+ * flush and clear the flag such that the remainder of the work in the
+ * outer nest behaves as if outside of lazy mmu mode. This is safe and
+ * keeps tracking simple.
+ */
+
+ if (in_interrupt())
+ return;
+
+ set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+ emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ arch_flush_lazy_mmu_mode();
+ clear_thread_flag(TIF_LAZY_MMU);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+/* Set stride and tlb_level in flush_*_tlb_range */
+#define flush_pmd_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+#define flush_pud_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * The pgprot_* and protection_map entries will be fixed up at runtime to
- * include the cachable and bufferable bits based on memory policy, as well as
- * any architecture dependent bits like global/ASID and SMP shared mapping
- * bits.
+ * We use local TLB invalidation instruction when reusing page in
+ * write protection fault handler to avoid TLBI broadcast in the hot
+ * path. This will cause spurious page faults if stale read-only TLB
+ * entries exist.
*/
-#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF
-
-extern pgprot_t pgprot_default;
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) \
+ local_flush_tlb_page_nonotify(vma, address)
-#define __pgprot_modify(prot,mask,bits) \
- __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
+ local_flush_tlb_page_nonotify(vma, address)
-#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
-
-#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY)
-#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY)
-
-#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP)
-#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-
-#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
-
-#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
-#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY)
-#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY)
-
-#endif /* __ASSEMBLY__ */
-
-#define __P000 __PAGE_NONE
-#define __P001 __PAGE_READONLY
-#define __P010 __PAGE_COPY
-#define __P011 __PAGE_COPY
-#define __P100 __PAGE_READONLY_EXEC
-#define __P101 __PAGE_READONLY_EXEC
-#define __P110 __PAGE_COPY_EXEC
-#define __P111 __PAGE_COPY_EXEC
-
-#define __S000 __PAGE_NONE
-#define __S001 __PAGE_READONLY
-#define __S010 __PAGE_SHARED
-#define __S011 __PAGE_SHARED
-#define __S100 __PAGE_READONLY_EXEC
-#define __S101 __PAGE_READONLY_EXEC
-#define __S110 __PAGE_SHARED_EXEC
-#define __S111 __PAGE_SHARED_EXEC
-
-#ifndef __ASSEMBLY__
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr) (empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
-#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
+#define pte_ERROR(e) \
+ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
-#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#ifdef CONFIG_ARM64_PA_BITS_52
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_MAYBE_SHARED;
+ return (pte_val(pte) & PTE_ADDR_LOW) |
+ ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
+}
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
+}
+#else
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return pte_val(pte) & PTE_ADDR_LOW;
+}
+
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return phys;
+}
+#endif
+
+#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot) \
+ __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_none(pte) (!pte_val(pte))
-#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
+#define __pte_clear(mm, addr, ptep) \
+ __set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
-#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
-#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))
-#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY)
-#define pte_young(pte) (pte_val(pte) & PTE_AF)
-#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL)
-#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY))
-#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
-
-#define pte_valid_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
-
-#define PTE_BIT_FUNC(fn,op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY);
-PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY);
-PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY);
-PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY);
-PTE_BIT_FUNC(mkold, &= ~PTE_AF);
-PTE_BIT_FUNC(mkyoung, |= PTE_AF);
-PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL);
+#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte))
+#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
+#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
+#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
+#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
+#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
+#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \
+ PTE_ATTRINDX(MT_NORMAL_TAGGED))
+
+#define pte_cont_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \
+ (__boundary - 1 < (end) - 1) ? __boundary : (end); \
+})
+
+#define pmd_cont_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK; \
+ (__boundary - 1 < (end) - 1) ? __boundary : (end); \
+})
+
+#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte))
+#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
+#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
+
+#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
+#define pte_present_invalid(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
+/*
+ * Execute-only user mappings do not have the PTE_USER bit set. All valid
+ * kernel mappings have the PTE_UXN bit set.
+ */
+#define pte_valid_not_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
+/*
+ * Returns true if the pte is valid and has the contiguous bit set.
+ */
+#define pte_valid_cont(pte) (pte_valid(pte) && pte_cont(pte))
+/*
+ * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+ * so that we don't erroneously return false for pages that have been
+ * remapped as PROT_NONE but are yet to be flushed from the TLB.
+ * Note that we can't make any assumptions based on the state of the access
+ * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
+ * TLB.
+ */
+#define pte_accessible(mm, pte) \
+ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
{
- *ptep = pte;
+ u64 por;
+
+ if (!system_supports_poe())
+ return true;
+
+ por = read_sysreg_s(SYS_POR_EL0);
+
+ if (write)
+ return por_elx_allows_write(por, pkey);
+
+ if (execute)
+ return por_elx_allows_exec(por, pkey);
+
+ return por_elx_allows_read(por, pkey);
}
-extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
+/*
+ * p??_access_permitted() is true for valid user mappings (PTE_USER
+ * bit set, subject to the write permission check). For execute-only
+ * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
+ * not set) must return false. PROT_NONE mappings do not have the
+ * PTE_VALID bit set.
+ */
+#define pte_access_permitted_no_overlay(pte, write) \
+ (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
+#define pte_access_permitted(pte, write) \
+ (pte_access_permitted_no_overlay(pte, write) && \
+ por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
+#define pmd_access_permitted(pmd, write) \
+ (pte_access_permitted(pmd_pte(pmd), (write)))
+#define pud_access_permitted(pud, write) \
+ (pte_access_permitted(pud_pte(pud), (write)))
+
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+ pte_val(pte) &= ~pgprot_val(prot);
+ return pte;
+}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
- if (pte_valid_user(pte)) {
- if (pte_exec(pte))
- __sync_icache_dcache(pte, addr);
- if (!pte_dirty(pte))
- pte = pte_wrprotect(pte);
- }
+ pte_val(pte) |= pgprot_val(prot);
+ return pte;
+}
- set_pte(ptep, pte);
+static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+ pmd_val(pmd) &= ~pgprot_val(prot);
+ return pmd;
}
+static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+ pmd_val(pmd) |= pgprot_val(prot);
+ return pmd;
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+ pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
+ if (pte_sw_dirty(pte))
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+ if (pte_write(pte))
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY bit.
+ */
+ if (pte_hw_dirty(pte))
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_AF));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_AF));
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
+}
+
+static inline pte_t pte_mkcont(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mknoncont(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mkvalid(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_VALID));
+}
+
+static inline pte_t pte_mkinvalid(pte_t pte)
+{
+ pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
+ pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
+ return pte;
+}
+
+static inline pmd_t pmd_mkcont(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
+}
+
+static inline pmd_t pmd_mknoncont(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
+}
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline int pte_uffd_wp(pte_t pte)
+{
+ return !!(pte_val(pte) & PTE_UFFD_WP);
+}
+
+static inline pte_t pte_mkuffd_wp(pte_t pte)
+{
+ return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
+}
+
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
+{
+ WRITE_ONCE(*ptep, pte);
+}
+
+static inline void __set_pte_complete(pte_t pte)
+{
+ /*
+ * Only if the new pte is valid and kernel, otherwise TLB maintenance
+ * has the necessary barriers.
+ */
+ if (pte_valid_not_user(pte))
+ queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
+ __set_pte_complete(pte);
+}
+
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+
+extern void __sync_icache_dcache(pte_t pteval);
+bool pgattr_change_is_safe(pteval_t old, pteval_t new);
+
/*
- * Huge pte definitions.
+ * PTE bits configuration in the presence of hardware Dirty Bit Management
+ * (PTE_WRITE == PTE_DBM):
+ *
+ * Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw)
+ * 0 0 | 1 0 0
+ * 0 1 | 1 1 0
+ * 1 0 | 1 0 1
+ * 1 1 | 0 1 x
+ *
+ * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
+ * the page fault mechanism. Checking the dirty status of a pte becomes:
+ *
+ * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
-#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
+
+static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte)
+{
+ pte_t old_pte;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return;
+
+ old_pte = __ptep_get(ptep);
+
+ if (!pte_valid(old_pte) || !pte_valid(pte))
+ return;
+ if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
+ return;
+
+ /*
+ * Check for potential race with hardware updates of the pte
+ * (__ptep_set_access_flags safely changes valid ptes without going
+ * through an invalid entry).
+ */
+ VM_WARN_ONCE(!pte_young(pte),
+ "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
+ "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+ "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+}
+
+static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
+{
+ if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte);
+
+ /*
+ * If the PTE would provide user space access to the tags associated
+ * with it then ensure that the MTE tags are synchronised. Although
+ * pte_access_permitted_no_overlay() returns false for exec only
+ * mappings, they don't expose tags (instruction fetches don't check
+ * tags).
+ */
+ if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte, nr_pages);
+}
+
+/*
+ * Select all bits except the pfn
+ */
+#define pte_pgprot pte_pgprot
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
+#define pte_advance_pfn pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+ return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
+}
/*
* Hugetlb definitions.
*/
-#define HUGE_MAX_HSTATE 2
+#define HUGE_MAX_HSTATE 4
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-#define __HAVE_ARCH_PTE_SPECIAL
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
-/*
- * Software PMD bits for THP
- */
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+ return __pte(p4d_val(p4d));
+}
+
+static inline pte_t pud_pte(pud_t pud)
+{
+ return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+ return __pud(pte_val(pte));
+}
+
+static inline pmd_t pud_pmd(pud_t pud)
+{
+ return __pmd(pud_val(pud));
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
+
+static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
+}
+
+static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
+}
-#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57)
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & PTE_SWP_EXCLUSIVE;
+}
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+}
+
+static inline int pte_swp_uffd_wp(pte_t pte)
+{
+ return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
+}
+
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
+}
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#ifdef CONFIG_NUMA_BALANCING
/*
- * THP definitions.
+ * See the comment in include/linux/pgtable.h
*/
-#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
+static inline int pte_protnone(pte_t pte)
+{
+ /*
+ * pte_present_invalid() tells us that the pte is invalid from HW
+ * perspective but present from SW perspective, so the fields are to be
+ * interpreted as per the HW layout. The second 2 checks are the unique
+ * encoding that we use for PROT_NONE. It is insufficient to only use
+ * the first check because we share the same encoding scheme with pmds
+ * which support pmd_mkinvalid(), so can be present-invalid without
+ * being PROT_NONE.
+ */
+ return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
+}
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pte_protnone(pmd_pte(pmd));
+}
+#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_present(pmd) pte_present(pmd_pte(pmd))
+#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
+#define pmd_user(pmd) pte_user(pmd_pte(pmd))
+#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
+#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
+#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
+#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd))
+#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
+#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
+#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd))
+#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
+#define pmd_swp_clear_uffd_wp(pmd) \
+ pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
+
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /*
+ * It's possible that the pmd is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
+ pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;
+
+ return __pmd((pmd_val(pmd) & ~mask) | val);
+}
+
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+#define pmd_special(pte) (!!((pmd_val(pte) & PTE_SPECIAL)))
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
+}
#endif
-#define PMD_BIT_FUNC(fn,op) \
-static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
+#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
+#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
-PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
-PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_write(pud) pte_write(pud_pte(pud))
-#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ /*
+ * It's possible that the pud is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
+ pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;
+
+ return __pud((pud_val(pud) & ~mask) | val);
+}
-#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
-#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
-#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
+#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
+#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
+#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
+#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pmd_pgprot pmd_pgprot
+static inline pgprot_t pmd_pgprot(pmd_t pmd)
+{
+ unsigned long pfn = pmd_pfn(pmd);
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+ return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
+}
+
+#define pud_pgprot pud_pgprot
+static inline pgprot_t pud_pgprot(pud_t pud)
{
- const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
- PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
- PMD_SECT_VALID;
- pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
- return pmd;
+ unsigned long pfn = pud_pfn(pud);
+
+ return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
+}
+
+static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte, unsigned int nr,
+ unsigned long pgsize)
+{
+ unsigned long stride = pgsize >> PAGE_SHIFT;
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr);
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr);
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ __sync_cache_and_tags(pte, nr * stride);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ __set_pte_nosync(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_advance_pfn(pte, stride);
+ }
+
+ __set_pte_complete(pte);
}
-#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd)
+static inline void __set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE);
+}
-static inline int has_transparent_hugepage(void)
+static inline void __set_pmds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
- return 1;
+ __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
+
+static inline void __set_puds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pud_t *pudp, pud_t pud, unsigned int nr)
+{
+ __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
+}
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
+
+#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
+#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
+
+#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
+#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys)
+
+#define __pgprot_modify(prot,mask,bits) \
+ __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
+#define pgprot_nx(prot) \
+ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
+
+#define pgprot_decrypted(prot) \
+ __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
+#define pgprot_encrypted(prot) \
+ __pgprot_modify(prot, PROT_NS_SHARED, 0)
/*
* Mark the prot value as uncacheable and unbufferable.
*/
#define pgprot_noncached(prot) \
- __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot) \
- __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_GRE))
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+#define pgprot_device(prot) \
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_tagged(prot) \
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
+#define pgprot_mhp pgprot_tagged
+/*
+ * DMA allocations for non-coherent devices use what the Arm architecture calls
+ * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
+ * and merging of writes. This is different from "Device-nGnR[nE]" memory which
+ * is intended for MMIO and thus forbids speculation, preserves access size,
+ * requires strict alignment and can also force write responses to come from the
+ * endpoint.
+ */
#define pgprot_dmacoherent(prot) \
- __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC))
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, \
+ PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_present(pmd) (pmd_val(pmd))
-
-#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
+#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
+#define pmd_bad(pmd) (!pmd_table(pmd))
+
+#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ /*
+ * If pmd is present-invalid, pmd_table() won't detect it
+ * as a table, so force the valid bit for the comparison.
+ */
+ return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
+static inline bool pud_sect(pud_t pud) { return false; }
+static inline bool pud_table(pud_t pud) { return true; }
+#else
+#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
+ PUD_TYPE_SECT)
+#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
+ PUD_TYPE_TABLE)
+#endif
+
+extern pgd_t swapper_pg_dir[];
+extern pgd_t idmap_pg_dir[];
+extern pgd_t tramp_pg_dir[];
+extern pgd_t reserved_pg_dir[];
+extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
+
+static inline bool in_swapper_pgdir(void *addr)
+{
+ return ((unsigned long)addr & PAGE_MASK) ==
+ ((unsigned long)swapper_pg_dir & PAGE_MASK);
+}
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
- dsb();
+#ifdef __PAGETABLE_PMD_FOLDED
+ if (in_swapper_pgdir(pmdp)) {
+ set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
+ return;
+ }
+#endif /* __PAGETABLE_PMD_FOLDED */
+
+ WRITE_ONCE(*pmdp, pmd);
+
+ if (pmd_valid(pmd))
+ queue_pte_barriers();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -284,29 +879,59 @@ static inline void pmd_clear(pmd_t *pmdp)
set_pmd(pmdp, __pmd(0));
}
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
- return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+ return __pmd_to_phys(pmd);
}
-#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ return (unsigned long)__va(pmd_page_paddr(pmd));
+}
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
+/* Find an entry in the third-level page table. */
+#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
+
+#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
-#ifndef CONFIG_ARM64_64K_PAGES
+#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd))
+
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!(pud_val(pud) & 2))
-#define pud_present(pud) (pud_val(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
+#define pud_present(pud) pte_present(pud_pte(pud))
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
+#else
+#define pud_leaf(pud) false
+#endif
+#define pud_valid(pud) pte_valid(pud_pte(pud))
+#define pud_user(pud) pte_user(pud_pte(pud))
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
+
+static inline bool pgtable_l4_enabled(void);
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
- dsb();
+ if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
+ set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
+ return;
+ }
+
+ WRITE_ONCE(*pudp, pud);
+
+ if (pud_valid(pud))
+ queue_pte_barriers();
}
static inline void pud_clear(pud_t *pudp)
@@ -314,90 +939,1020 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
{
- return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+ return __pud_to_phys(pud);
}
-#endif /* CONFIG_ARM64_64K_PAGES */
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)__va(pud_page_paddr(pud));
+}
+
+/* Find an entry in the second-level page table. */
+#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
+
+#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
+
+#define pud_page(pud) phys_to_page(__pud_to_phys(pud))
+
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
-/* to find an entry in a page-table-directory */
-#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+#else
-#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
+#define pud_valid(pud) false
+#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
+/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr) NULL
+#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp)
+#define pmd_clear_fixmap()
-/* Find an entry in the second-level page table.. */
-#ifndef CONFIG_ARM64_64K_PAGES
-#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
+#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir)
+
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+static __always_inline bool pgtable_l4_enabled(void)
{
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+ if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
+ return true;
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}
-#endif
-/* Find an entry in the third-level page table.. */
-#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+static inline bool mm_pud_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l4_enabled();
+}
+#define mm_pud_folded mm_pud_folded
+
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
+
+#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && \
+ ((p4d_val(p4d) & P4D_TYPE_MASK) != \
+ P4D_TYPE_TABLE))
+#define p4d_present(p4d) (!p4d_none(p4d))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (in_swapper_pgdir(p4dp)) {
+ set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
+ return;
+ }
+
+ WRITE_ONCE(*p4dp, p4d);
+ queue_pte_barriers();
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+ if (pgtable_l4_enabled())
+ set_p4d(p4dp, __p4d(0));
+}
+
+static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
+{
+ return __p4d_to_phys(p4d);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
+{
+ /* Ensure that 'p4dp' indexes a page table according to 'addr' */
+ VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D);
+
+ return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ return (pud_t *)__va(p4d_page_paddr(p4d));
+}
+
+static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l4_enabled());
+
+ return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
+}
+
+static inline
+pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
+{
+ return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
+}
+#define pud_offset pud_offset
+
+static inline pud_t *pud_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return NULL;
+ return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
+}
+
+static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return pud_set_fixmap(pud_offset_phys(p4dp, addr));
+}
+
+static inline void pud_clear_fixmap(void)
+{
+ if (pgtable_l4_enabled())
+ clear_fixmap(FIX_PUD);
+}
+
+/* use ONLY for statically allocated translation tables */
+static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
+{
+ if (!pgtable_l4_enabled())
+ return p4d_to_folded_pud(p4dp, addr);
+ return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
+}
+
+#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
+
+#else
+
+static inline bool pgtable_l4_enabled(void) { return false; }
+
+#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
+
+/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr) NULL
+#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
+#define pud_offset_kimg(dir,addr) ((pud_t *)dir)
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
+#if CONFIG_PGTABLE_LEVELS > 4
+
+static __always_inline bool pgtable_l5_enabled(void)
+{
+ if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
+ return vabits_actual == VA_BITS;
+ return alternative_has_cap_unlikely(ARM64_HAS_VA52);
+}
+
+static inline bool mm_p4d_folded(const struct mm_struct *mm)
+{
+ return !pgtable_l5_enabled();
+}
+#define mm_p4d_folded mm_p4d_folded
+
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
+
+#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && \
+ ((pgd_val(pgd) & PGD_TYPE_MASK) != \
+ PGD_TYPE_TABLE))
+#define pgd_present(pgd) (!pgd_none(pgd))
+
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (in_swapper_pgdir(pgdp)) {
+ set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
+ return;
+ }
+
+ WRITE_ONCE(*pgdp, pgd);
+ queue_pte_barriers();
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+ if (pgtable_l5_enabled())
+ set_pgd(pgdp, __pgd(0));
+}
+
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+{
+ return __pgd_to_phys(pgd);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
+{
+ /* Ensure that 'pgdp' indexes a page table according to 'addr' */
+ VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD);
+
+ return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
+}
+
+static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
+{
+ BUG_ON(!pgtable_l5_enabled());
+
+ return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
+}
+
+static inline
+p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
+{
+ return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
+}
+
+static inline p4d_t *p4d_set_fixmap(unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return NULL;
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
+}
+
+static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
+}
+
+static inline void p4d_clear_fixmap(void)
+{
+ if (pgtable_l5_enabled())
+ clear_fixmap(FIX_P4D);
+}
+
+/* use ONLY for statically allocated translation tables */
+static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
+{
+ if (!pgtable_l5_enabled())
+ return pgd_to_folded_p4d(pgdp, addr);
+ return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
+}
+
+#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+
+#else
+
+static inline bool pgtable_l5_enabled(void) { return false; }
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
+#define p4d_set_fixmap(addr) NULL
+#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
+#define p4d_clear_fixmap()
+
+#define p4d_offset_kimg(dir,addr) ((p4d_t *)dir)
+
+static inline
+p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
+{
+ /*
+ * With runtime folding of the pud, pud_offset_lockless() passes
+ * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
+ * will offset the pointer assuming that it points into
+ * a page-table page. However, the fast GUP path passes us a
+ * pgd_t allocated on the stack and so we must use the original
+ * pointer in 'pgdp' to construct the p4d pointer instead of
+ * using the generic p4d_offset_lockless() implementation.
+ *
+ * Note: reusing the original pointer means that we may
+ * dereference the same (live) page-table entry multiple times.
+ * This is safe because it is still only loaded once in the
+ * context of each level and the CPU guarantees same-address
+ * read-after-read ordering.
+ */
+ return p4d_offset(pgdp, addr);
+}
+#define p4d_offset_lockless p4d_offset_lockless_folded
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
+ /*
+ * Normal and Normal-Tagged are two different memory types and indices
+ * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
+ */
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID;
+ PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
+ PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;
+
+ /* preserve the hardware dirty information */
+ if (pte_hw_dirty(pte))
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ /*
+ * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
+ * dirtiness again.
+ */
+ if (pte_sw_dirty(pte))
+ pte = pte_mkdirty(pte);
return pte;
}
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
+extern int __ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
+ pmd_pte(entry), dirty);
+}
+#endif
+
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+}
-#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
+}
+#endif
+
+/*
+ * Atomic pte/pmd modifications.
+ */
+static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ pte_t old_pte, pte;
+
+ pte = __ptep_get(ptep);
+ do {
+ old_pte = pte;
+ pte = pte_mkold(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
+
+ return pte_young(pte);
+}
+
+static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ int young = __ptep_test_and_clear_young(vma, address, ptep);
+
+ if (young) {
+ /*
+ * We can elide the trailing DSB here since the worst that can
+ * happen is that a CPU continues to use the young entry in its
+ * TLB and we mistakenly reclaim the associated page. The
+ * window for such an event is bounded by the next
+ * context-switch, which provides a DSB to complete the TLB
+ * invalidation.
+ */
+ flush_tlb_page_nosync(vma, address);
+ }
+
+ return young;
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
+ VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
+ return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
+
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+ pte_t *ptep,
+ unsigned long pgsize)
+{
+ pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_pte_clear(mm, pte);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmd_clear(mm, pte_pmd(pte));
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_pud_clear(mm, pte_pud(pte));
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ return pte;
+}
+
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE);
+}
+
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ for (;;) {
+ __ptep_get_and_clear(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte, tmp_pte;
+
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+ return pte;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep,
+ pte_t pte)
+{
+ pte_t old_pte;
+
+ do {
+ old_pte = pte;
+ pte = pte_wrprotect(pte);
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
+}
+
+/*
+ * __ptep_set_wrprotect - mark read-only while transferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ */
+static inline void __ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
+}
+
+static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
+ pte_t *ptep, unsigned int nr)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
+ __ptep_set_wrprotect(mm, address, ptep);
+}
+
+static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, cydp_t flags)
+{
+ pte_t old_pte;
+
+ do {
+ old_pte = pte;
+
+ if (flags & CYDP_CLEAR_YOUNG)
+ pte = pte_mkold(pte);
+ if (flags & CYDP_CLEAR_DIRTY)
+ pte = pte_mkclean(pte);
+
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
+}
+
+static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ pte_t pte;
+
+ for (;;) {
+ pte = __ptep_get(ptep);
+
+ if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
+ __set_pte(ptep, pte_mkclean(pte_mkold(pte)));
+ else
+ __clear_young_dirty_pte(vma, addr, ptep, pte, flags);
+
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ __ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+}
+
+#define pmdp_establish pmdp_establish
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
+ return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
+}
+#endif
/*
* Encode and decode a swap entry:
- * bits 0, 2: present (must both be zero)
- * bit 3: PTE_FILE
- * bits 4-8: swap type
- * bits 9-63: swap offset
+ * bits 0-1: present (must be zero)
+ * bits 2: remember PG_anon_exclusive
+ * bit 3: remember uffd-wp state
+ * bits 6-10: swap type
+ * bit 11: PTE_PRESENT_INVALID (must be zero)
+ * bits 12-61: swap offset
*/
-#define __SWP_TYPE_SHIFT 4
-#define __SWP_TYPE_BITS 6
+#define __SWP_TYPE_SHIFT 6
+#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
-#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
+#define __SWP_OFFSET_SHIFT 12
+#define __SWP_OFFSET_BITS 50
+#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
-#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT)
+#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp) __pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
/*
* Ensure that there are not more swap files than can be encoded in the kernel
- * the PTEs.
+ * PTEs.
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+#ifdef CONFIG_ARM64_MTE
+
+#define __HAVE_ARCH_PREPARE_TO_SWAP
+extern int arch_prepare_to_swap(struct folio *folio);
+
+#define __HAVE_ARCH_SWAP_INVALIDATE
+static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
+{
+ if (system_supports_mte())
+ mte_invalidate_tags(type, offset);
+}
+
+static inline void arch_swap_invalidate_area(int type)
+{
+ if (system_supports_mte())
+ mte_invalidate_tags_area(type);
+}
+
+#define __HAVE_ARCH_SWAP_RESTORE
+extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);
+
+#endif /* CONFIG_ARM64_MTE */
+
+/*
+ * On AArch64, the cache coherency is handled via the __set_ptes() function.
+ */
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ unsigned int nr)
+{
+ /*
+ * We don't do anything here, so there's a very small chance of
+ * us retaking a user fault which we just fixed up. The alternative
+ * is doing a dsb(ishst), but that penalises the fastpath.
+ */
+}
+
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
+#else
+#define phys_to_ttbr(addr) (addr)
+#endif
+
+/*
+ * On arm64 without hardware Access Flag, copying from user will fail because
+ * the pte is old and cannot be marked young. So we always end up with zeroed
+ * page after fork() + CoW for pfn mappings. We don't always have a
+ * hardware-managed access flag on arm64.
+ */
+#define arch_has_hw_pte_young cpu_has_hw_af
+
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+#define arch_has_hw_nonleaf_pmd_young system_supports_haft
+#endif
+
+/*
+ * Experimentally, it's cheap to set the access flag in hardware and we
+ * benefit from prefaulting mappings as 'old' to start with.
+ */
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
+
+/*
+ * Request exec memory is read into pagecache in at least 64K folios. This size
+ * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
+ * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
+ * pages are in use.
+ */
+#define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
+
+static inline bool pud_sect_supported(void)
+{
+ return PAGE_SIZE == SZ_4K;
+}
+
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
+#define modify_prot_start_ptes modify_prot_start_ptes
+extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr);
+
+#define modify_prot_commit_ptes modify_prot_commit_ptes
+extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte,
+ unsigned int nr);
+
+#ifdef CONFIG_ARM64_CONTPTE
+
/*
- * Encode and decode a file entry:
- * bits 0, 2: present (must both be zero)
- * bit 3: PTE_FILE
- * bits 4-63: file offset / PAGE_SIZE
+ * The contpte APIs are used to transparently manage the contiguous bit in ptes
+ * where it is possible and makes sense to do so. The PTE_CONT bit is considered
+ * a private implementation detail of the public ptep API (see below).
*/
-#define pte_file(pte) (pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x) (pte_val(x) >> 4)
-#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE)
+extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
+extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
+extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr);
+extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full);
+extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full);
+extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr);
+extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty);
+extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags);
+
+static __always_inline void contpte_try_fold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ /*
+ * Only bother trying if both the virtual and physical addresses are
+ * aligned and correspond to the last entry in a contig range. The core
+ * code mostly modifies ranges from low to high, so this is the likely
+ * the last modification in the contig range, so a good time to fold.
+ * We can't fold special mappings, because there is no associated folio.
+ */
+
+ const unsigned long contmask = CONT_PTES - 1;
+ bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;
+
+ if (unlikely(valign)) {
+ bool palign = (pte_pfn(pte) & contmask) == contmask;
+
+ if (unlikely(palign &&
+ pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
+ __contpte_try_fold(mm, addr, ptep, pte);
+ }
+}
+
+static __always_inline void contpte_try_unfold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ if (unlikely(pte_valid_cont(pte)))
+ __contpte_try_unfold(mm, addr, ptep, pte);
+}
+
+#define pte_batch_hint pte_batch_hint
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+ if (!pte_valid_cont(pte))
+ return 1;
+
+ return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
+}
+
+/*
+ * The below functions constitute the public API that arm64 presents to the
+ * core-mm to manipulate PTE entries within their page tables (or at least this
+ * is the subset of the API that arm64 needs to implement). These public
+ * versions will automatically and transparently apply the contiguous bit where
+ * it makes sense to do so. Therefore any users that are contig-aware (e.g.
+ * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
+ * private versions, which are prefixed with double underscore. All of these
+ * APIs except for ptep_get_lockless() are expected to be called with the PTL
+ * held. Although the contiguous bit is considered private to the
+ * implementation, it is deliberately allowed to leak through the getters (e.g.
+ * ptep_get()), back to core code. This is required so that pte_leaf_size() can
+ * provide an accurate size for perf_get_pgtable_size(). But this leakage means
+ * its possible a pte will be passed to a setter with the contiguous bit set, so
+ * we explicitly clear the contiguous bit in those cases to prevent accidentally
+ * setting it in the pgtable.
+ */
+
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get(ptep, pte);
+}
+
+#define ptep_get_lockless ptep_get_lockless
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte;
+
+ return contpte_ptep_get_lockless(ptep);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+ /*
+ * We don't have the mm or vaddr so cannot unfold contig entries (since
+ * it requires tlb maintenance). set_pte() is not used in core code, so
+ * this should never even be called. Regardless do our best to service
+ * any call and emit a warning if there is any attempt to set a pte on
+ * top of an existing contig range.
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ WARN_ON_ONCE(pte_valid_cont(orig_pte));
+ __set_pte(ptep, pte_mknoncont(pte));
+}
+
+#define set_ptes set_ptes
+static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ pte = pte_mknoncont(pte);
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __set_ptes(mm, addr, ptep, pte, 1);
+ contpte_try_fold(mm, addr, ptep, pte);
+ } else {
+ contpte_set_ptes(mm, addr, ptep, pte, nr);
+ }
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __pte_clear(mm, addr, ptep);
+}
+
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ contpte_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+}
-#define PTE_FILE_MAX_BITS 60
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ pte_t pte;
+
+ if (likely(nr == 1)) {
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ } else {
+ pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
-extern int kern_addr_valid(unsigned long addr);
+ return pte;
+}
-#include <asm-generic/pgtable.h>
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ return __ptep_get_and_clear(mm, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_test_and_clear_young(vma, addr, ptep);
+
+ return contpte_ptep_test_and_clear_young(vma, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t orig_pte = __ptep_get(ptep);
-#define pgtable_cache_init() do { } while (0)
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_clear_flush_young(vma, addr, ptep);
+
+ return contpte_ptep_clear_flush_young(vma, addr, ptep);
+}
+
+#define wrprotect_ptes wrprotect_ptes
+static __always_inline void wrprotect_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ if (likely(nr == 1)) {
+ /*
+ * Optimization: wrprotect_ptes() can only be called for present
+ * ptes so we only need to check contig bit as condition for
+ * unfold, and we can remove the contig bit from the pte we read
+ * to avoid re-reading. This speeds up fork() which is sensitive
+ * for order-0 folios. Equivalent to contpte_try_unfold().
+ */
+ pte_t orig_pte = __ptep_get(ptep);
+
+ if (unlikely(pte_cont(orig_pte))) {
+ __contpte_try_unfold(mm, addr, ptep, orig_pte);
+ orig_pte = pte_mknoncont(orig_pte);
+ }
+ ___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
+ } else {
+ contpte_wrprotect_ptes(mm, addr, ptep, nr);
+ }
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ wrprotect_ptes(mm, addr, ptep, 1);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pte_t orig_pte = __ptep_get(ptep);
+
+ entry = pte_mknoncont(entry);
+
+ if (likely(!pte_valid_cont(orig_pte)))
+ return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+
+ return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+}
+
+#define clear_young_dirty_ptes clear_young_dirty_ptes
+static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
+ __clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+ else
+ contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+}
-#endif /* !__ASSEMBLY__ */
+#else /* CONFIG_ARM64_CONTPTE */
+
+#define ptep_get __ptep_get
+#define set_pte __set_pte
+#define set_ptes __set_ptes
+#define pte_clear __pte_clear
+#define clear_full_ptes __clear_full_ptes
+#define get_and_clear_full_ptes __get_and_clear_full_ptes
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear __ptep_get_and_clear
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young __ptep_test_and_clear_young
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young __ptep_clear_flush_young
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define ptep_set_wrprotect __ptep_set_wrprotect
+#define wrprotect_ptes __wrprotect_ptes
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags __ptep_set_access_flags
+#define clear_young_dirty_ptes __clear_young_dirty_ptes
+
+#endif /* CONFIG_ARM64_CONTPTE */
+
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h
new file mode 100644
index 000000000000..0ca5f83ce148
--- /dev/null
+++ b/arch/arm64/include/asm/pkeys.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ *
+ * Based on arch/x86/include/asm/pkeys.h
+ */
+
+#ifndef _ASM_ARM64_PKEYS_H
+#define _ASM_ARM64_PKEYS_H
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2)
+
+#define arch_max_pkey() 8
+
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return system_supports_poe();
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (pkey != -1)
+ return pkey;
+
+ return vma_pkey(vma);
+}
+
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ // Execute-only mappings are handled by EPAN/FEAT_PAN3.
+ return -1;
+}
+
+#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map
+#define mm_set_pkey_allocated(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) |= (1U << pkey); \
+} while (0)
+#define mm_set_pkey_free(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
+} while (0)
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /*
+ * "Allocated" pkeys are those that have been returned
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
+ */
+ if (pkey < 0 || pkey >= arch_max_pkey())
+ return false;
+
+ return mm_pkey_allocation_map(mm) & (1U << pkey);
+}
+
+/*
+ * Returns a positive, 3-bit key on success, or -1 on failure.
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure
+ * that the pkey is valid as far as the hardware is
+ * concerned. The rest of the kernel trusts that
+ * only good, valid pkeys come out of here.
+ */
+ u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0);
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially
+ * because ffz() behavior is undefined if there are no
+ * zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz(mm_pkey_allocation_map(mm));
+
+ mm_set_pkey_allocated(mm, ret);
+
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ mm_set_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+#endif /* _ASM_ARM64_PKEYS_H */
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
deleted file mode 100644
index e6f087806aaf..000000000000
--- a/arch/arm64/include/asm/pmu.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Based on arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
- /*
- * The events that are active on the PMU for the given index.
- */
- struct perf_event **events;
-
- /*
- * A 1 bit for an index indicates that the counter is being used for
- * an event. A 0 means that the counter can be used.
- */
- unsigned long *used_mask;
-
- /*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
- raw_spinlock_t pmu_lock;
-};
-
-struct arm_pmu {
- struct pmu pmu;
- cpumask_t active_irqs;
- const char *name;
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
- void (*enable)(struct hw_perf_event *evt, int idx);
- void (*disable)(struct hw_perf_event *evt, int idx);
- int (*get_event_idx)(struct pmu_hw_events *hw_events,
- struct hw_perf_event *hwc);
- int (*set_event_filter)(struct hw_perf_event *evt,
- struct perf_event_attr *attr);
- u32 (*read_counter)(int idx);
- void (*write_counter)(int idx, u32 val);
- void (*start)(void);
- void (*stop)(void);
- void (*reset)(void *);
- int (*map_event)(struct perf_event *event);
- int num_events;
- atomic_t active_events;
- struct mutex reserve_mutex;
- u64 max_period;
- struct platform_device *plat_device;
- struct pmu_hw_events *(*get_hw_events)(void);
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
-
-u64 armpmu_event_update(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx);
-
-int armpmu_event_set_period(struct perf_event *event,
- struct hw_perf_event *hwc,
- int idx);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-#endif /* __ASM_PMU_H */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
new file mode 100644
index 000000000000..d2e0306e65d3
--- /dev/null
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POINTER_AUTH_H
+#define __ASM_POINTER_AUTH_H
+
+#include <linux/bitops.h>
+#include <linux/prctl.h>
+#include <linux/random.h>
+
+#include <asm/cpufeature.h>
+#include <asm/memory.h>
+#include <asm/sysreg.h>
+
+/*
+ * The EL0/EL1 pointer bits used by a pointer authentication code.
+ * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
+ */
+#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual)
+#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual)
+
+#define PR_PAC_ENABLED_KEYS_MASK \
+ (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * Each key is a 128-bit quantity which is split across a pair of 64-bit
+ * registers (Lo and Hi).
+ */
+struct ptrauth_key {
+ unsigned long lo, hi;
+};
+
+/*
+ * We give each process its own keys, which are shared by all threads. The keys
+ * are inherited upon fork(), and reinitialised upon exec*().
+ */
+struct ptrauth_keys_user {
+ struct ptrauth_key apia;
+ struct ptrauth_key apib;
+ struct ptrauth_key apda;
+ struct ptrauth_key apdb;
+ struct ptrauth_key apga;
+};
+
+#define __ptrauth_key_install_nosync(k, v) \
+do { \
+ struct ptrauth_key __pki_v = (v); \
+ write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
+ write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
+} while (0)
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+struct ptrauth_keys_kernel {
+ struct ptrauth_key apia;
+};
+
+static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
+{
+ if (system_supports_address_auth())
+ get_random_bytes(&keys->apia, sizeof(keys->apia));
+}
+
+static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
+{
+ if (!system_supports_address_auth())
+ return;
+
+ __ptrauth_key_install_nosync(APIA, keys->apia);
+ isb();
+}
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
+{
+ if (system_supports_address_auth()) {
+ __ptrauth_key_install_nosync(APIB, keys->apib);
+ __ptrauth_key_install_nosync(APDA, keys->apda);
+ __ptrauth_key_install_nosync(APDB, keys->apdb);
+ }
+
+ if (system_supports_generic_auth())
+ __ptrauth_key_install_nosync(APGA, keys->apga);
+}
+
+static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
+{
+ if (system_supports_address_auth()) {
+ get_random_bytes(&keys->apia, sizeof(keys->apia));
+ get_random_bytes(&keys->apib, sizeof(keys->apib));
+ get_random_bytes(&keys->apda, sizeof(keys->apda));
+ get_random_bytes(&keys->apdb, sizeof(keys->apdb));
+ }
+
+ if (system_supports_generic_auth())
+ get_random_bytes(&keys->apga, sizeof(keys->apga));
+
+ ptrauth_keys_install_user(keys);
+}
+
+extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
+
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+ unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
+
+static __always_inline void ptrauth_enable(void)
+{
+ if (!system_supports_address_auth())
+ return;
+ sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+ isb();
+}
+
+#define ptrauth_suspend_exit() \
+ ptrauth_keys_install_user(&current->thread.keys_user)
+
+#define ptrauth_thread_init_user() \
+ do { \
+ ptrauth_keys_init_user(&current->thread.keys_user); \
+ \
+ /* enable all keys */ \
+ if (system_supports_address_auth()) \
+ ptrauth_set_enabled_keys(current, \
+ PR_PAC_ENABLED_KEYS_MASK, \
+ PR_PAC_ENABLED_KEYS_MASK); \
+ } while (0)
+
+#define ptrauth_thread_switch_user(tsk) \
+ ptrauth_keys_install_user(&(tsk)->thread.keys_user)
+
+#else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_enable()
+#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL)
+#define ptrauth_get_enabled_keys(tsk) (-EINVAL)
+#define ptrauth_suspend_exit()
+#define ptrauth_thread_init_user()
+#define ptrauth_thread_switch_user(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_thread_init_kernel(tsk) \
+ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
+#define ptrauth_thread_switch_kernel(tsk) \
+ ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
+#else
+#define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_kernel(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+#endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
new file mode 100644
index 000000000000..d913d5b529e4
--- /dev/null
+++ b/arch/arm64/include/asm/por.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _ASM_ARM64_POR_H
+#define _ASM_ARM64_POR_H
+
+#include <asm/sysreg.h>
+
+#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX)
+
+static inline bool por_elx_allows_read(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_R;
+}
+
+static inline bool por_elx_allows_write(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_W;
+}
+
+static inline bool por_elx_allows_exec(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
+
+ return perm & POE_X;
+}
+
+#endif /* _ASM_ARM64_POR_H */
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..932ea4b62042
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+#define PREEMPT_NEED_RESCHED BIT(32)
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+ /* Preserve existing value of PREEMPT_NEED_RESCHED */
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+ task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc += val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc -= val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ struct thread_info *ti = current_thread_info();
+ u64 pc = READ_ONCE(ti->preempt_count);
+
+ /* Update only the count field, leaving need_resched unchanged */
+ WRITE_ONCE(ti->preempt.count, --pc);
+
+ /*
+ * If we wrote back all zeroes, then we're preemptible and in
+ * need of a reschedule. Otherwise, we need to reload the
+ * preempt_count in case the need_resched flag was cleared by an
+ * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+ * pair.
+ */
+ return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPTION
+
+void preempt_schedule(void);
+void preempt_schedule_notrace(void);
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+void dynamic_preempt_schedule(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
+
+#else /* CONFIG_PREEMPT_DYNAMIC */
+
+#define __preempt_schedule() preempt_schedule()
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+
+#endif /* CONFIG_PREEMPT_DYNAMIC */
+#endif /* CONFIG_PREEMPTION */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
new file mode 100644
index 000000000000..d49368886309
--- /dev/null
+++ b/arch/arm64/include/asm/probes.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/probes.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ */
+#ifndef _ARM_PROBES_H
+#define _ARM_PROBES_H
+
+#include <asm/insn.h>
+
+typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+
+struct arch_probe_insn {
+ probes_handler_t *handler;
+};
+#ifdef CONFIG_KPROBES
+typedef __le32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+ kprobe_opcode_t *xol_insn;
+ /* restore address after step xol */
+ unsigned long xol_restore;
+};
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 7cdf466fd0c5..ab78a78821a2 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -1,50 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/proc-fns.h
*
* Copyright (C) 1997-1999 Russell King
* Copyright (C) 2000 Deep Blue Solutions Ltd
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PROCFNS_H
#define __ASM_PROCFNS_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/page.h>
-struct mm_struct;
+struct cpu_suspend_ctx;
-extern void cpu_cache_off(void);
extern void cpu_do_idle(void);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
-extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
+extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
-#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
-
-#define cpu_get_pgd() \
-({ \
- unsigned long pg; \
- asm("mrs %0, ttbr0_el1\n" \
- : "=r" (pg)); \
- pg &= ~0xffff000000003ffful; \
- (pgd_t *)phys_to_virt(pg); \
-})
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ab239b2c456f..e30c4c8e3a7a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -1,41 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/processor.h
*
* Copyright (C) 1995-1999 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
+ * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
+ * no point in shifting all network buffers by 2 bytes just to make some IP
+ * header fields appear aligned in memory, potentially sacrificing some DMA
+ * performance on some platforms.
*/
-#define current_text_addr() ({ __label__ _l; _l: &&_l;})
+#define NET_IP_ALIGN 0
+
+#define MTE_CTRL_GCR_USER_EXCL_SHIFT 0
+#define MTE_CTRL_GCR_USER_EXCL_MASK 0xffff
+
+#define MTE_CTRL_TCF_SYNC (1UL << 16)
+#define MTE_CTRL_TCF_ASYNC (1UL << 17)
+#define MTE_CTRL_TCF_ASYMM (1UL << 18)
+
+#define MTE_CTRL_STORE_ONLY (1UL << 19)
-#ifdef __KERNEL__
+#ifndef __ASSEMBLER__
+#include <linux/build_bug.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/thread_info.h>
-#include <asm/fpsimd.h>
+#include <vdso/processor.h>
+
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kasan.h>
+#include <asm/lse.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
+#include <asm/spectre.h>
#include <asm/types.h>
-#ifdef __KERNEL__
+/*
+ * TASK_SIZE - the maximum size of a user space task.
+ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
+ */
+
+#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN)
+#define TASK_SIZE_64 (UL(1) << vabits_actual)
+#define TASK_SIZE_MAX (UL(1) << VA_BITS)
+
+#ifdef CONFIG_COMPAT
+#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
+/*
+ * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied
+ * by the compat vectors page.
+ */
+#define TASK_SIZE_32 UL(0x100000000)
+#else
+#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE)
+#endif /* CONFIG_ARM64_64K_PAGES */
+#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
+#define DEFAULT_MAP_WINDOW (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : DEFAULT_MAP_WINDOW_64)
+#else
+#define TASK_SIZE TASK_SIZE_64
+#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64
+#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_ARM64_FORCE_52BIT
#define STACK_TOP_MAX TASK_SIZE_64
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+#else
+#define STACK_TOP_MAX DEFAULT_MAP_WINDOW_64
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#endif /* CONFIG_ARM64_FORCE_52BIT */
+
#ifdef CONFIG_COMPAT
#define AARCH32_VECTORS_BASE 0xffff0000
#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
@@ -44,10 +93,20 @@
#define STACK_TOP STACK_TOP_MAX
#endif /* CONFIG_COMPAT */
-#define ARCH_LOW_ADDRESS_LIMIT PHYS_MASK
-#endif /* __KERNEL__ */
+#ifndef CONFIG_ARM64_FORCE_52BIT
+#define arch_get_mmap_end(addr, len, flags) \
+ (((addr) > DEFAULT_MAP_WINDOW) ? TASK_SIZE : DEFAULT_MAP_WINDOW)
+
+#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
+ base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
+ base)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
+
+extern phys_addr_t arm64_dma_phys_limit;
+#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
struct debug_info {
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
/* Have we suspended stepping by a debugger? */
int suspended_step;
/* Allow breakpoints and watchpoints to be disabled for this thread. */
@@ -56,6 +115,19 @@ struct debug_info {
/* Hardware breakpoints pinned to this task. */
struct perf_event *hbp_break[ARM_MAX_BRP];
struct perf_event *hbp_watch[ARM_MAX_WRP];
+#endif
+};
+
+enum vec_type {
+ ARM64_VEC_SVE = 0,
+ ARM64_VEC_SME,
+ ARM64_VEC_MAX,
+};
+
+enum fp_type {
+ FP_STATE_CURRENT, /* Save based on current task state. */
+ FP_STATE_FPSIMD,
+ FP_STATE_SVE,
};
struct cpu_context {
@@ -76,26 +148,196 @@ struct cpu_context {
struct thread_struct {
struct cpu_context cpu_context; /* cpu context */
- unsigned long tp_value;
- struct fpsimd_state fpsimd_state;
+
+ /*
+ * Whitelisted fields for hardened usercopy:
+ * Maintainers must ensure manually that this contains no
+ * implicit padding.
+ */
+ struct {
+ unsigned long tp_value; /* TLS register */
+ unsigned long tp2_value;
+ u64 fpmr;
+ unsigned long pad;
+ struct user_fpsimd_state fpsimd_state;
+ } uw;
+
+ enum fp_type fp_type; /* registers FPSIMD or SVE? */
+ unsigned int fpsimd_cpu;
+ void *sve_state; /* SVE registers, if any */
+ void *sme_state; /* ZA and ZT state, if any */
+ unsigned int vl[ARM64_VEC_MAX]; /* vector length */
+ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
+ unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ /*
+ * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
+ * address of a caller provided buffer that will be used to preserve a
+ * task's kernel mode FPSIMD state while it is scheduled out.
+ */
+ struct user_fpsimd_state *kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
+#ifdef CONFIG_ARM64_PTR_AUTH
+ struct ptrauth_keys_user keys_user;
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+ struct ptrauth_keys_kernel keys_kernel;
+#endif
+#endif
+#ifdef CONFIG_ARM64_MTE
+ u64 mte_ctrl;
+#endif
+ u64 sctlr_user;
+ u64 svcr;
+ u64 tpidr2_el0;
+ u64 por_el0;
+#ifdef CONFIG_ARM64_GCS
+ unsigned int gcs_el0_mode;
+ unsigned int gcs_el0_locked;
+ u64 gcspr_el0;
+ u64 gcs_base;
+ u64 gcs_size;
+#endif
};
-#define INIT_THREAD { }
+static inline unsigned int thread_get_vl(struct thread_struct *thread,
+ enum vec_type type)
+{
+ return thread->vl[type];
+}
+
+static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SVE);
+}
+
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
+static inline unsigned int thread_get_cur_vl(struct thread_struct *thread)
+{
+ if (system_supports_sme() && (thread->svcr & SVCR_SM_MASK))
+ return thread_get_sme_vl(thread);
+ else
+ return thread_get_sve_vl(thread);
+}
+
+unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
+void task_set_vl(struct task_struct *task, enum vec_type type,
+ unsigned long vl);
+void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
+ unsigned long vl);
+unsigned int task_get_vl_onexec(const struct task_struct *task,
+ enum vec_type type);
+
+static inline unsigned int task_get_sve_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SVE);
+}
+
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SME);
+}
+
+static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
+{
+ task_set_vl(task, ARM64_VEC_SVE, vl);
+}
+
+static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task)
+{
+ return task_get_vl_onexec(task, ARM64_VEC_SVE);
+}
+
+static inline void task_set_sve_vl_onexec(struct task_struct *task,
+ unsigned long vl)
+{
+ task_set_vl_onexec(task, ARM64_VEC_SVE, vl);
+}
+
+#define SCTLR_USER_MASK \
+ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \
+ SCTLR_EL1_TCF0_MASK)
+
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
+{
+ /* Verify that there is no padding among the whitelisted fields: */
+ BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
+ sizeof_field(struct thread_struct, uw.tp_value) +
+ sizeof_field(struct thread_struct, uw.tp2_value) +
+ sizeof_field(struct thread_struct, uw.fpmr) +
+ sizeof_field(struct thread_struct, uw.pad) +
+ sizeof_field(struct thread_struct, uw.fpsimd_state));
+
+ *offset = offsetof(struct thread_struct, uw);
+ *size = sizeof_field(struct thread_struct, uw);
+}
+
+#ifdef CONFIG_COMPAT
+#define task_user_tls(t) \
+({ \
+ unsigned long *__tls; \
+ if (is_compat_thread(task_thread_info(t))) \
+ __tls = &(t)->thread.uw.tp2_value; \
+ else \
+ __tls = &(t)->thread.uw.tp_value; \
+ __tls; \
+ })
+#else
+#define task_user_tls(t) (&(t)->thread.uw.tp_value)
+#endif
+
+/* Sync TPIDR_EL0 back to thread_struct for current */
+void tls_preserve_current_state(void);
-static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
+#define INIT_THREAD { \
+ .fpsimd_cpu = NR_CPUS, \
+}
+
+static inline void start_thread_common(struct pt_regs *regs, unsigned long pc,
+ unsigned long pstate)
{
- memset(regs, 0, sizeof(*regs));
- regs->syscallno = ~0UL;
- regs->pc = pc;
+ /*
+ * Ensure all GPRs are zeroed, and initialize PC + PSTATE.
+ * The SP (or compat SP) will be initialized later.
+ */
+ regs->user_regs = (struct user_pt_regs) {
+ .pc = pc,
+ .pstate = pstate,
+ };
+
+ /*
+ * To allow the syscalls:sys_exit_execve tracepoint we need to preserve
+ * syscallno, but do not need orig_x0 or the original GPRs.
+ */
+ regs->orig_x0 = 0;
+
+ /*
+ * An exec from a kernel thread won't have an existing PMR value.
+ */
+ if (system_uses_irq_prio_masking())
+ regs->pmr = GIC_PRIO_IRQON;
+
+ /*
+ * The pt_regs::stackframe field must remain valid throughout this
+ * function as a stacktrace can be taken at any time. Any user or
+ * kernel task should have a valid final frame.
+ */
+ WARN_ON_ONCE(regs->stackframe.record.fp != 0);
+ WARN_ON_ONCE(regs->stackframe.record.lr != 0);
+ WARN_ON_ONCE(regs->stackframe.type != FRAME_META_TYPE_FINAL);
}
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- start_thread_common(regs, pc);
- regs->pstate = PSR_MODE_EL0t;
+ start_thread_common(regs, pc, PSR_MODE_EL0t);
+ spectre_v4_enable_task_mitigation(current);
regs->sp = sp;
}
@@ -103,36 +345,46 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- start_thread_common(regs, pc);
- regs->pstate = COMPAT_PSR_MODE_USR;
+ unsigned long pstate = PSR_AA32_MODE_USR;
if (pc & 1)
- regs->pstate |= COMPAT_PSR_T_BIT;
+ pstate |= PSR_AA32_T_BIT;
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ pstate |= PSR_AA32_E_BIT;
+
+ start_thread_common(regs, pc, pstate);
+ spectre_v4_enable_task_mitigation(current);
regs->compat_sp = sp;
}
#endif
-/* Forward declaration, a strange C thing */
-struct task_struct;
+static __always_inline bool is_ttbr0_addr(unsigned long addr)
+{
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+}
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
+static __always_inline bool is_ttbr1_addr(unsigned long addr)
+{
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
-/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk) do { } while (0)
+/* Forward declaration, a strange C thing */
+struct task_struct;
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
-#define cpu_relax() barrier()
+void update_sctlr_el1(u64 sctlr);
/* Thread switching */
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
#define task_pt_regs(p) \
- ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
+ ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
-#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc
-#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp
+#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc)
+#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk))
/*
* Prefetching support
@@ -149,14 +401,46 @@ static inline void prefetchw(const void *ptr)
asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
}
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
-{
- prefetchw(x);
-}
+extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
+extern void __init minsigstksz_setup(void);
+
+/*
+ * Not at the top of the file due to a direct #include cycle between
+ * <asm/fpsimd.h> and <asm/processor.h>. Deferring this #include
+ * ensures that contents of processor.h are visible to fpsimd.h even if
+ * processor.h is included first.
+ *
+ * These prctl helpers are the only things in this file that require
+ * fpsimd.h. The core code expects them to be in this header.
+ */
+#include <asm/fpsimd.h>
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
+/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */
+#define SVE_SET_VL(arg) sve_set_current_vl(arg)
+#define SVE_GET_VL() sve_get_current_vl()
+#define SME_SET_VL(arg) sme_set_current_vl(arg)
+#define SME_GET_VL() sme_get_current_vl()
+/* PR_PAC_RESET_KEYS prctl */
+#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
+
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled) \
+ ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
+long get_tagged_addr_ctrl(struct task_struct *task);
+#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg)
+#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
#endif
+int get_tsc_mode(unsigned long adr);
+int set_tsc_mode(unsigned int val);
+#define GET_TSC_CTL(adr) get_tsc_mode((adr))
+#define SET_TSC_CTL(val) set_tsc_mode((val))
+
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/prom.h b/arch/arm64/include/asm/prom.h
deleted file mode 100644
index 68b90e682957..000000000000
--- a/arch/arm64/include/asm/prom.h
+++ /dev/null
@@ -1 +0,0 @@
-/* Empty for now */
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
deleted file mode 100644
index 0604237ecd99..000000000000
--- a/arch/arm64/include/asm/psci.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2013 ARM Limited
- */
-
-#ifndef __ASM_PSCI_H
-#define __ASM_PSCI_H
-
-#define PSCI_POWER_STATE_TYPE_STANDBY 0
-#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
-
-struct psci_power_state {
- u16 id;
- u8 type;
- u8 affinity_level;
-};
-
-struct psci_operations {
- int (*cpu_suspend)(struct psci_power_state state,
- unsigned long entry_point);
- int (*cpu_off)(struct psci_power_state state);
- int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
- int (*migrate)(unsigned long cpuid);
-};
-
-extern struct psci_operations psci_ops;
-
-int psci_init(void);
-
-#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
new file mode 100644
index 000000000000..baff24004459
--- /dev/null
+++ b/arch/arm64/include/asm/ptdump.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2014 ARM Ltd.
+ */
+#ifndef __ASM_PTDUMP_H
+#define __ASM_PTDUMP_H
+
+#include <linux/ptdump.h>
+
+DECLARE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
+
+#ifdef CONFIG_PTDUMP
+
+#include <linux/mm_types.h>
+#include <linux/seq_file.h>
+
+struct addr_marker {
+ unsigned long start_address;
+ char *name;
+};
+
+struct ptdump_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+};
+
+struct ptdump_prot_bits {
+ ptdesc_t mask;
+ ptdesc_t val;
+ const char *set;
+ const char *clear;
+};
+
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ char name[4];
+ int num;
+ ptdesc_t mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct ptdump_pg_level *pg_level;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ const struct mm_struct *mm;
+ unsigned long start_address;
+ int level;
+ ptdesc_t current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
+void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ pteval_t val);
+void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte);
+void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud);
+void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
+void note_page_flush(struct ptdump_state *st);
+#ifdef CONFIG_PTDUMP_DEBUGFS
+#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+#else
+static inline void ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name) { }
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+#else
+static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, pteval_t val) { }
+static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte) { }
+static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd) { }
+static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud) { }
+static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d) { }
+static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd) { }
+static inline void note_page_flush(struct ptdump_state *st) { }
+#endif /* CONFIG_PTDUMP */
+
+#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 0dacbbf9458b..39582511ad72 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -1,26 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/ptrace.h
*
* Copyright (C) 1996-2003 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PTRACE_H
#define __ASM_PTRACE_H
+#include <asm/cpufeature.h>
+
#include <uapi/asm/ptrace.h>
+/* Current Exception Level values, as contained in CurrentEL */
+#define CurrentEL_EL1 (1 << 2)
+#define CurrentEL_EL2 (2 << 2)
+
+#define INIT_PSTATE_EL1 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h)
+#define INIT_PSTATE_EL2 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
+
+#include <linux/irqchip/arm-gic-v3-prio.h>
+
+#define GIC_PRIO_IRQON GICV3_PRIO_UNMASKED
+#define GIC_PRIO_IRQOFF GICV3_PRIO_IRQ
+
+#define GIC_PRIO_PSR_I_SET GICV3_PRIO_PSR_I_SET
+
+/* Additional SPSR bits not exposed in the UABI */
+#define PSR_MODE_THREAD_BIT (1 << 0)
+#define PSR_IL_BIT (1 << 20)
+
/* AArch32-specific ptrace requests */
#define COMPAT_PTRACE_GETREGS 12
#define COMPAT_PTRACE_SETREGS 13
@@ -31,28 +42,41 @@
#define COMPAT_PTRACE_GETHBPREGS 29
#define COMPAT_PTRACE_SETHBPREGS 30
-/* AArch32 CPSR bits */
-#define COMPAT_PSR_MODE_MASK 0x0000001f
-#define COMPAT_PSR_MODE_USR 0x00000010
-#define COMPAT_PSR_MODE_FIQ 0x00000011
-#define COMPAT_PSR_MODE_IRQ 0x00000012
-#define COMPAT_PSR_MODE_SVC 0x00000013
-#define COMPAT_PSR_MODE_ABT 0x00000017
-#define COMPAT_PSR_MODE_HYP 0x0000001a
-#define COMPAT_PSR_MODE_UND 0x0000001b
-#define COMPAT_PSR_MODE_SYS 0x0000001f
-#define COMPAT_PSR_T_BIT 0x00000020
-#define COMPAT_PSR_F_BIT 0x00000040
-#define COMPAT_PSR_I_BIT 0x00000080
-#define COMPAT_PSR_A_BIT 0x00000100
-#define COMPAT_PSR_E_BIT 0x00000200
-#define COMPAT_PSR_J_BIT 0x01000000
-#define COMPAT_PSR_Q_BIT 0x08000000
-#define COMPAT_PSR_V_BIT 0x10000000
-#define COMPAT_PSR_C_BIT 0x20000000
-#define COMPAT_PSR_Z_BIT 0x40000000
-#define COMPAT_PSR_N_BIT 0x80000000
-#define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */
+/* SPSR_ELx bits for exceptions taken from AArch32 */
+#define PSR_AA32_MODE_MASK 0x0000001f
+#define PSR_AA32_MODE_USR 0x00000010
+#define PSR_AA32_MODE_FIQ 0x00000011
+#define PSR_AA32_MODE_IRQ 0x00000012
+#define PSR_AA32_MODE_SVC 0x00000013
+#define PSR_AA32_MODE_ABT 0x00000017
+#define PSR_AA32_MODE_HYP 0x0000001a
+#define PSR_AA32_MODE_UND 0x0000001b
+#define PSR_AA32_MODE_SYS 0x0000001f
+#define PSR_AA32_T_BIT 0x00000020
+#define PSR_AA32_F_BIT 0x00000040
+#define PSR_AA32_I_BIT 0x00000080
+#define PSR_AA32_A_BIT 0x00000100
+#define PSR_AA32_E_BIT 0x00000200
+#define PSR_AA32_PAN_BIT 0x00400000
+#define PSR_AA32_SSBS_BIT 0x00800000
+#define PSR_AA32_DIT_BIT 0x01000000
+#define PSR_AA32_Q_BIT 0x08000000
+#define PSR_AA32_V_BIT 0x10000000
+#define PSR_AA32_C_BIT 0x20000000
+#define PSR_AA32_Z_BIT 0x40000000
+#define PSR_AA32_N_BIT 0x80000000
+#define PSR_AA32_IT_MASK 0x0600fc00 /* If-Then execution state mask */
+#define PSR_AA32_GE_MASK 0x000f0000
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define PSR_AA32_ENDSTATE PSR_AA32_E_BIT
+#else
+#define PSR_AA32_ENDSTATE 0
+#endif
+
+/* AArch32 CPSR bits, as seen in AArch32 */
+#define COMPAT_PSR_DIT_BIT 0x00200000
+
/*
* These are 'magic' values for PTRACE_PEEKUSR that return info about where a
* process is located in memory.
@@ -60,24 +84,39 @@
#define COMPAT_PT_TEXT_ADDR 0x10000
#define COMPAT_PT_DATA_ADDR 0x10004
#define COMPAT_PT_TEXT_END_ADDR 0x10008
-#ifndef __ASSEMBLY__
+
+/*
+ * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing
+ * a syscall -- i.e., its most recent entry into the kernel from
+ * userspace was not via SVC, or otherwise a tracer cancelled the syscall.
+ *
+ * This must have the value -1, for ABI compatibility with ptrace etc.
+ */
+#define NO_SYSCALL (-1)
+
+#ifndef __ASSEMBLER__
+#include <linux/bug.h>
+#include <linux/types.h>
+
+#include <asm/stacktrace/frame.h>
/* sizeof(struct user) for AArch32 */
#define COMPAT_USER_SZ 296
/* Architecturally defined mapping between AArch32 and AArch64 registers */
#define compat_usr(x) regs[(x)]
+#define compat_fp regs[11]
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
-#define compat_sp_irq regs[16]
-#define compat_lr_irq regs[17]
-#define compat_sp_svc regs[18]
-#define compat_lr_svc regs[19]
-#define compat_sp_abt regs[20]
-#define compat_lr_abt regs[21]
-#define compat_sp_und regs[22]
-#define compat_lr_und regs[23]
+#define compat_lr_irq regs[16]
+#define compat_sp_irq regs[17]
+#define compat_lr_svc regs[18]
+#define compat_sp_svc regs[19]
+#define compat_lr_abt regs[20]
+#define compat_sp_abt regs[21]
+#define compat_lr_und regs[22]
+#define compat_sp_und regs[23]
#define compat_r8_fiq regs[24]
#define compat_r9_fiq regs[25]
#define compat_r10_fiq regs[26]
@@ -86,10 +125,33 @@
#define compat_sp_fiq regs[29]
#define compat_lr_fiq regs[30]
+static inline unsigned long compat_psr_to_pstate(const unsigned long psr)
+{
+ unsigned long pstate;
+
+ pstate = psr & ~COMPAT_PSR_DIT_BIT;
+
+ if (psr & COMPAT_PSR_DIT_BIT)
+ pstate |= PSR_AA32_DIT_BIT;
+
+ return pstate;
+}
+
+static inline unsigned long pstate_to_compat_psr(const unsigned long pstate)
+{
+ unsigned long psr;
+
+ psr = pstate & ~PSR_AA32_DIT_BIT;
+
+ if (pstate & PSR_AA32_DIT_BIT)
+ psr |= COMPAT_PSR_DIT_BIT;
+
+ return psr;
+}
+
/*
* This struct defines the way the registers are stored on the stack during an
- * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
- * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
+ * exception. struct user_pt_regs must form a prefix of struct pt_regs.
*/
struct pt_regs {
union {
@@ -102,14 +164,33 @@ struct pt_regs {
};
};
u64 orig_x0;
- u64 syscallno;
+ s32 syscallno;
+ u32 pmr;
+
+ u64 sdei_ttbr1;
+ struct frame_record_meta stackframe;
};
+/* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */
+static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16));
+
+static inline bool in_syscall(struct pt_regs const *regs)
+{
+ return regs->syscallno != NO_SYSCALL;
+}
+
+static inline void forget_syscall(struct pt_regs *regs)
+{
+ regs->syscallno = NO_SYSCALL;
+}
+
+#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate)
+
#define arch_has_single_step() (1)
#ifdef CONFIG_COMPAT
#define compat_thumb_mode(regs) \
- (((regs)->pstate & COMPAT_PSR_T_BIT))
+ (((regs)->pstate & PSR_AA32_T_BIT))
#else
#define compat_thumb_mode(regs) (0)
#endif
@@ -124,52 +205,161 @@ struct pt_regs {
#define processor_mode(regs) \
((regs)->pstate & PSR_MODE_MASK)
-#define interrupts_enabled(regs) \
- (!((regs)->pstate & PSR_I_BIT))
+#define irqs_priority_unmasked(regs) \
+ (system_uses_irq_prio_masking() ? \
+ (regs)->pmr == GIC_PRIO_IRQON : \
+ true)
-#define fast_interrupts_enabled(regs) \
- (!((regs)->pstate & PSR_F_BIT))
+static __always_inline bool regs_irqs_disabled(const struct pt_regs *regs)
+{
+ return (regs->pstate & PSR_I_BIT) || !irqs_priority_unmasked(regs);
+}
-#define user_stack_pointer(regs) \
- ((regs)->sp)
+#define interrupts_enabled(regs) (!regs_irqs_disabled(regs))
-/*
- * Are the current registers suitable for user mode? (used to maintain
- * security in signal handlers)
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ if (compat_user_mode(regs))
+ return regs->compat_sp;
+ return regs->sp;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs: pt_regs from which register value is gotten
+ * @offset: offset of the register.
+ *
+ * regs_get_register returns the value of a register whose offset from @regs.
+ * The @offset is the offset of the register in struct pt_regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
*/
-static inline int valid_user_regs(struct user_pt_regs *regs)
+static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset)
{
- if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) {
- regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT);
+ u64 val = 0;
- /* The T bit is reserved for AArch64 */
- if (!(regs->pstate & PSR_MODE32_BIT))
- regs->pstate &= ~COMPAT_PSR_T_BIT;
+ WARN_ON(offset & 7);
- return 1;
+ offset >>= 3;
+ switch (offset) {
+ case 0 ... 30:
+ val = regs->regs[offset];
+ break;
+ case offsetof(struct pt_regs, sp) >> 3:
+ val = regs->sp;
+ break;
+ case offsetof(struct pt_regs, pc) >> 3:
+ val = regs->pc;
+ break;
+ case offsetof(struct pt_regs, pstate) >> 3:
+ val = regs->pstate;
+ break;
+ default:
+ val = 0;
}
+ return val;
+}
+
+/*
+ * Read a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline unsigned long pt_regs_read_reg(const struct pt_regs *regs, int r)
+{
+ return (r == 31) ? 0 : regs->regs[r];
+}
+
+/*
+ * Write a register given an architectural register index r.
+ * This handles the common case where 31 means XZR, not SP.
+ */
+static inline void pt_regs_write_reg(struct pt_regs *regs, int r,
+ unsigned long val)
+{
+ if (r != 31)
+ regs->regs[r] = val;
+}
+
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ unsigned long val = regs->regs[0];
+
/*
- * Force PSR to something logical...
+ * Audit currently uses regs_return_value() instead of
+ * syscall_get_return_value(). Apply the same sign-extension here until
+ * audit is updated to use syscall_get_return_value().
*/
- regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \
- COMPAT_PSR_T_BIT | PSR_MODE32_BIT;
+ if (compat_user_mode(regs))
+ val = sign_extend64(val, 31);
- if (!(regs->pstate & PSR_MODE32_BIT)) {
- regs->pstate &= ~COMPAT_PSR_T_BIT;
- regs->pstate |= PSR_MODE_EL0t;
- }
+ return val;
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->regs[0] = rc;
+}
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs: pt_regs of that context
+ * @n: function argument number (start from 0)
+ *
+ * regs_get_argument() returns @n th argument of the function call.
+ *
+ * Note that this chooses the most likely register mapping. In very rare
+ * cases this may not return correct data, for example, if one of the
+ * function parameters is 16 bytes or bigger. In such cases, we cannot
+ * get access the parameter correctly and the register assignment of
+ * subsequent parameters will be shifted.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+ unsigned int n)
+{
+#define NR_REG_ARGUMENTS 8
+ if (n < NR_REG_ARGUMENTS)
+ return pt_regs_read_reg(regs, n);
return 0;
}
-#define instruction_pointer(regs) (regs)->pc
+/* We must avoid circular header include via sched.h */
+struct task_struct;
+int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
+
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->pc;
+}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->pc = val;
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return regs->regs[29];
+}
+
+#define procedure_link_pointer(regs) ((regs)->regs[30])
+
+static inline void procedure_link_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ procedure_link_pointer(regs) = val;
+}
-#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
-#else
-#define profile_pc(regs) instruction_pointer(regs)
-#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h
new file mode 100644
index 000000000000..c4f1c0a0789c
--- /dev/null
+++ b/arch/arm64/include/asm/pvclock-abi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __ASM_PVCLOCK_ABI_H
+#define __ASM_PVCLOCK_ABI_H
+
+/* The below structure is defined in ARM DEN0057A */
+
+struct pvclock_vcpu_stolen_time {
+ __le32 revision;
+ __le32 attributes;
+ __le64 stolen_time;
+ /* Structure must be 64 byte aligned, pad to that size */
+ u8 padding[48];
+} __packed;
+
+#endif
diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h
new file mode 100644
index 000000000000..9ea0a74e5892
--- /dev/null
+++ b/arch/arm64/include/asm/rqspinlock.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RQSPINLOCK_H
+#define _ASM_RQSPINLOCK_H
+
+#include <asm/barrier.h>
+
+/*
+ * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
+ * version based on [0]. In rqspinlock code, our conditional expression involves
+ * checking the value _and_ additionally a timeout. However, on arm64, the
+ * WFE-based implementation may never spin again if no stores occur to the
+ * locked byte in the lock word. As such, we may be stuck forever if
+ * event-stream based unblocking is not available on the platform for WFE spin
+ * loops (arch_timer_evtstrm_available).
+ *
+ * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
+ * copy-paste.
+ *
+ * While we rely on the implementation to amortize the cost of sampling
+ * cond_expr for us, it will not happen when event stream support is
+ * unavailable, time_expr check is amortized. This is not the common case, and
+ * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns
+ * comparison, hence just let it be. In case of event-stream, the loop is woken
+ * up at microsecond granularity.
+ *
+ * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
+ */
+
+#ifndef smp_cond_load_acquire_timewait
+
+#define smp_cond_time_check_count 200
+
+#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
+ time_limit_ns) ({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ unsigned int __count = 0; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ cpu_relax(); \
+ if (__count++ < smp_cond_time_check_count) \
+ continue; \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ __count = 0; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ __unqual_scalar_typeof(*ptr) _val; \
+ int __wfe = arch_timer_evtstrm_available(); \
+ \
+ if (likely(__wfe)) { \
+ _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ } else { \
+ _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ smp_acquire__after_ctrl_dep(); \
+ } \
+ (typeof(*ptr))_val; \
+})
+
+#endif
+
+#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+
+#include <asm-generic/rqspinlock.h>
+
+#endif /* _ASM_RQSPINLOCK_H */
diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
new file mode 100644
index 000000000000..88b50d660e85
--- /dev/null
+++ b/arch/arm64/include/asm/rsi.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_H_
+#define __ASM_RSI_H_
+
+#include <linux/errno.h>
+#include <linux/jump_label.h>
+#include <asm/rsi_cmds.h>
+
+#define RSI_PDEV_NAME "arm-cca-dev"
+
+DECLARE_STATIC_KEY_FALSE(rsi_present);
+
+void __init arm64_rsi_init(void);
+
+bool arm64_rsi_is_protected(phys_addr_t base, size_t size);
+
+static inline bool is_realm_world(void)
+{
+ return static_branch_unlikely(&rsi_present);
+}
+
+static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end,
+ enum ripas state, unsigned long flags)
+{
+ unsigned long ret;
+ phys_addr_t top;
+
+ while (start != end) {
+ ret = rsi_set_addr_range_state(start, end, state, flags, &top);
+ if (ret || top < start || top > end)
+ return -EINVAL;
+ start = top;
+ }
+
+ return 0;
+}
+
+/*
+ * Convert the specified range to RAM. Do not use this if you rely on the
+ * contents of a page that may already be in RAM state.
+ */
+static inline int rsi_set_memory_range_protected(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+ RSI_CHANGE_DESTROYED);
+}
+
+/*
+ * Convert the specified range to RAM. Do not convert any pages that may have
+ * been DESTROYED, without our permission.
+ */
+static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+ RSI_NO_CHANGE_DESTROYED);
+}
+
+static inline int rsi_set_memory_range_shared(phys_addr_t start,
+ phys_addr_t end)
+{
+ return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
+ RSI_CHANGE_DESTROYED);
+}
+#endif /* __ASM_RSI_H_ */
diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
new file mode 100644
index 000000000000..2c8763876dfb
--- /dev/null
+++ b/arch/arm64/include/asm/rsi_cmds.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_CMDS_H
+#define __ASM_RSI_CMDS_H
+
+#include <linux/arm-smccc.h>
+#include <linux/string.h>
+#include <asm/memory.h>
+
+#include <asm/rsi_smc.h>
+
+#define RSI_GRANULE_SHIFT 12
+#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT)
+
+enum ripas {
+ RSI_RIPAS_EMPTY = 0,
+ RSI_RIPAS_RAM = 1,
+ RSI_RIPAS_DESTROYED = 2,
+ RSI_RIPAS_DEV = 3,
+};
+
+static inline unsigned long rsi_request_version(unsigned long req,
+ unsigned long *out_lower,
+ unsigned long *out_higher)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res);
+
+ if (out_lower)
+ *out_lower = res.a1;
+ if (out_higher)
+ *out_higher = res.a2;
+
+ return res.a0;
+}
+
+static inline unsigned long rsi_get_realm_config(struct realm_config *cfg)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg),
+ 0, 0, 0, 0, 0, 0, &res);
+ return res.a0;
+}
+
+static inline unsigned long rsi_ipa_state_get(phys_addr_t start,
+ phys_addr_t end,
+ enum ripas *state,
+ phys_addr_t *top)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_IPA_STATE_GET,
+ start, end, 0, 0, 0, 0, 0,
+ &res);
+
+ if (res.a0 == RSI_SUCCESS) {
+ if (top)
+ *top = res.a1;
+ if (state)
+ *state = res.a2;
+ }
+
+ return res.a0;
+}
+
+static inline long rsi_set_addr_range_state(phys_addr_t start,
+ phys_addr_t end,
+ enum ripas state,
+ unsigned long flags,
+ phys_addr_t *top)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state,
+ flags, 0, 0, 0, &res);
+
+ if (top)
+ *top = res.a1;
+
+ if (res.a2 != RSI_ACCEPT)
+ return -EPERM;
+
+ return res.a0;
+}
+
+/**
+ * rsi_attestation_token_init - Initialise the operation to retrieve an
+ * attestation token.
+ *
+ * @challenge: The challenge data to be used in the attestation token
+ * generation.
+ * @size: Size of the challenge data in bytes.
+ *
+ * Initialises the attestation token generation and returns an upper bound
+ * on the attestation token size that can be used to allocate an adequate
+ * buffer. The caller is expected to subsequently call
+ * rsi_attestation_token_continue() to retrieve the attestation token data on
+ * the same CPU.
+ *
+ * Returns:
+ * On success, returns the upper limit of the attestation report size.
+ * Otherwise, -EINVAL
+ */
+static inline long
+rsi_attestation_token_init(const u8 *challenge, unsigned long size)
+{
+ struct arm_smccc_1_2_regs regs = { 0 };
+
+ /* The challenge must be at least 32bytes and at most 64bytes */
+ if (!challenge || size < 32 || size > 64)
+ return -EINVAL;
+
+ regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
+ memcpy(&regs.a1, challenge, size);
+ arm_smccc_1_2_smc(&regs, &regs);
+
+ if (regs.a0 == RSI_SUCCESS)
+ return regs.a1;
+
+ return -EINVAL;
+}
+
+/**
+ * rsi_attestation_token_continue - Continue the operation to retrieve an
+ * attestation token.
+ *
+ * @granule: {I}PA of the Granule to which the token will be written.
+ * @offset: Offset within Granule to start of buffer in bytes.
+ * @size: The size of the buffer.
+ * @len: The number of bytes written to the buffer.
+ *
+ * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
+ * is expected to call rsi_attestation_token_init() before calling this
+ * function to retrieve the attestation token.
+ *
+ * Return:
+ * * %RSI_SUCCESS - Attestation token retrieved successfully.
+ * * %RSI_INCOMPLETE - Token generation is not complete.
+ * * %RSI_ERROR_INPUT - A parameter was not valid.
+ * * %RSI_ERROR_STATE - Attestation not in progress.
+ */
+static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
+ unsigned long offset,
+ unsigned long size,
+ unsigned long *len)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
+ granule, offset, size, 0, &res);
+
+ if (len)
+ *len = res.a1;
+ return res.a0;
+}
+
+#endif /* __ASM_RSI_CMDS_H */
diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h
new file mode 100644
index 000000000000..e19253f96c94
--- /dev/null
+++ b/arch/arm64/include/asm/rsi_smc.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_SMC_H_
+#define __ASM_RSI_SMC_H_
+
+#include <linux/arm-smccc.h>
+
+/*
+ * This file describes the Realm Services Interface (RSI) Application Binary
+ * Interface (ABI) for SMC calls made from within the Realm to the RMM and
+ * serviced by the RMM.
+ */
+
+/*
+ * The major version number of the RSI implementation. This is increased when
+ * the binary format or semantics of the SMC calls change.
+ */
+#define RSI_ABI_VERSION_MAJOR UL(1)
+
+/*
+ * The minor version number of the RSI implementation. This is increased when
+ * a bug is fixed, or a feature is added without breaking binary compatibility.
+ */
+#define RSI_ABI_VERSION_MINOR UL(0)
+
+#define RSI_ABI_VERSION ((RSI_ABI_VERSION_MAJOR << 16) | \
+ RSI_ABI_VERSION_MINOR)
+
+#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16)
+#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF)
+
+#define RSI_SUCCESS UL(0)
+#define RSI_ERROR_INPUT UL(1)
+#define RSI_ERROR_STATE UL(2)
+#define RSI_INCOMPLETE UL(3)
+#define RSI_ERROR_UNKNOWN UL(4)
+
+#define SMC_RSI_FID(n) ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD, \
+ n)
+
+/*
+ * Returns RSI version.
+ *
+ * arg1 == Requested interface revision
+ * ret0 == Status / error
+ * ret1 == Lower implemented interface revision
+ * ret2 == Higher implemented interface revision
+ */
+#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190)
+
+/*
+ * Read feature register.
+ *
+ * arg1 == Feature register index
+ * ret0 == Status / error
+ * ret1 == Feature register value
+ */
+#define SMC_RSI_FEATURES SMC_RSI_FID(0x191)
+
+/*
+ * Read measurement for the current Realm.
+ *
+ * arg1 == Index, which measurements slot to read
+ * ret0 == Status / error
+ * ret1 == Measurement value, bytes: 0 - 7
+ * ret2 == Measurement value, bytes: 8 - 15
+ * ret3 == Measurement value, bytes: 16 - 23
+ * ret4 == Measurement value, bytes: 24 - 31
+ * ret5 == Measurement value, bytes: 32 - 39
+ * ret6 == Measurement value, bytes: 40 - 47
+ * ret7 == Measurement value, bytes: 48 - 55
+ * ret8 == Measurement value, bytes: 56 - 63
+ */
+#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192)
+
+/*
+ * Extend Realm Extensible Measurement (REM) value.
+ *
+ * arg1 == Index, which measurements slot to extend
+ * arg2 == Size of realm measurement in bytes, max 64 bytes
+ * arg3 == Measurement value, bytes: 0 - 7
+ * arg4 == Measurement value, bytes: 8 - 15
+ * arg5 == Measurement value, bytes: 16 - 23
+ * arg6 == Measurement value, bytes: 24 - 31
+ * arg7 == Measurement value, bytes: 32 - 39
+ * arg8 == Measurement value, bytes: 40 - 47
+ * arg9 == Measurement value, bytes: 48 - 55
+ * arg10 == Measurement value, bytes: 56 - 63
+ * ret0 == Status / error
+ */
+#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193)
+
+/*
+ * Initialize the operation to retrieve an attestation token.
+ *
+ * arg1 == Challenge value, bytes: 0 - 7
+ * arg2 == Challenge value, bytes: 8 - 15
+ * arg3 == Challenge value, bytes: 16 - 23
+ * arg4 == Challenge value, bytes: 24 - 31
+ * arg5 == Challenge value, bytes: 32 - 39
+ * arg6 == Challenge value, bytes: 40 - 47
+ * arg7 == Challenge value, bytes: 48 - 55
+ * arg8 == Challenge value, bytes: 56 - 63
+ * ret0 == Status / error
+ * ret1 == Upper bound of token size in bytes
+ */
+#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194)
+
+/*
+ * Continue the operation to retrieve an attestation token.
+ *
+ * arg1 == The IPA of token buffer
+ * arg2 == Offset within the granule of the token buffer
+ * arg3 == Size of the granule buffer
+ * ret0 == Status / error
+ * ret1 == Length of token bytes copied to the granule buffer
+ */
+#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195)
+
+#ifndef __ASSEMBLER__
+
+struct realm_config {
+ union {
+ struct {
+ unsigned long ipa_bits; /* Width of IPA in bits */
+ unsigned long hash_algo; /* Hash algorithm */
+ };
+ u8 pad[0x200];
+ };
+ union {
+ u8 rpv[64]; /* Realm Personalization Value */
+ u8 pad2[0xe00];
+ };
+ /*
+ * The RMM requires the configuration structure to be aligned to a 4k
+ * boundary, ensure this happens by aligning this structure.
+ */
+} __aligned(0x1000);
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * Read configuration for the current Realm.
+ *
+ * arg1 == struct realm_config addr
+ * ret0 == Status / error
+ */
+#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196)
+
+/*
+ * Request RIPAS of a target IPA range to be changed to a specified value.
+ *
+ * arg1 == Base IPA address of target region
+ * arg2 == Top of the region
+ * arg3 == RIPAS value
+ * arg4 == flags
+ * ret0 == Status / error
+ * ret1 == Top of modified IPA range
+ * ret2 == Whether the Host accepted or rejected the request
+ */
+#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197)
+
+#define RSI_NO_CHANGE_DESTROYED UL(0)
+#define RSI_CHANGE_DESTROYED UL(1)
+
+#define RSI_ACCEPT UL(0)
+#define RSI_REJECT UL(1)
+
+/*
+ * Get RIPAS of a target IPA range.
+ *
+ * arg1 == Base IPA of target region
+ * arg2 == End of target IPA region
+ * ret0 == Status / error
+ * ret1 == Top of IPA region which has the reported RIPAS value
+ * ret2 == RIPAS value
+ */
+#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198)
+
+/*
+ * Make a Host call.
+ *
+ * arg1 == IPA of host call structure
+ * ret0 == Status / error
+ */
+#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199)
+
+#endif /* __ASM_RSI_SMC_H_ */
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644
index 000000000000..78beceec10cd
--- /dev/null
+++ b/arch/arm64/include/asm/rwonce.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#if defined(CONFIG_LTO) && !defined(__ASSEMBLER__)
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#define __LOAD_RCPC(sfx, regs...) \
+ ALTERNATIVE( \
+ "ldar" #sfx "\t" #regs, \
+ ".arch_extension rcpc\n" \
+ "ldapr" #sfx "\t" #regs, \
+ ARM64_HAS_LDAPR)
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x) \
+({ \
+ typeof(&(x)) __x = &(x); \
+ int atomic = 1; \
+ union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+ switch (sizeof(x)) { \
+ case 1: \
+ asm volatile(__LOAD_RCPC(b, %w0, %1) \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(__LOAD_RCPC(h, %w0, %1) \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(__LOAD_RCPC(, %w0, %1) \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(__LOAD_RCPC(, %0, %1) \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ default: \
+ atomic = 0; \
+ } \
+ atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO && !__ASSEMBLER__ */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..0fbc2e7867d3
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLER__
+
+#include <asm/asm-offsets.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ scs_sp .req x18
+
+ .macro scs_load_current
+ get_current_task scs_sp
+ ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP]
+ .endm
+
+ .macro scs_save tsk
+ str scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .endm
+#else
+ .macro scs_load_current
+ .endm
+
+ .macro scs_save tsk
+ .endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+
+#else
+
+#include <linux/scs.h>
+#include <asm/cpufeature.h>
+
+#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
+static inline void dynamic_scs_init(void)
+{
+ extern bool __pi_dynamic_scs_is_enabled;
+
+ if (__pi_dynamic_scs_is_enabled) {
+ pr_info("Enabling dynamic shadow call stack\n");
+ static_branch_enable(&dynamic_scs_enabled);
+ }
+}
+#else
+static inline void dynamic_scs_init(void) {}
+#endif
+
+enum {
+ EDYNSCS_INVALID_CIE_HEADER = 1,
+ EDYNSCS_INVALID_CIE_SDATA_SIZE = 2,
+ EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE = 3,
+ EDYNSCS_INVALID_CFA_OPCODE = 4,
+};
+
+int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
new file mode 100644
index 000000000000..b2248bd3cb58
--- /dev/null
+++ b/arch/arm64/include/asm/sdei.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2017 Arm Ltd.
+#ifndef __ASM_SDEI_H
+#define __ASM_SDEI_H
+
+/* Values for sdei_exit_mode */
+#define SDEI_EXIT_HVC 0
+#define SDEI_EXIT_SMC 1
+
+#define SDEI_STACK_SIZE IRQ_STACK_SIZE
+
+#ifndef __ASSEMBLER__
+
+#include <linux/linkage.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/virt.h>
+
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
+extern unsigned long sdei_exit_mode;
+
+/* Software Delegated Exception entry point from firmware*/
+asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
+ unsigned long pc, unsigned long pstate);
+
+/* and its CONFIG_UNMAP_KERNEL_AT_EL0 trampoline */
+asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
+ unsigned long arg,
+ unsigned long pc,
+ unsigned long pstate);
+
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
+/*
+ * The above entry point does the minimum to call C code. This function does
+ * anything else, before calling the driver.
+ */
+struct sdei_registered_event;
+asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+unsigned long do_sdei_event(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+unsigned long sdei_arch_get_entry_point(int conduit);
+#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
new file mode 100644
index 000000000000..bf6bf40bc5ab
--- /dev/null
+++ b/arch/arm64/include/asm/seccomp.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd_compat_32.h>
+
+#ifdef CONFIG_COMPAT
+#define __NR_seccomp_read_32 __NR_compat32_read
+#define __NR_seccomp_write_32 __NR_compat32_write
+#define __NR_seccomp_exit_32 __NR_compat32_exit
+#define __NR_seccomp_sigreturn_32 __NR_compat32_rt_sigreturn
+#endif /* CONFIG_COMPAT */
+
+#include <asm-generic/seccomp.h>
+
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_AARCH64
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "aarch64"
+#ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM
+# define SECCOMP_ARCH_COMPAT_NR __NR_compat32_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "arm"
+#endif
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
new file mode 100644
index 000000000000..51b0d594239e
--- /dev/null
+++ b/arch/arm64/include/asm/sections.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 ARM Limited
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char __alt_instructions[], __alt_instructions_end[];
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_start[], __hyp_data_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
+extern char __hyp_reloc_begin[], __hyp_reloc_end[];
+extern char __hyp_bss_start[], __hyp_bss_end[];
+extern char __idmap_text_start[], __idmap_text_end[];
+extern char __initdata_begin[], __initdata_end[];
+extern char __inittext_begin[], __inittext_end[];
+extern char __exittext_begin[], __exittext_end[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __mmuoff_data_start[], __mmuoff_data_end[];
+extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
+extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+
+static inline size_t entry_tramp_text_size(void)
+{
+ return __entry_tramp_text_end - __entry_tramp_text_start;
+}
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/semihost.h b/arch/arm64/include/asm/semihost.h
new file mode 100644
index 000000000000..87e353dab868
--- /dev/null
+++ b/arch/arm64/include/asm/semihost.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Adapted for ARM and earlycon:
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Rob Herring <robh@kernel.org>
+ */
+
+#ifndef _ARM64_SEMIHOST_H_
+#define _ARM64_SEMIHOST_H_
+
+struct uart_port;
+
+static inline void smh_putc(struct uart_port *port, unsigned char c)
+{
+ asm volatile("mov x1, %0\n"
+ "mov x0, #3\n"
+ "hlt 0xf000\n"
+ : : "r" (&c) : "x0", "x1", "memory");
+}
+
+#endif /* _ARM64_SEMIHOST_H_ */
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
new file mode 100644
index 000000000000..90f61b17275e
--- /dev/null
+++ b/arch/arm64/include/asm/set_memory.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARM64_SET_MEMORY_H
+#define _ASM_ARM64_SET_MEMORY_H
+
+#include <asm/mem_encrypt.h>
+#include <asm-generic/set_memory.h>
+
+bool can_set_direct_map(void);
+#define can_set_direct_map can_set_direct_map
+
+int set_memory_valid(unsigned long addr, int numpages, int enable);
+
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
+bool kernel_page_present(struct page *page);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* _ASM_ARM64_SET_MEMORY_H */
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
new file mode 100644
index 000000000000..3d96dde4d214
--- /dev/null
+++ b/arch/arm64/include/asm/setup.h
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ARM64_ASM_SETUP_H
+#define __ARM64_ASM_SETUP_H
+
+#include <linux/string.h>
+
+#include <uapi/asm/setup.h>
+
+/*
+ * These two variables are used in the head.S file.
+ */
+extern phys_addr_t __fdt_pointer __initdata;
+extern u64 __cacheline_aligned boot_args[4];
+
+static inline bool arch_parse_debug_rodata(char *arg)
+{
+ extern bool rodata_enabled;
+ extern bool rodata_full;
+
+ if (!arg)
+ return false;
+
+ if (!strcmp(arg, "on")) {
+ rodata_enabled = rodata_full = true;
+ return true;
+ }
+
+ if (!strcmp(arg, "off")) {
+ rodata_enabled = rodata_full = false;
+ return true;
+ }
+
+ if (!strcmp(arg, "noalias")) {
+ rodata_enabled = true;
+ rodata_full = false;
+ return true;
+ }
+
+ return false;
+}
+#define arch_parse_debug_rodata arch_parse_debug_rodata
+
+#endif
diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
index 4df608a8459e..f920e22ec677 100644
--- a/arch/arm64/include/asm/shmparam.h
+++ b/arch/arm64/include/asm/shmparam.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SHMPARAM_H
#define __ASM_SHMPARAM_H
@@ -21,7 +10,7 @@
* alignment value. Since we don't have aliasing D-caches, the rest of
* the time we can safely use PAGE_SIZE.
*/
-#define COMPAT_SHMLBA 0x4000
+#define COMPAT_SHMLBA (4 * PAGE_SIZE)
#include <asm-generic/shmparam.h>
diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h
deleted file mode 100644
index dca1094acc74..000000000000
--- a/arch/arm64/include/asm/sigcontext.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_SIGCONTEXT_H
-#define __ASM_SIGCONTEXT_H
-
-#include <uapi/asm/sigcontext.h>
-
-/*
- * Auxiliary context saved in the sigcontext.__reserved array. Not exported to
- * user space as it will change with the addition of new context. User space
- * should check the magic/size information.
- */
-struct aux_context {
- struct fpsimd_context fpsimd;
- /* additional context to be added before "end" */
- struct _aarch64_ctx end;
-};
-#endif
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
new file mode 100644
index 000000000000..ef449f5f4ba8
--- /dev/null
+++ b/arch/arm64/include/asm/signal.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_ASM_SIGNAL_H
+#define __ARM64_ASM_SIGNAL_H
+
+#include <asm/memory.h>
+#include <uapi/asm/signal.h>
+#include <uapi/asm/siginfo.h>
+
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+ unsigned long sig,
+ unsigned long si_code)
+{
+ /*
+ * For historical reasons, all bits of the fault address are exposed as
+ * address bits for watchpoint exceptions. New architectures should
+ * handle the tag bits consistently.
+ */
+ if (sig == SIGTRAP && si_code == TRAP_BRKPT)
+ return addr;
+
+ return untagged_addr(addr);
+}
+#define arch_untagged_si_addr arch_untagged_si_addr
+
+#endif
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index 7c275e3b640f..7e9f163d02ec 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -1,45 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SIGNAL32_H
#define __ASM_SIGNAL32_H
-#ifdef __KERNEL__
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
-#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500
+struct compat_sigcontext {
+ /* We always set these two fields to 0 */
+ compat_ulong_t trap_no;
+ compat_ulong_t error_code;
-extern const compat_ulong_t aarch32_sigret_code[6];
+ compat_ulong_t oldmask;
+ compat_ulong_t arm_r0;
+ compat_ulong_t arm_r1;
+ compat_ulong_t arm_r2;
+ compat_ulong_t arm_r3;
+ compat_ulong_t arm_r4;
+ compat_ulong_t arm_r5;
+ compat_ulong_t arm_r6;
+ compat_ulong_t arm_r7;
+ compat_ulong_t arm_r8;
+ compat_ulong_t arm_r9;
+ compat_ulong_t arm_r10;
+ compat_ulong_t arm_fp;
+ compat_ulong_t arm_ip;
+ compat_ulong_t arm_sp;
+ compat_ulong_t arm_lr;
+ compat_ulong_t arm_pc;
+ compat_ulong_t arm_cpsr;
+ compat_ulong_t fault_address;
+};
-int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set,
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ compat_uptr_t uc_link;
+ compat_stack_t uc_stack;
+ struct compat_sigcontext uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))];
+ compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct compat_sigframe {
+ struct compat_ucontext uc;
+ compat_ulong_t retcode[2];
+};
+
+struct compat_rt_sigframe {
+ struct compat_siginfo info;
+ struct compat_sigframe sig;
+};
+
+int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
-int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs);
+int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs);
void compat_setup_restart_syscall(struct pt_regs *regs);
#else
-static inline int compat_setup_frame(int usid, struct k_sigaction *ka,
+static inline int compat_setup_frame(int usid, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
return -ENOSYS;
}
-static inline int compat_setup_rt_frame(int usig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *set,
+static inline int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
return -ENOSYS;
@@ -49,5 +78,4 @@ static inline void compat_setup_restart_syscall(struct pt_regs *regs)
{
}
#endif /* CONFIG_COMPAT */
-#endif /* __KERNEL__ */
#endif /* __ASM_SIGNAL32_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
new file mode 100644
index 000000000000..0941f6f58a14
--- /dev/null
+++ b/arch/arm64/include/asm/simd.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/cleanup.h>
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/neon.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ * instructions or access the SIMD register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ /*
+ * We must make sure that the SVE has been initialized properly
+ * before using the SIMD in kernel.
+ */
+ return !WARN_ON(!system_capabilities_finalized()) &&
+ system_supports_fpsimd() &&
+ !in_hardirq() && !in_nmi();
+}
+
+#else /* ! CONFIG_KERNEL_MODE_NEON */
+
+static __must_check inline bool may_use_simd(void) {
+ return false;
+}
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
+
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
+
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
+
+#endif
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 4b8023c5d146..10ea4f543069 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -1,52 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#include <linux/const.h>
+
+/* Values for secondary_data.status */
+#define CPU_STUCK_REASON_SHIFT (8)
+#define CPU_BOOT_STATUS_MASK ((UL(1) << CPU_STUCK_REASON_SHIFT) - 1)
+
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
+/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
+#define CPU_KILL_ME (1)
+/* The cpu couldn't die gracefully and is looping in the kernel */
+#define CPU_STUCK_IN_KERNEL (2)
+/* Fatal system error detected by secondary CPU, crash the system */
+#define CPU_PANIC_KERNEL (3)
+
+#define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT)
+#define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT)
+
+#ifndef __ASSEMBLER__
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-#ifndef CONFIG_SMP
-# error "<asm/smp.h> included in non-SMP build"
-#endif
-
#define raw_smp_processor_id() (current_thread_info()->cpu)
-struct seq_file;
-
/*
- * generate IPI list text
+ * Logical CPU mapping.
*/
-extern void show_ipi_list(struct seq_file *p, int prec);
+extern u64 __cpu_logical_map[NR_CPUS];
+extern u64 cpu_logical_map(unsigned int cpu);
-/*
- * Called from C code, this handles an IPI.
- */
-extern void handle_IPI(int ipinr, struct pt_regs *regs);
+static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
+{
+ __cpu_logical_map[cpu] = hwid;
+}
+
+struct seq_file;
/*
- * Setup the set of possible CPUs (via set_cpu_possible)
+ * Discover the set of possible CPUs and determine their
+ * SMP operations.
*/
extern void smp_init_cpus(void);
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNC,
+ IPI_CPU_STOP,
+ IPI_CPU_STOP_NMI,
+ IPI_TIMER,
+ IPI_IRQ_WORK,
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
+};
+
/*
- * Provide a function to raise an IPI cross call on CPUs in callmap.
+ * Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
+extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus);
+
+static inline void set_smp_ipi_range(int ipi_base, int n)
+{
+ set_smp_ipi_range_percpu(ipi_base, n, 0);
+}
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
@@ -55,26 +84,77 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
+ * @status - Result passed back from the secondary CPU to
+ * indicate failure.
*/
struct secondary_data {
- void *stack;
+ struct task_struct *task;
+ long status;
};
+
extern struct secondary_data secondary_data;
-extern void secondary_holding_pen(void);
-extern volatile unsigned long secondary_holding_pen_release;
+extern long __early_cpu_boot_status;
+extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-struct device_node;
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi(unsigned int cpu);
+#else
+static inline void arch_send_wakeup_ipi(unsigned int cpu)
+{
+ BUILD_BUG();
+}
+#endif
+
+extern int __cpu_disable(void);
-struct smp_enable_ops {
- const char *name;
- int (*init_cpu)(struct device_node *, int);
- int (*prepare_cpu)(int);
-};
+static inline void __cpu_die(unsigned int cpu) { }
+extern void __noreturn cpu_die(void);
+extern void __noreturn cpu_die_early(void);
+
+static inline void __noreturn cpu_park_loop(void)
+{
+ for (;;) {
+ wfe();
+ wfi();
+ }
+}
+
+static inline void update_cpu_boot_status(int val)
+{
+ WRITE_ONCE(secondary_data.status, val);
+ /* Ensure the visibility of the status update */
+ dsb(ishst);
+}
+
+/*
+ * The calling secondary CPU has detected serious configuration mismatch,
+ * which calls for a kernel panic. Update the boot status and park the calling
+ * CPU.
+ */
+static inline void __noreturn cpu_panic_kernel(void)
+{
+ update_cpu_boot_status(CPU_PANIC_KERNEL);
+ cpu_park_loop();
+}
+
+/*
+ * If a secondary CPU enters the kernel but fails to come online,
+ * (e.g. due to mismatched features), and cannot exit the kernel,
+ * we increment cpus_stuck_in_kernel and leave the CPU in a
+ * quiesecent loop within the kernel text. The memory containing
+ * this loop must not be re-used for anything else as the 'stuck'
+ * core is executing it.
+ *
+ * This function is used to inhibit features like kexec and hibernate.
+ */
+bool cpus_are_stuck_in_kernel(void);
+
+extern void crash_smp_send_stop(void);
+extern bool smp_crash_stop_failed(void);
-extern const struct smp_enable_ops smp_spin_table_ops;
-extern const struct smp_enable_ops smp_psci_ops;
+#endif /* ifndef __ASSEMBLER__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index ed43a0d2b1b2..97ddc6c203b7 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -1,30 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Definitions specific to SMP platforms.
*
* Copyright (C) 2013 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SMP_PLAT_H
#define __ASM_SMP_PLAT_H
+#include <linux/cpumask.h>
+
+#include <asm/smp.h>
#include <asm/types.h>
+struct mpidr_hash {
+ u64 mask;
+ u32 shift_aff[4];
+ u32 bits;
+};
+
+extern struct mpidr_hash mpidr_hash;
+
+static inline u32 mpidr_hash_size(void)
+{
+ return 1 << mpidr_hash.bits;
+}
+
/*
- * Logical CPU mapping.
+ * Retrieve logical cpu index corresponding to a given MPIDR.Aff*
+ * - mpidr: MPIDR.Aff* bits to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
*/
-extern u64 __cpu_logical_map[NR_CPUS];
-#define cpu_logical_map(cpu) __cpu_logical_map[cpu]
+static inline int get_logical_index(u64 mpidr)
+{
+ int cpu;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (cpu_logical_map(cpu) == mpidr)
+ return cpu;
+ return -EINVAL;
+}
#endif /* __ASM_SMP_PLAT_H */
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 1be62bcb9d47..84783efdc9d1 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -1,24 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS 40
-#define SECTION_SIZE_BITS 30
-#endif
+#include <asm/pgtable-prot.h>
+
+#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT
+#define MAX_POSSIBLE_PHYSMEM_BITS (52)
+
+/*
+ * Section size must be at least 512MB for 64K base
+ * page size config. Otherwise it will be less than
+ * MAX_PAGE_ORDER and the build process will fail.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SECTION_SIZE_BITS 29
+
+#else
+
+/*
+ * Section size must be at least 128MB for 4K base
+ * page size config. Otherwise PMD based huge page
+ * entries could not be created for vmemmap mappings.
+ * 16K follows 4K for simplicity.
+ */
+#define SECTION_SIZE_BITS 27
+#endif /* CONFIG_ARM64_64K_PAGES */
#endif
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
new file mode 100644
index 000000000000..296ae3420bfd
--- /dev/null
+++ b/arch/arm64/include/asm/spectre.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Interface for managing mitigations for Spectre vulnerabilities.
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ */
+
+#ifndef __ASM_SPECTRE_H
+#define __ASM_SPECTRE_H
+
+#define BP_HARDEN_EL2_SLOTS 4
+#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
+
+#ifndef __ASSEMBLER__
+#include <linux/smp.h>
+#include <asm/percpu.h>
+
+#include <asm/cpufeature.h>
+#include <asm/virt.h>
+
+/* Watch out, ordering is important here. */
+enum mitigation_state {
+ SPECTRE_UNAFFECTED,
+ SPECTRE_MITIGATED,
+ SPECTRE_VULNERABLE,
+};
+
+struct pt_regs;
+struct task_struct;
+
+/*
+ * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and
+ * we rely on having the direct vectors first.
+ */
+enum arm64_hyp_spectre_vector {
+ /*
+ * Take exceptions directly to __kvm_hyp_vector. This must be
+ * 0 so that it used by default when mitigations are not needed.
+ */
+ HYP_VECTOR_DIRECT,
+
+ /*
+ * Bounce via a slot in the hypervisor text mapping of
+ * __bp_harden_hyp_vecs, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_DIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page.
+ */
+ HYP_VECTOR_INDIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_INDIRECT,
+};
+
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+ enum arm64_hyp_spectre_vector slot;
+ bp_hardening_cb_t fn;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+/* Called during entry so must be __always_inline */
+static __always_inline void arm64_apply_bp_hardening(void)
+{
+ struct bp_hardening_data *d;
+
+ if (!alternative_has_cap_unlikely(ARM64_SPECTRE_V2))
+ return;
+
+ d = this_cpu_ptr(&bp_hardening_data);
+ if (d->fn)
+ d->fn();
+}
+
+enum mitigation_state arm64_get_spectre_v2_state(void);
+bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
+enum mitigation_state arm64_get_spectre_v4_state(void);
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
+
+enum mitigation_state arm64_get_meltdown_state(void);
+
+enum mitigation_state arm64_get_spectre_bhb_state(void);
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_print_disabled_mitigations(void);
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index 0defa0728a9b..0525c0b089ed 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -1,203 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
-#include <asm/spinlock_types.h>
-#include <asm/processor.h>
-
-/*
- * Spinlock implementation.
- *
- * The old value is read exclusively and the new one, if unlocked, is written
- * exclusively. In case of failure, the loop is restarted.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- *
- * Unlocked value: 0
- * Locked value: 1
- */
-
-#define arch_spin_is_locked(x) ((x)->lock != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
-
-#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- unsigned int tmp;
-
- asm volatile(
- " sevl\n"
- "1: wfe\n"
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1b\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- unsigned int tmp;
-
- asm volatile(
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1f\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- "1:\n"
- : "=&r" (tmp), "+Q" (lock->lock)
- : "r" (1)
- : "cc", "memory");
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
- return !tmp;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- asm volatile(
- " stlr %w1, %0\n"
- : "=Q" (lock->lock) : "r" (0) : "memory");
-}
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
/*
- * Write lock implementation.
- *
- * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is
- * exclusively held.
+ * Changing this will break osq_lock() thanks to the call inside
+ * smp_cond_load_relaxed().
*
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
+ * See:
+ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
*/
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
{
- unsigned int tmp;
-
- asm volatile(
- " sevl\n"
- "1: wfe\n"
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1b\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- : "=&r" (tmp), "+Q" (rw->lock)
- : "r" (0x80000000)
- : "cc", "memory");
+ return false;
}
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
- unsigned int tmp;
-
- asm volatile(
- " ldaxr %w0, %1\n"
- " cbnz %w0, 1f\n"
- " stxr %w0, %w2, %1\n"
- "1:\n"
- : "=&r" (tmp), "+Q" (rw->lock)
- : "r" (0x80000000)
- : "cc", "memory");
-
- return !tmp;
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
- asm volatile(
- " stlr %w1, %0\n"
- : "=Q" (rw->lock) : "r" (0) : "memory");
-}
-
-/* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x) ((x)->lock == 0)
-
-/*
- * Read lock implementation.
- *
- * It exclusively loads the lock value, increments it and stores the new value
- * back if positive and the CPU still exclusively owns the location. If the
- * value is negative, the lock is already held.
- *
- * During unlocking there may be multiple active read locks but no write lock.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- */
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2;
-
- asm volatile(
- " sevl\n"
- "1: wfe\n"
- "2: ldaxr %w0, %2\n"
- " add %w0, %w0, #1\n"
- " tbnz %w0, #31, 1b\n"
- " stxr %w1, %w0, %2\n"
- " cbnz %w1, 2b\n"
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
- :
- : "cc", "memory");
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2;
-
- asm volatile(
- "1: ldxr %w0, %2\n"
- " sub %w0, %w0, #1\n"
- " stlxr %w1, %w0, %2\n"
- " cbnz %w1, 1b\n"
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
- :
- : "cc", "memory");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2 = 1;
-
- asm volatile(
- " ldaxr %w0, %2\n"
- " add %w0, %w0, #1\n"
- " tbnz %w0, #31, 1f\n"
- " stxr %w1, %w0, %2\n"
- "1:\n"
- : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
- :
- : "cc", "memory");
-
- return !tmp2;
-}
-
-/* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x) ((x)->lock < 0x80000000)
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock) cpu_relax()
-#define arch_read_relax(lock) cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
-
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 9a494346efed..7cde3d8bd0ad 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -1,38 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
-# error "please don't include this file directly"
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
+# error "Please do not include this file directly."
#endif
-/* We only require natural alignment for exclusive accesses. */
-#define __lock_aligned
-
-typedef struct {
- volatile unsigned int lock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
-
-typedef struct {
- volatile unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
#endif
diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h
new file mode 100644
index 000000000000..8e57c96ad45e
--- /dev/null
+++ b/arch/arm64/include/asm/stack_pointer.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_STACK_POINTER_H
+#define __ASM_STACK_POINTER_H
+
+/*
+ * how to get the current stack pointer from C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+#endif /* __ASM_STACK_POINTER_H */
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
new file mode 100644
index 000000000000..ae3ad80f51fe
--- /dev/null
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called stack canary
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on ARM. This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef __ASM_STACKPROTECTOR_H
+#define __ASM_STACKPROTECTOR_H
+
+#include <asm/pointer_auth.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+#if defined(CONFIG_STACKPROTECTOR)
+ unsigned long canary = get_random_canary();
+
+ current->stack_canary = canary;
+ if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+ __stack_chk_guard = current->stack_canary;
+#endif
+ ptrauth_thread_init_kernel(current);
+ ptrauth_thread_switch_kernel(current);
+ ptrauth_enable();
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 7318f6d54aa9..6d3280932bf5 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -1,29 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
-struct stackframe {
- unsigned long fp;
- unsigned long sp;
- unsigned long pc;
-};
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/llist.h>
-extern int unwind_frame(struct stackframe *frame);
-extern void walk_stackframe(struct stackframe *frame,
- int (*fn)(struct stackframe *, void *), void *data);
+#include <asm/memory.h>
+#include <asm/pointer_auth.h>
+#include <asm/ptrace.h>
+#include <asm/sdei.h>
+
+#include <asm/stacktrace/common.h>
+
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+ const char *loglvl);
+
+DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
+
+static inline struct stack_info stackinfo_get_irq(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
+ unsigned long high = low + IRQ_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static inline bool on_irq_stack(unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_irq();
+ return stackinfo_on_stack(&info, sp, size);
+}
+
+static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk)
+{
+ unsigned long low = (unsigned long)task_stack_page(tsk);
+ unsigned long high = low + THREAD_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static inline bool on_task_stack(const struct task_struct *tsk,
+ unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_task(tsk);
+ return stackinfo_on_stack(&info, sp, size);
+}
+
+#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1))
+
+DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+
+static inline struct stack_info stackinfo_get_overflow(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
+ unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+#if defined(CONFIG_ARM_SDE_INTERFACE)
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+
+static inline struct stack_info stackinfo_get_sdei_normal(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+
+static inline struct stack_info stackinfo_get_sdei_critical(void)
+{
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+#else
+#define stackinfo_get_sdei_normal() stackinfo_get_unknown()
+#define stackinfo_get_sdei_critical() stackinfo_get_unknown()
+#endif
+
+#ifdef CONFIG_EFI
+extern u64 *efi_rt_stack_top;
+
+static inline struct stack_info stackinfo_get_efi(void)
+{
+ unsigned long high = (u64)efi_rt_stack_top;
+ unsigned long low = high - THREAD_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+#endif
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
new file mode 100644
index 000000000000..821a8fdd31af
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common arm64 stack unwinder code.
+ *
+ * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_STACKTRACE_COMMON_H
+#define __ASM_STACKTRACE_COMMON_H
+
+#include <linux/types.h>
+
+struct stack_info {
+ unsigned long low;
+ unsigned long high;
+};
+
+/**
+ * struct unwind_state - state used for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The lr value in the frame record (or the real lr)
+ *
+ * @stack: The stack currently being unwound.
+ * @stacks: An array of stacks which can be unwound.
+ * @nr_stacks: The number of stacks in @stacks.
+ */
+struct unwind_state {
+ unsigned long fp;
+ unsigned long pc;
+
+ struct stack_info stack;
+ struct stack_info *stacks;
+ int nr_stacks;
+};
+
+static inline struct stack_info stackinfo_get_unknown(void)
+{
+ return (struct stack_info) {
+ .low = 0,
+ .high = 0,
+ };
+}
+
+static inline bool stackinfo_on_stack(const struct stack_info *info,
+ unsigned long sp, unsigned long size)
+{
+ if (!info->low)
+ return false;
+
+ if (sp < info->low || sp + size < sp || sp + size > info->high)
+ return false;
+
+ return true;
+}
+
+static inline void unwind_init_common(struct unwind_state *state)
+{
+ state->stack = stackinfo_get_unknown();
+}
+
+/**
+ * unwind_find_stack() - Find the accessible stack which entirely contains an
+ * object.
+ *
+ * @state: the current unwind state.
+ * @sp: the base address of the object.
+ * @size: the size of the object.
+ *
+ * Return: a pointer to the relevant stack_info if found; NULL otherwise.
+ */
+static struct stack_info *unwind_find_stack(struct unwind_state *state,
+ unsigned long sp,
+ unsigned long size)
+{
+ struct stack_info *info = &state->stack;
+
+ if (stackinfo_on_stack(info, sp, size))
+ return info;
+
+ for (int i = 0; i < state->nr_stacks; i++) {
+ info = &state->stacks[i];
+ if (stackinfo_on_stack(info, sp, size))
+ return info;
+ }
+
+ return NULL;
+}
+
+/**
+ * unwind_consume_stack() - Update stack boundaries so that future unwind steps
+ * cannot consume this object again.
+ *
+ * @state: the current unwind state.
+ * @info: the stack_info of the stack containing the object.
+ * @sp: the base address of the object.
+ * @size: the size of the object.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline void unwind_consume_stack(struct unwind_state *state,
+ struct stack_info *info,
+ unsigned long sp,
+ unsigned long size)
+{
+ struct stack_info tmp;
+
+ /*
+ * Stack transitions are strictly one-way, and once we've
+ * transitioned from one stack to another, it's never valid to
+ * unwind back to the old stack.
+ *
+ * Destroy the old stack info so that it cannot be found upon a
+ * subsequent transition. If the stack has not changed, we'll
+ * immediately restore the current stack info.
+ *
+ * Note that stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
+ *
+ * ... so we do not check the specific order of stack
+ * transitions.
+ */
+ tmp = *info;
+ *info = stackinfo_get_unknown();
+ state->stack = tmp;
+
+ /*
+ * Future unwind steps can only consume stack above this frame record.
+ * Update the current stack to start immediately above it.
+ */
+ state->stack.low = sp + size;
+}
+
+/**
+ * unwind_next_frame_record() - Unwind to the next frame record.
+ *
+ * @state: the current unwind state.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline int
+unwind_next_frame_record(struct unwind_state *state)
+{
+ struct stack_info *info;
+ struct frame_record *record;
+ unsigned long fp = state->fp;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ info = unwind_find_stack(state, fp, sizeof(*record));
+ if (!info)
+ return -EINVAL;
+
+ unwind_consume_stack(state, info, fp, sizeof(*record));
+
+ /*
+ * Record this frame record's values.
+ */
+ record = (struct frame_record *)fp;
+ state->fp = READ_ONCE(record->fp);
+ state->pc = READ_ONCE(record->lr);
+
+ return 0;
+}
+
+#endif /* __ASM_STACKTRACE_COMMON_H */
diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h
new file mode 100644
index 000000000000..796797b8db7e
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/frame.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_STACKTRACE_FRAME_H
+#define __ASM_STACKTRACE_FRAME_H
+
+/*
+ * - FRAME_META_TYPE_NONE
+ *
+ * This value is reserved.
+ *
+ * - FRAME_META_TYPE_FINAL
+ *
+ * The record is the last entry on the stack.
+ * Unwinding should terminate successfully.
+ *
+ * - FRAME_META_TYPE_PT_REGS
+ *
+ * The record is embedded within a struct pt_regs, recording the registers at
+ * an arbitrary point in time.
+ * Unwinding should consume pt_regs::pc, followed by pt_regs::lr.
+ *
+ * Note: all other values are reserved and should result in unwinding
+ * terminating with an error.
+ */
+#define FRAME_META_TYPE_NONE 0
+#define FRAME_META_TYPE_FINAL 1
+#define FRAME_META_TYPE_PT_REGS 2
+
+#ifndef __ASSEMBLER__
+/*
+ * A standard AAPCS64 frame record.
+ */
+struct frame_record {
+ u64 fp;
+ u64 lr;
+};
+
+/*
+ * A metadata frame record indicating a special unwind.
+ * The record::{fp,lr} fields must be zero to indicate the presence of
+ * metadata.
+ */
+struct frame_record_meta {
+ struct frame_record record;
+ u64 type;
+};
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_STACKTRACE_FRAME_H */
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
new file mode 100644
index 000000000000..171f9edef49f
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ * 1) Non-protected nVHE mode - the host can directly access the
+ * HYP stack pages and unwind the HYP stack in EL1. This saves having
+ * to allocate shared buffers for the host to read the unwinded
+ * stacktrace.
+ *
+ * 2) pKVM (protected nVHE) mode - the host cannot directly access
+ * the HYP memory. The stack is unwinded in EL2 and dumped to a shared
+ * buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#ifndef __ASM_STACKTRACE_NVHE_H
+#define __ASM_STACKTRACE_NVHE_H
+
+#include <asm/stacktrace/common.h>
+
+/**
+ * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc
+ *
+ * @state : unwind_state to initialize
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ */
+static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
+ unsigned long fp,
+ unsigned long pc)
+{
+ unwind_init_common(state);
+
+ state->fp = fp;
+ state->pc = pc;
+}
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * Conventional (non-protected) nVHE HYP stack unwinder
+ *
+ * In non-protected mode, the unwinding is done from kernel proper context
+ * (by the host in EL1).
+ */
+
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
+
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
+
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+#endif /* __ASM_STACKTRACE_NVHE_H */
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
new file mode 100644
index 000000000000..23d27623e478
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ */
+
+#ifndef __ARM64_S2_PGTABLE_H_
+#define __ARM64_S2_PGTABLE_H_
+
+#include <linux/pgtable.h>
+
+/*
+ * The hardware supports concatenation of up to 16 tables at stage2 entry
+ * level and we use the feature whenever possible, which means we resolve 4
+ * additional bits of address at the entry level.
+ *
+ * This implies, the total number of page table levels required for
+ * IPA_SHIFT at stage2 expected by the hardware can be calculated using
+ * the same logic used for the (non-collapsable) stage1 page tables but for
+ * (IPA_SHIFT - 4).
+ */
+#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
+#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr)
+
+/*
+ * kvm_mmmu_cache_min_pages() is the number of pages required to install
+ * a stage-2 translation. We pre-allocate the entry level page table at
+ * the VM creation.
+ */
+#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1)
+
+#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index 15e35598ac40..3b4a62f5aeb0 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_STAT_H
#define __ASM_STAT_H
@@ -20,6 +9,7 @@
#ifdef CONFIG_COMPAT
+#include <linux/time.h>
#include <asm/compat.h>
/*
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 3ee8b303d9a9..3a3264ff47b9 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -1,37 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2013 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_STRING_H
#define __ASM_STRING_H
+#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
#define __HAVE_ARCH_STRRCHR
extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c);
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *, const char *);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *, const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_STRLEN
+extern __kernel_size_t strlen(const char *);
+
+#define __HAVE_ARCH_STRNLEN
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *, int, __kernel_size_t);
+#endif
+
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMCHR
-extern void *memchr(const void *, int, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t);
+extern void *__memset(void *, int, __kernel_size_t);
+
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
+void memcpy_flushcache(void *dst, const void *src, size_t cnt);
+#endif
+
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+ !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
#endif
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
new file mode 100644
index 000000000000..e65f33edf9d6
--- /dev/null
+++ b/arch/arm64/include/asm/suspend.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SUSPEND_H
+#define __ASM_SUSPEND_H
+
+#define NR_CTX_REGS 13
+#define NR_CALLEE_SAVED_REGS 12
+
+/*
+ * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
+ * the stack, which must be 16-byte aligned on v8
+ */
+struct cpu_suspend_ctx {
+ /*
+ * This struct must be kept in sync with
+ * cpu_do_{suspend/resume} in mm/proc.S
+ */
+ u64 ctx_regs[NR_CTX_REGS];
+ u64 sp;
+} __aligned(16);
+
+/*
+ * Memory to save the cpu state is allocated on the stack by
+ * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter().
+ * This data must survive until cpu_resume() is called.
+ *
+ * This struct describes the size and the layout of the saved cpu state.
+ * The layout of the callee_saved_regs is defined by the implementation
+ * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed
+ * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it
+ * returns, and the data would be subsequently corrupted by the call to the
+ * finisher.
+ */
+struct sleep_stack_data {
+ struct cpu_suspend_ctx system_regs;
+ unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
+};
+
+extern unsigned long *sleep_save_stash;
+
+extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
+extern void cpu_resume(void);
+int __cpu_suspend_enter(struct sleep_stack_data *state);
+void __cpu_suspend_exit(void);
+void _cpu_resume(void);
+
+int swsusp_arch_suspend(void);
+int swsusp_arch_resume(void);
+int arch_hibernation_header_save(void *addr, unsigned int max_size);
+int arch_hibernation_header_restore(void *addr);
+
+/* Used to resume on the CPU we hibernated on */
+int hibernate_resume_nonboot_cpu_disable(void);
+
+#endif
diff --git a/arch/arm64/include/asm/sync_bitops.h b/arch/arm64/include/asm/sync_bitops.h
index 8da0bf4f7659..e9c1a02c2154 100644
--- a/arch/arm64/include/asm/sync_bitops.h
+++ b/arch/arm64/include/asm/sync_bitops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_SYNC_BITOPS_H__
#define __ASM_SYNC_BITOPS_H__
@@ -14,13 +15,13 @@
* ops which are SMP safe even on a UP kernel.
*/
-#define sync_set_bit(nr, p) set_bit(nr, p)
-#define sync_clear_bit(nr, p) clear_bit(nr, p)
-#define sync_change_bit(nr, p) change_bit(nr, p)
-#define sync_test_and_set_bit(nr, p) test_and_set_bit(nr, p)
-#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p)
-#define sync_test_and_change_bit(nr, p) test_and_change_bit(nr, p)
-#define sync_test_bit(nr, addr) test_bit(nr, addr)
-#define sync_cmpxchg cmpxchg
+#define sync_set_bit(nr, p) set_bit(nr, p)
+#define sync_clear_bit(nr, p) clear_bit(nr, p)
+#define sync_change_bit(nr, p) change_bit(nr, p)
+#define sync_test_and_set_bit(nr, p) test_and_set_bit(nr, p)
+#define sync_test_and_clear_bit(nr, p) test_and_clear_bit(nr, p)
+#define sync_test_and_change_bit(nr, p) test_and_change_bit(nr, p)
+#define sync_test_bit(nr, addr) test_bit(nr, addr)
+#define arch_sync_cmpxchg arch_cmpxchg
#endif
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 89c047f9a971..712daa90e643 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -1,23 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SYSCALL_H
#define __ASM_SYSCALL_H
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
#include <linux/err.h>
+typedef long (*syscall_fn_t)(const struct pt_regs *regs);
+
+extern const syscall_fn_t sys_call_table[];
+
+#ifdef CONFIG_COMPAT
+extern const syscall_fn_t compat_sys_call_table[];
+#endif
static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
@@ -31,71 +29,92 @@ static inline void syscall_rollback(struct task_struct *task,
regs->regs[0] = regs->orig_x0;
}
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ unsigned long val = regs->regs[0];
+
+ if (is_compat_thread(task_thread_info(task)))
+ val = sign_extend64(val, 31);
+
+ return val;
+}
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- unsigned long error = regs->regs[0];
- return IS_ERR_VALUE(error) ? error : 0;
-}
+ unsigned long error = syscall_get_return_value(task, regs);
-static inline long syscall_get_return_value(struct task_struct *task,
- struct pt_regs *regs)
-{
- return regs->regs[0];
+ return IS_ERR_VALUE(error) ? error : 0;
}
static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- regs->regs[0] = (long) error ? error : val;
+ if (error)
+ val = error;
+
+ if (is_compat_thread(task_thread_info(task)))
+ val = lower_32_bits(val);
+
+ regs->regs[0] = val;
+}
+
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->syscallno = nr;
+ if (nr == -1) {
+ /*
+ * When the syscall number is set to -1, the syscall will be
+ * skipped. In this case the syscall return value has to be
+ * set explicitly, otherwise the first syscall argument is
+ * returned as the syscall return value.
+ */
+ syscall_set_return_value(task, regs, -ENOSYS, 0);
+ }
}
#define SYSCALL_MAX_ARGS 6
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- if (i + n > SYSCALL_MAX_ARGS) {
- unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
- unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- memset(args_bad, 0, n_bad * sizeof(args[0]));
- }
-
- if (i == 0) {
- args[0] = regs->orig_x0;
- args++;
- i++;
- n--;
- }
+ args[0] = regs->orig_x0;
+ args++;
- memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+ memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- if (i + n > SYSCALL_MAX_ARGS) {
- pr_warning("%s called with max args %d, handling only %d\n",
- __func__, i + n, SYSCALL_MAX_ARGS);
- n = SYSCALL_MAX_ARGS - i;
- }
+ memcpy(&regs->regs[0], args, 6 * sizeof(args[0]));
+ /*
+ * Also copy the first argument into orig_x0
+ * so that syscall_get_arguments() would return it
+ * instead of the previous value.
+ */
+ regs->orig_x0 = regs->regs[0];
+}
- if (i == 0) {
- regs->orig_x0 = args[0];
- args++;
- i++;
- n--;
- }
+/*
+ * We don't care about endianness (__AUDIT_ARCH_LE bit) here because
+ * AArch64 has the same system calls both on little- and big- endian.
+ */
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ if (is_compat_thread(task_thread_info(task)))
+ return AUDIT_ARCH_ARM;
- memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+ return AUDIT_ARCH_AARCH64;
}
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
#endif /* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..abb57bc54305
--- /dev/null
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - arm64 specific wrappers to syscall definitions
+ *
+ * Based on arch/x86/include/asm_syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+#include <asm/ptrace.h>
+
+#define SC_ARM64_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+ ,,regs->regs[0],,regs->regs[1],,regs->regs[2] \
+ ,,regs->regs[3],,regs->regs[4],,regs->regs[5])
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
+ asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__arm64_compat_sys##name, ERRNO); \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_compat_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \
+ } \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
+ } \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define COMPAT_SYSCALL_DEFINE0(sname) \
+ asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused); \
+ ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO); \
+ asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL_COMPAT(name) \
+ asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \
+ asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \
+ { \
+ return sys_ni_syscall(); \
+ }
+
+#endif /* CONFIG_COMPAT */
+
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \
+ } \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
+ __MAP(x,__SC_TEST,__VA_ARGS__); \
+ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ return ret; \
+ } \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \
+ ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \
+ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \
+ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
+ { \
+ return sys_ni_syscall(); \
+ }
+
+asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
+
+#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h
deleted file mode 100644
index 48fe7c600e98..000000000000
--- a/arch/arm64/include/asm/syscalls.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_SYSCALLS_H
-#define __ASM_SYSCALLS_H
-
-#include <linux/linkage.h>
-#include <linux/compiler.h>
-#include <linux/signal.h>
-
-/*
- * System call wrappers implemented in kernel/entry.S.
- */
-asmlinkage long sys_rt_sigreturn_wrapper(void);
-
-#include <asm-generic/syscalls.h>
-
-#endif /* __ASM_SYSCALLS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
new file mode 100644
index 000000000000..9df51accbb02
--- /dev/null
+++ b/arch/arm64/include/asm/sysreg.h
@@ -0,0 +1,1294 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Macros for accessing system registers with older binutils.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ */
+
+#ifndef __ASM_SYSREG_H
+#define __ASM_SYSREG_H
+
+#include <linux/bits.h>
+#include <linux/stringify.h>
+#include <linux/kasan-tags.h>
+#include <linux/kconfig.h>
+
+#include <asm/gpr-num.h>
+
+/*
+ * ARMv8 ARM reserves the following encoding for system registers:
+ * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
+ * C5.2, version:ARM DDI 0487A.f)
+ * [20-19] : Op0
+ * [18-16] : Op1
+ * [15-12] : CRn
+ * [11-8] : CRm
+ * [7-5] : Op2
+ */
+#define Op0_shift 19
+#define Op0_mask 0x3
+#define Op1_shift 16
+#define Op1_mask 0x7
+#define CRn_shift 12
+#define CRn_mask 0xf
+#define CRm_shift 8
+#define CRm_mask 0xf
+#define Op2_shift 5
+#define Op2_mask 0x7
+
+#define sys_reg(op0, op1, crn, crm, op2) \
+ (((op0) << Op0_shift) | ((op1) << Op1_shift) | \
+ ((crn) << CRn_shift) | ((crm) << CRm_shift) | \
+ ((op2) << Op2_shift))
+
+#define sys_insn sys_reg
+
+#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask)
+#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask)
+#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask)
+#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask)
+#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask)
+
+#ifndef CONFIG_BROKEN_GAS_INST
+
+#ifdef __ASSEMBLER__
+// The space separator is omitted so that __emit_inst(x) can be parsed as
+// either an assembler directive or an assembler macro argument.
+#define __emit_inst(x) .inst(x)
+#else
+#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
+#endif
+
+#else /* CONFIG_BROKEN_GAS_INST */
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+#define __INSTR_BSWAP(x) (x)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \
+ (((x) << 8) & 0x00ff0000) | \
+ (((x) >> 8) & 0x0000ff00) | \
+ (((x) >> 24) & 0x000000ff))
+#endif /* CONFIG_CPU_BIG_ENDIAN */
+
+#ifdef __ASSEMBLER__
+#define __emit_inst(x) .long __INSTR_BSWAP(x)
+#else /* __ASSEMBLER__ */
+#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
+#endif /* __ASSEMBLER__ */
+
+#endif /* CONFIG_BROKEN_GAS_INST */
+
+/*
+ * Instructions for modifying PSTATE fields.
+ * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints,
+ * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions
+ * for accessing PSTATE fields have the following encoding:
+ * Op0 = 0, CRn = 4
+ * Op1, Op2 encodes the PSTATE field modified and defines the constraints.
+ * CRm = Imm4 for the instruction.
+ * Rt = 0x1f
+ */
+#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift)
+#define PSTATE_Imm_shift CRm_shift
+#define SET_PSTATE(x, r) __emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift))
+
+#define PSTATE_PAN pstate_field(0, 4)
+#define PSTATE_UAO pstate_field(0, 3)
+#define PSTATE_SSBS pstate_field(3, 1)
+#define PSTATE_DIT pstate_field(3, 2)
+#define PSTATE_TCO pstate_field(3, 4)
+
+#define SET_PSTATE_PAN(x) SET_PSTATE((x), PAN)
+#define SET_PSTATE_UAO(x) SET_PSTATE((x), UAO)
+#define SET_PSTATE_SSBS(x) SET_PSTATE((x), SSBS)
+#define SET_PSTATE_DIT(x) SET_PSTATE((x), DIT)
+#define SET_PSTATE_TCO(x) SET_PSTATE((x), TCO)
+
+#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x))
+#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
+#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
+#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
+
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
+
+#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
+
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31)
+#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31)
+
+/* Data cache zero operations */
+#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
+#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
+#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
+#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
+#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
+#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
+#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
+#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1)
+#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5)
+
+/*
+ * Automatically generated definitions for system registers, the
+ * manual encodings below are in the process of being converted to
+ * come from here. The header relies on the definition of sys_reg()
+ * earlier in this file.
+ */
+#include "asm/sysreg-defs.h"
+
+/*
+ * System registers, organised loosely by encoding but grouped together
+ * where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
+ */
+#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3)
+#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
+#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
+
+#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4)
+#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
+#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
+#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
+#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
+
+#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
+#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0))
+#define OSLSR_EL1_OSLM_NI 0
+#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3)
+#define OSLSR_EL1_OSLK BIT(1)
+
+#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
+#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
+#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6)
+#define SYS_DBGCLAIMCLR_EL1 sys_reg(2, 0, 7, 9, 6)
+#define SYS_DBGAUTHSTATUS_EL1 sys_reg(2, 0, 7, 14, 6)
+#define SYS_MDCCSR_EL0 sys_reg(2, 3, 0, 1, 0)
+#define SYS_DBGDTR_EL0 sys_reg(2, 3, 0, 4, 0)
+#define SYS_DBGDTRRX_EL0 sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
+#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
+#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
+#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
+#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
+
+#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
+#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
+#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
+
+#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
+#define SYS_APIAKEYHI_EL1 sys_reg(3, 0, 2, 1, 1)
+#define SYS_APIBKEYLO_EL1 sys_reg(3, 0, 2, 1, 2)
+#define SYS_APIBKEYHI_EL1 sys_reg(3, 0, 2, 1, 3)
+
+#define SYS_APDAKEYLO_EL1 sys_reg(3, 0, 2, 2, 0)
+#define SYS_APDAKEYHI_EL1 sys_reg(3, 0, 2, 2, 1)
+#define SYS_APDBKEYLO_EL1 sys_reg(3, 0, 2, 2, 2)
+#define SYS_APDBKEYHI_EL1 sys_reg(3, 0, 2, 2, 3)
+
+#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0)
+#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1)
+
+#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0)
+#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1)
+
+#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
+
+#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
+#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1)
+#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0)
+
+#define SYS_ERRIDR_EL1 sys_reg(3, 0, 5, 3, 0)
+#define SYS_ERRSELR_EL1 sys_reg(3, 0, 5, 3, 1)
+#define SYS_ERXFR_EL1 sys_reg(3, 0, 5, 4, 0)
+#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
+#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
+#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
+#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
+#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
+#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
+#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
+
+#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
+
+#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
+#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
+
+/* Buffer error reporting */
+#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK
+
+#define PMBSR_EL1_BUF_BSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK
+
+#define PMBSR_EL1_BUF_BSC_FULL 0x1UL
+
+/*** End of Statistical Profiling Extension ***/
+
+#define TRBSR_EL1_BSC_MASK GENMASK(5, 0)
+#define TRBSR_EL1_BSC_SHIFT 0
+
+#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
+#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
+
+#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6)
+
+#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
+#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
+
+#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
+#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
+
+#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0)
+#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1)
+#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2)
+#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3)
+#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n)
+#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0)
+#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1)
+#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2)
+#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3)
+#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n)
+#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0)
+#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1)
+#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2)
+#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3)
+#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3)
+#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6)
+#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7)
+#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2)
+#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3)
+#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
+#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
+#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
+
+#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
+
+#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
+
+#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
+#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
+
+#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0)
+#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1)
+#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
+#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
+#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
+#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
+#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
+#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
+#define SYS_PMXEVTYPER_EL0 sys_reg(3, 3, 9, 13, 1)
+#define SYS_PMXEVCNTR_EL0 sys_reg(3, 3, 9, 13, 2)
+#define SYS_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0)
+#define SYS_PMOVSSET_EL0 sys_reg(3, 3, 9, 14, 3)
+
+#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2)
+#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3)
+#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5)
+
+#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7)
+
+/* Definitions for system register interface to AMU for ARMv8.4 onwards */
+#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2))
+#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0)
+#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1)
+#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2)
+#define SYS_AMUSERENR_EL0 SYS_AM_EL0(2, 3)
+#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4)
+#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5)
+#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0)
+#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1)
+
+/*
+ * Group 0 of activity monitors (architected):
+ * op0 op1 CRn CRm op2
+ * Counter: 11 011 1101 010:n<3> n<2:0>
+ * Type: 11 011 1101 011:n<3> n<2:0>
+ * n: 0-15
+ *
+ * Group 1 of activity monitors (auxiliary):
+ * op0 op1 CRn CRm op2
+ * Counter: 11 011 1101 110:n<3> n<2:0>
+ * Type: 11 011 1101 111:n<3> n<2:0>
+ * n: 0-15
+ */
+
+#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
+
+/* AMU v1: Fixed (architecturally defined) activity monitors */
+#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0)
+#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1)
+#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2)
+#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3)
+
+#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+
+#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
+#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
+#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
+
+#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0)
+#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
+#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
+
+#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
+#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
+#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
+#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
+
+#define __PMEV_op2(n) ((n) & 0x7)
+#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n))
+#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
+#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
+#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
+
+#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+
+#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1))
+
+#define __SPMEV_op2(n) ((n) & 0x7)
+#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1))
+#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n))
+#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n))
+
+#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
+#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
+
+#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
+#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
+#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
+#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
+#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
+#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3)
+#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7)
+
+#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0)
+#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1)
+#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2)
+#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
+#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
+
+#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
+#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
+#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
+#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
+#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
+#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
+#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
+#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
+#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
+#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
+
+#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
+#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4)
+
+#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
+#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
+
+#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
+#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
+#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2)
+#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
+#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
+#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
+#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
+#define SYS_ICH_AP0R2_EL2 __SYS__AP0Rx_EL2(2)
+#define SYS_ICH_AP0R3_EL2 __SYS__AP0Rx_EL2(3)
+
+#define __SYS__AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x)
+#define SYS_ICH_AP1R0_EL2 __SYS__AP1Rx_EL2(0)
+#define SYS_ICH_AP1R1_EL2 __SYS__AP1Rx_EL2(1)
+#define SYS_ICH_AP1R2_EL2 __SYS__AP1Rx_EL2(2)
+#define SYS_ICH_AP1R3_EL2 __SYS__AP1Rx_EL2(3)
+
+#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
+#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
+#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
+#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5)
+#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
+
+#define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x)
+#define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0)
+#define SYS_ICH_LR1_EL2 __SYS__LR0_EL2(1)
+#define SYS_ICH_LR2_EL2 __SYS__LR0_EL2(2)
+#define SYS_ICH_LR3_EL2 __SYS__LR0_EL2(3)
+#define SYS_ICH_LR4_EL2 __SYS__LR0_EL2(4)
+#define SYS_ICH_LR5_EL2 __SYS__LR0_EL2(5)
+#define SYS_ICH_LR6_EL2 __SYS__LR0_EL2(6)
+#define SYS_ICH_LR7_EL2 __SYS__LR0_EL2(7)
+
+#define __SYS__LR8_EL2(x) sys_reg(3, 4, 12, 13, x)
+#define SYS_ICH_LR8_EL2 __SYS__LR8_EL2(0)
+#define SYS_ICH_LR9_EL2 __SYS__LR8_EL2(1)
+#define SYS_ICH_LR10_EL2 __SYS__LR8_EL2(2)
+#define SYS_ICH_LR11_EL2 __SYS__LR8_EL2(3)
+#define SYS_ICH_LR12_EL2 __SYS__LR8_EL2(4)
+#define SYS_ICH_LR13_EL2 __SYS__LR8_EL2(5)
+#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
+#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
+
+#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
+#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
+
+#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
+#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
+
+/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
+#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
+#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
+#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
+#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
+#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
+#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
+#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
+#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
+#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
+#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
+#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
+#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
+#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
+#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
+#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0)
+#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
+#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
+
+#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
+
+/* TLBI instructions */
+#define TLBI_Op0 1
+
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */
+#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/
+
+#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */
+#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */
+#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */
+#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */
+#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */
+#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */
+#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */
+
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
+/*
+ * BRBE Instructions
+ */
+#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f))
+#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f))
+
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_ENTP2 (BIT(60))
+#define SCTLR_ELx_DSSBS (BIT(44))
+#define SCTLR_ELx_ATA (BIT(43))
+
+#define SCTLR_ELx_EE_SHIFT 25
+#define SCTLR_ELx_ENIA_SHIFT 31
+
+#define SCTLR_ELx_ITFSB (BIT(37))
+#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
+#define SCTLR_ELx_ENIB (BIT(30))
+#define SCTLR_ELx_LSMAOE (BIT(29))
+#define SCTLR_ELx_nTLSMD (BIT(28))
+#define SCTLR_ELx_ENDA (BIT(27))
+#define SCTLR_ELx_EE (BIT(SCTLR_ELx_EE_SHIFT))
+#define SCTLR_ELx_EIS (BIT(22))
+#define SCTLR_ELx_IESB (BIT(21))
+#define SCTLR_ELx_TSCXT (BIT(20))
+#define SCTLR_ELx_WXN (BIT(19))
+#define SCTLR_ELx_ENDB (BIT(13))
+#define SCTLR_ELx_I (BIT(12))
+#define SCTLR_ELx_EOS (BIT(11))
+#define SCTLR_ELx_SA (BIT(3))
+#define SCTLR_ELx_C (BIT(2))
+#define SCTLR_ELx_A (BIT(1))
+#define SCTLR_ELx_M (BIT(0))
+
+/* SCTLR_EL2 specific flags. */
+#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \
+ (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
+ (BIT(29)))
+
+#define SCTLR_EL2_BT (BIT(36))
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ENDIAN_SET_EL2 SCTLR_ELx_EE
+#else
+#define ENDIAN_SET_EL2 0
+#endif
+
+#define INIT_SCTLR_EL2_MMU_ON \
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
+ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
+ SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+
+#define INIT_SCTLR_EL2_MMU_OFF \
+ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+
+/* SCTLR_EL1 specific flags. */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
+#else
+#define ENDIAN_SET_EL1 0
+#endif
+
+#define INIT_SCTLR_EL1_MMU_OFF \
+ (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \
+ SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
+
+#define INIT_SCTLR_EL1_MMU_ON \
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \
+ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \
+ SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_nTWE | \
+ SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
+ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | \
+ SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS | \
+ SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
+
+/* MAIR_ELx memory attributes (used by Linux) */
+#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
+#define MAIR_ATTR_DEVICE_nGnRE UL(0x04)
+#define MAIR_ATTR_NORMAL_NC UL(0x44)
+#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0)
+#define MAIR_ATTR_NORMAL UL(0xff)
+#define MAIR_ATTR_MASK UL(0xff)
+
+/* Position the attr at the correct index */
+#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
+
+/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
+
+#define ARM64_MIN_PARANGE_BITS 32
+
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
+
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52
+#else
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48
+#endif
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
+#elif defined(CONFIG_ARM64_64K_PAGES)
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
+#endif
+
+#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
+#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
+
+#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */
+#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */
+
+#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */
+#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
+
+/* GCR_EL1 Definitions */
+#define SYS_GCR_EL1_RRND (BIT(16))
+#define SYS_GCR_EL1_EXCL_MASK 0xffffUL
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK
+#endif
+
+#define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
+
+/* RGSR_EL1 Definitions */
+#define SYS_RGSR_EL1_TAG_MASK 0xfUL
+#define SYS_RGSR_EL1_SEED_SHIFT 8
+#define SYS_RGSR_EL1_SEED_MASK 0xffffUL
+
+/* TFSR{,E0}_EL1 bit definitions */
+#define SYS_TFSR_EL1_TF0_SHIFT 0
+#define SYS_TFSR_EL1_TF1_SHIFT 1
+#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT)
+#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT)
+
+/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
+#define SYS_MPIDR_SAFE_VAL (BIT(31))
+
+/* GIC Hypervisor interface registers */
+/* ICH_LR*_EL2 bit definitions */
+#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)
+
+#define ICH_LR_EOI (1ULL << 41)
+#define ICH_LR_GROUP (1ULL << 60)
+#define ICH_LR_HW (1ULL << 61)
+#define ICH_LR_STATE (3ULL << 62)
+#define ICH_LR_PENDING_BIT (1ULL << 62)
+#define ICH_LR_ACTIVE_BIT (1ULL << 63)
+#define ICH_LR_PHYS_ID_SHIFT 32
+#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
+#define ICH_LR_PRIORITY_SHIFT 48
+#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)
+
+/* ICH_VMCR_EL2 bit definitions */
+#define ICH_VMCR_ACK_CTL_SHIFT 2
+#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT 3
+#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT)
+#define ICH_VMCR_CBPR_SHIFT 4
+#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)
+#define ICH_VMCR_EOIM_SHIFT 9
+#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT)
+#define ICH_VMCR_BPR1_SHIFT 18
+#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)
+#define ICH_VMCR_BPR0_SHIFT 21
+#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)
+#define ICH_VMCR_PMR_SHIFT 24
+#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
+#define ICH_VMCR_ENG0_SHIFT 0
+#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
+#define ICH_VMCR_ENG1_SHIFT 1
+#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
+
+/*
+ * Permission Indirection Extension (PIE) permission encodings.
+ * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
+ */
+#define PIE_NONE_O UL(0x0)
+#define PIE_R_O UL(0x1)
+#define PIE_X_O UL(0x2)
+#define PIE_RX_O UL(0x3)
+#define PIE_RW_O UL(0x5)
+#define PIE_RWnX_O UL(0x6)
+#define PIE_RWX_O UL(0x7)
+#define PIE_R UL(0x8)
+#define PIE_GCS UL(0x9)
+#define PIE_RX UL(0xa)
+#define PIE_RW UL(0xc)
+#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
+
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
+
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
+#define POE_MASK UL(0xf)
+
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
+
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT 12
+#define GCS_CAP_ADDR_WIDTH 52
+#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
+
+#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT 0
+#define GCS_CAP_TOKEN_WIDTH 12
+#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
+
+#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
+/*
+ * Definitions for GICv5 instructions
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r)
+#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
+
+#ifdef __ASSEMBLER__
+
+ .macro mrs_s, rt, sreg
+ __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
+ .endm
+
+ .macro msr_s, sreg, rt
+ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
+ .endm
+
+ .macro msr_hcr_el2, reg
+#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23)
+ dsb nsh
+ msr hcr_el2, \reg
+ isb
+#else
+ msr hcr_el2, \reg
+#endif
+ .endm
+#else
+
+#include <linux/bitfield.h>
+#include <linux/build_bug.h>
+#include <linux/types.h>
+#include <asm/alternative.h>
+
+#define DEFINE_MRS_S \
+ __DEFINE_ASM_GPR_NUMS \
+" .macro mrs_s, rt, sreg\n" \
+ __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \
+" .endm\n"
+
+#define DEFINE_MSR_S \
+ __DEFINE_ASM_GPR_NUMS \
+" .macro msr_s, sreg, rt\n" \
+ __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \
+" .endm\n"
+
+#define UNDEFINE_MRS_S \
+" .purgem mrs_s\n"
+
+#define UNDEFINE_MSR_S \
+" .purgem msr_s\n"
+
+#define __mrs_s(v, r) \
+ DEFINE_MRS_S \
+" mrs_s " v ", " __stringify(r) "\n" \
+ UNDEFINE_MRS_S
+
+#define __msr_s(r, v) \
+ DEFINE_MSR_S \
+" msr_s " __stringify(r) ", " v "\n" \
+ UNDEFINE_MSR_S
+
+/*
+ * Unlike read_cpuid, calls to read_sysreg are never expected to be
+ * optimized away or replaced with synthetic values.
+ */
+#define read_sysreg(r) ({ \
+ u64 __val; \
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
+ __val; \
+})
+
+/*
+ * The "Z" constraint normally means a zero immediate, but when combined with
+ * the "%x0" template means XZR.
+ */
+#define write_sysreg(v, r) do { \
+ u64 __val = (u64)(v); \
+ asm volatile("msr " __stringify(r) ", %x0" \
+ : : "rZ" (__val)); \
+} while (0)
+
+/*
+ * For registers without architectural names, or simply unsupported by
+ * GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
+ */
+#define read_sysreg_s(r) ({ \
+ u64 __val; \
+ u32 __maybe_unused __check_r = (u32)(r); \
+ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
+ __val; \
+})
+
+/*
+ * The "Z" constraint combined with the "%x0" template should be enough
+ * to force XZR generation if (v) is a constant 0 value but LLVM does not
+ * yet understand that modifier/constraint combo so a conditional is required
+ * to nudge the compiler into using XZR as a source for a 0 constant value.
+ */
+#define write_sysreg_s(v, r) do { \
+ u64 __val = (u64)(v); \
+ u32 __maybe_unused __check_r = (u32)(r); \
+ if (__builtin_constant_p(__val) && __val == 0) \
+ asm volatile(__msr_s(r, "xzr")); \
+ else \
+ asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \
+} while (0)
+
+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do { \
+ u64 __scs_val = read_sysreg(sysreg); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg(__scs_new, sysreg); \
+} while (0)
+
+#define sysreg_clear_set_hcr(clear, set) do { \
+ u64 __scs_val = read_sysreg(hcr_el2); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg_hcr(__scs_new); \
+} while (0)
+
+#define sysreg_clear_set_s(sysreg, clear, set) do { \
+ u64 __scs_val = read_sysreg_s(sysreg); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg_s(__scs_new, sysreg); \
+} while (0)
+
+#define write_sysreg_hcr(__val) do { \
+ if (IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) && \
+ (!system_capabilities_finalized() || \
+ alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC04_CPU_23))) \
+ asm volatile("dsb nsh; msr hcr_el2, %x0; isb" \
+ : : "rZ" (__val)); \
+ else \
+ asm volatile("msr hcr_el2, %x0" \
+ : : "rZ" (__val)); \
+} while (0)
+
+#define read_sysreg_par() ({ \
+ u64 par; \
+ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
+ par = read_sysreg(par_el1); \
+ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
+ par; \
+})
+
+#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val
+
+#define SYS_FIELD_GET(reg, field, val) \
+ FIELD_GET(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, \
+ SYS_FIELD_VALUE(reg, field, val))
+
+#endif
+
+#endif /* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index a6e1750369ef..d316a804eb38 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -1,54 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/system_misc.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_SYSTEM_MISC_H
#define __ASM_SYSTEM_MISC_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
+#include <linux/signal.h>
+#include <linux/ratelimit.h>
+#include <linux/reboot.h>
struct pt_regs;
-void die(const char *msg, struct pt_regs *regs, int err);
+void die(const char *msg, struct pt_regs *regs, long err);
struct siginfo;
void arm64_notify_die(const char *str, struct pt_regs *regs,
- struct siginfo *info, int err);
-
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
- struct pt_regs *),
- int sig, int code, const char *name);
+ int signo, int sicode, unsigned long far,
+ unsigned long err);
struct mm_struct;
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
extern void __show_regs(struct pt_regs *);
-void soft_restart(unsigned long);
-extern void (*arm_pm_restart)(char str, const char *cmd);
-
-#define UDBG_UNDEFINED (1 << 0)
-#define UDBG_SYSCALL (1 << 1)
-#define UDBG_BADABORT (1 << 2)
-#define UDBG_SEGV (1 << 3)
-#define UDBG_BUS (1 << 4)
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/text-patching.h b/arch/arm64/include/asm/text-patching.h
new file mode 100644
index 000000000000..587bdb91ab7a
--- /dev/null
+++ b/arch/arm64/include/asm/text-patching.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_PATCHING_H
+#define __ASM_PATCHING_H
+
+#include <linux/types.h>
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+
+int aarch64_insn_write_literal_u64(void *addr, u64 val);
+void *aarch64_insn_set(void *dst, u32 insn, size_t len);
+void *aarch64_insn_copy(void *dst, void *src, size_t len);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+#endif /* __ASM_PATCHING_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 3659e460071d..a803b887b0b4 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -1,84 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/thread_info.h
*
* Copyright (C) 2002 Russell King.
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_THREAD_INFO_H
#define __ASM_THREAD_INFO_H
-#ifdef __KERNEL__
-
#include <linux/compiler.h>
-#ifndef CONFIG_ARM64_64K_PAGES
-#define THREAD_SIZE_ORDER 1
-#endif
-
-#define THREAD_SIZE 8192
-#define THREAD_START_SP (THREAD_SIZE - 16)
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
-struct exec_domain;
+#include <asm/memory.h>
+#include <asm/stack_pointer.h>
#include <asm/types.h>
-typedef unsigned long mm_segment_t;
-
/*
* low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
*/
struct thread_info {
unsigned long flags; /* low level flags */
- mm_segment_t addr_limit; /* address limit */
- struct task_struct *task; /* main task structure */
- struct exec_domain *exec_domain; /* execution domain */
- struct restart_block restart_block;
- int preempt_count; /* 0 => preemptable, <0 => bug */
- int cpu; /* cpu */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ u64 ttbr0; /* saved TTBR0_EL1 */
+#endif
+ union {
+ u64 preempt_count; /* 0 => preemptible, <0 => bug */
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ u32 need_resched;
+ u32 count;
+#else
+ u32 count;
+ u32 need_resched;
+#endif
+ } preempt;
+ };
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *scs_base;
+ void *scs_sp;
+#endif
+ u32 cpu;
};
-#define INIT_THREAD_INFO(tsk) \
-{ \
- .task = &tsk, \
- .exec_domain = &default_exec_domain, \
- .flags = 0, \
- .preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
-}
-
-#define init_thread_info (init_thread_union.thread_info)
-#define init_stack (init_thread_union.stack)
-
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-static inline struct thread_info *current_thread_info(void)
-{
- register unsigned long sp asm ("sp");
- return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
-}
-
#define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \
@@ -86,42 +52,83 @@ static inline struct thread_info *current_thread_info(void)
#define thread_saved_fp(tsk) \
((unsigned long)(tsk->thread.cpu_context.fp))
-#endif
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
-/*
- * We use bit 30 of the preempt_count to indicate that kernel
- * preemption is occurring. See <asm/hardirq.h>.
- */
-#define PREEMPT_ACTIVE 0x40000000
+#endif
-/*
- * thread information flags:
- * TIF_SYSCALL_TRACE - syscall trace active
- * TIF_SIGPENDING - signal pending
- * TIF_NEED_RESCHED - rescheduling necessary
- * TIF_NOTIFY_RESUME - callback before returning to user
- * TIF_USEDFPU - FPU was used by this task this quantum (SMP)
- * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
- */
-#define TIF_SIGPENDING 0
-#define TIF_NEED_RESCHED 1
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_SYSCALL_TRACE 8
-#define TIF_POLLING_NRFLAG 16
+#define TIF_SIGPENDING 0 /* signal pending */
+#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
+#define TIF_NEED_RESCHED_LAZY 2 /* Lazy rescheduling needed */
+#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 4 /* CPU's FP state is not current's */
+#define TIF_UPROBE 5 /* uprobe breakpoint or singlestep */
+#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
+#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
+#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
+#define TIF_SECCOMP 11 /* syscall secure computing */
+#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
-#define TIF_SWITCH_MM 23 /* deferred switch_mm */
+#define TIF_SVE 23 /* Scalable Vector Extension in use */
+#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */
+#define TIF_SSBD 25 /* Wants SSB mitigation */
+#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
+#define TIF_SME 27 /* SME in use */
+#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
+#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */
+#define TIF_LAZY_MMU 31 /* Task in lazy mmu mode */
+#define TIF_LAZY_MMU_PENDING 32 /* Ops pending for lazy mmu mode exit */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
+#define _TIF_SVE (1 << TIF_SVE)
+#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
+#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV)
+
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
+ _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \
+ _TIF_PATCH_PENDING)
+
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_EMU)
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+ .scs_sp = init_shadow_call_stack,
+#else
+#define INIT_SCS
+#endif
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .flags = _TIF_FOREIGN_FPSTATE, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ INIT_SCS \
+}
-#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
index 81a076eb37fa..cf59ce91b22d 100644
--- a/arch/arm64/include/asm/timex.h
+++ b/arch/arm64/include/asm/timex.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_TIMEX_H
#define __ASM_TIMEX_H
@@ -22,7 +11,7 @@
* Use the current timer as a cycle counter since this is what we use for
* the delay loop.
*/
-#define get_cycles() arch_counter_get_cntvct()
+#define get_cycles() arch_timer_read_counter()
#include <asm-generic/timex.h>
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 46b3beb4b773..8d762607285c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -1,196 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/tlb.h
*
* Copyright (C) 2002 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_TLB_H
#define __ASM_TLB_H
#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#define MMU_GATHER_BUNDLE 8
+#define tlb_flush tlb_flush
+static void tlb_flush(struct mmu_gather *tlb);
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int fullmm;
- struct vm_area_struct *vma;
- unsigned long range_start;
- unsigned long range_end;
- unsigned int nr;
- unsigned int max;
- struct page **pages;
- struct page *local[MMU_GATHER_BUNDLE];
-};
+#include <asm-generic/tlb.h>
/*
- * This is unnecessarily complex. There's three ways the TLB shootdown
- * code is used:
- * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
- * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL.
- * 2. Unmapping all vmas. See exit_mmap().
- * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL. Additionally, page tables will be freed.
- * 3. Unmapping argument pages. See shift_arg_pages().
- * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- * tlb->vma will be NULL.
+ * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
*/
-static inline void tlb_flush(struct mmu_gather *tlb)
+static inline int tlb_get_level(struct mmu_gather *tlb)
{
- if (tlb->fullmm || !tlb->vma)
- flush_tlb_mm(tlb->mm);
- else if (tlb->range_end > 0) {
- flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
- if (!tlb->fullmm) {
- if (addr < tlb->range_start)
- tlb->range_start = addr;
- if (addr + PAGE_SIZE > tlb->range_end)
- tlb->range_end = addr + PAGE_SIZE;
- }
+ /* The TTL field is only valid for the leaf entry. */
+ if (tlb->freed_tables)
+ return TLBI_TTL_UNKNOWN;
+
+ if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 3;
+
+ if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 2;
+
+ if (tlb->cleared_puds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_p4ds))
+ return 1;
+
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
}
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+static inline void tlb_flush(struct mmu_gather *tlb)
{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(struct page *);
+ struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
+ bool last_level = !tlb->freed_tables;
+ unsigned long stride = tlb_get_unmap_size(tlb);
+ int tlb_level = tlb_get_level(tlb);
+
+ /*
+ * If we're tearing down the address space then we only care about
+ * invalidating the walk-cache, since the ASID allocator won't
+ * reallocate our ASID without invalidating the entire TLB.
+ */
+ if (tlb->fullmm) {
+ if (!last_level)
+ flush_tlb_mm(tlb->mm);
+ return;
}
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush(tlb);
- free_pages_and_swap_cache(tlb->pages, tlb->nr);
- tlb->nr = 0;
- if (tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-}
-static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
-{
- tlb->mm = mm;
- tlb->fullmm = fullmm;
- tlb->vma = NULL;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- __tlb_alloc_page(tlb);
+ __flush_tlb_range(&vma, tlb->start, tlb->end, stride,
+ last_level, tlb_level);
}
-static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+ unsigned long addr)
{
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
+ struct ptdesc *ptdesc = page_ptdesc(pte);
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+#if CONFIG_PGTABLE_LEVELS > 2
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+ unsigned long addr)
{
- if (!tlb->fullmm) {
- tlb->vma = vma;
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
-static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- tlb_flush(tlb);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
+#endif
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+#if CONFIG_PGTABLE_LEVELS > 3
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
+ unsigned long addr)
{
- tlb->pages[tlb->nr++] = page;
- VM_BUG_ON(tlb->nr > tlb->max);
- return tlb->max - tlb->nr;
-}
+ struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (!__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
+ if (!pgtable_l4_enabled())
+ return;
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long addr)
-{
- pgtable_page_dtor(pte);
- tlb_add_flush(tlb, addr);
- tlb_remove_page(tlb, pte);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
+#endif
-#ifndef CONFIG_ARM64_64K_PAGES
-static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4dp,
unsigned long addr)
{
- tlb_add_flush(tlb, addr);
- tlb_remove_page(tlb, virt_to_page(pmdp));
-}
-#endif
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
+ struct ptdesc *ptdesc = virt_to_ptdesc(p4dp);
-#define tlb_migrate_finish(mm) do { } while (0)
+ if (!pgtable_l5_enabled())
+ return;
-static inline void
-tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
+#endif
#endif
diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..fedb0b87b8db
--- /dev/null
+++ b/arch/arm64/include/asm/tlbbatch.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_ARM64_TLBBATCH_H
+#define _ARCH_ARM64_TLBBATCH_H
+
+struct arch_tlbflush_unmap_batch {
+ /*
+ * For arm64, HW can do tlb shootdown, so we don't
+ * need to record cpumask for sending IPI
+ */
+};
+
+#endif /* _ARCH_ARM64_TLBBATCH_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8b482035cfc2..a2d65d7d6aae 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -1,123 +1,611 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/tlbflush.h
*
* Copyright (C) 1999-2003 Russell King
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/bitfield.h>
+#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
+#include <asm/mmu.h>
+
+/*
+ * Raw TLBI operations.
+ *
+ * Where necessary, use the __tlbi() macro to avoid asm()
+ * boilerplate. Drivers and most kernel code should use the TLB
+ * management routines in preference to the macro below.
+ *
+ * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
+ * on whether a particular TLBI operation takes an argument or
+ * not. The macros handles invoking the asm with or without the
+ * register argument as appropriate.
+ */
+#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op "\n" \
+ ALTERNATIVE("nop\n nop", \
+ "dsb ish\n tlbi " #op, \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+ : : )
+
+#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op ", %0\n" \
+ ALTERNATIVE("nop\n nop", \
+ "dsb ish\n tlbi " #op ", %0", \
+ ARM64_WORKAROUND_REPEAT_TLBI, \
+ CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
+ : : "r" (arg))
+
+#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
+
+#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
+
+#define __tlbi_user(op, arg) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi(op, (arg) | USER_ASID_FLAG); \
+} while (0)
+
+/* This macro creates a properly formatted VA operand for the TLBI */
+#define __TLBI_VADDR(addr, asid) \
+ ({ \
+ unsigned long __ta = (addr) >> 12; \
+ __ta &= GENMASK_ULL(43, 0); \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
+ })
+
+/*
+ * Get translation granule of the system, which is decided by
+ * PAGE_SIZE. Used by TTL.
+ * - 4KB : 1
+ * - 16KB : 2
+ * - 64KB : 3
+ */
+#define TLBI_TTL_TG_4K 1
+#define TLBI_TTL_TG_16K 2
+#define TLBI_TTL_TG_64K 3
+
+static inline unsigned long get_trans_granule(void)
+{
+ switch (PAGE_SIZE) {
+ case SZ_4K:
+ return TLBI_TTL_TG_4K;
+ case SZ_16K:
+ return TLBI_TTL_TG_16K;
+ case SZ_64K:
+ return TLBI_TTL_TG_64K;
+ default:
+ return 0;
+ }
+}
-extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
-extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
+/*
+ * Level-based TLBI operations.
+ *
+ * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
+ * the level at which the invalidation must take place. If the level is
+ * wrong, no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
+ *
+ * For Stage-2 invalidation, use the level values provided to that effect
+ * in asm/stage2_pgtable.h.
+ */
+#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+
+#define TLBI_TTL_UNKNOWN INT_MAX
-extern struct cpu_tlb_fns cpu_tlb;
+#define __tlbi_level(op, addr, level) do { \
+ u64 arg = addr; \
+ \
+ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
+ level >= 0 && level <= 3) { \
+ u64 ttl = level & 3; \
+ ttl |= get_trans_granule() << 2; \
+ arg &= ~TLBI_TTL_MASK; \
+ arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
+ } \
+ \
+ __tlbi(op, arg); \
+} while(0)
+
+#define __tlbi_user_level(op, arg, level) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi_level(op, (arg | USER_ASID_FLAG), level); \
+} while (0)
/*
- * TLB Management
- * ==============
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
*
- * The arch/arm64/mm/tlb.S files implement these methods.
+ * +----------+------+-------+-------+-------+----------------------+
+ * | ASID | TG | SCALE | NUM | TTL | BADDR |
+ * +-----------------+-------+-------+-------+----------------------+
+ * |63 48|47 46|45 44|43 39|38 37|36 0|
*
- * The TLB specific code is expected to perform whatever tests it needs
- * to determine if it should invalidate the TLB for each call. Start
- * addresses are inclusive and end addresses are exclusive; it is safe to
- * round these addresses down.
+ * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
+ * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
+ *
+ */
+#define TLBIR_ASID_MASK GENMASK_ULL(63, 48)
+#define TLBIR_TG_MASK GENMASK_ULL(47, 46)
+#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44)
+#define TLBIR_NUM_MASK GENMASK_ULL(43, 39)
+#define TLBIR_TTL_MASK GENMASK_ULL(38, 37)
+#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0)
+
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = 0; \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \
+ __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \
+ __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \
+ __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \
+ __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \
+ __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \
+ __ta; \
+ })
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale) \
+ ((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
+
+/*
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
+ */
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
+
+/*
+ * TLB Invalidation
+ * ================
+ *
+ * This header file implements the low-level TLB invalidation routines
+ * (sometimes referred to as "flushing" in the kernel) for arm64.
+ *
+ * Every invalidation operation uses the following template:
+ *
+ * DSB ISHST // Ensure prior page-table updates have completed
+ * TLBI ... // Invalidate the TLB
+ * DSB ISH // Ensure the TLB invalidation has completed
+ * if (invalidated kernel mappings)
+ * ISB // Discard any instructions fetched from the old mapping
*
- * flush_tlb_all()
*
- * Invalidate the entire TLB.
+ * The following functions form part of the "core" TLB invalidation API,
+ * as documented in Documentation/core-api/cachetlb.rst:
+ *
+ * flush_tlb_all()
+ * Invalidate the entire TLB (kernel + user) on all CPUs
*
* flush_tlb_mm(mm)
+ * Invalidate an entire user address space on all CPUs.
+ * The 'mm' argument identifies the ASID to invalidate.
+ *
+ * flush_tlb_range(vma, start, end)
+ * Invalidate the virtual-address range '[start, end)' on all
+ * CPUs for the user address space corresponding to 'vma->mm'.
+ * Note that this operation also invalidates any walk-cache
+ * entries associated with translations for the specified address
+ * range.
+ *
+ * flush_tlb_kernel_range(start, end)
+ * Same as flush_tlb_range(..., start, end), but applies to
+ * kernel mappings rather than a particular user address space.
+ * Whilst not explicitly documented, this function is used when
+ * unmapping pages from vmalloc/io space.
*
- * Invalidate all TLB entries in a particular address space.
- * - mm - mm_struct describing address space
+ * flush_tlb_page(vma, addr)
+ * Invalidate a single user mapping for address 'addr' in the
+ * address space corresponding to 'vma->mm'. Note that this
+ * operation only invalidates a single, last-level page-table
+ * entry and therefore does not affect any walk-caches.
*
- * flush_tlb_range(mm,start,end)
*
- * Invalidate a range of TLB entries in the specified address
- * space.
- * - mm - mm_struct describing address space
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
+ * Next, we have some undocumented invalidation routines that you probably
+ * don't want to call unless you know what you're doing:
*
- * flush_tlb_page(vaddr,vma)
+ * local_flush_tlb_all()
+ * Same as flush_tlb_all(), but only applies to the calling CPU.
*
- * Invalidate the specified page in the specified address range.
- * - vaddr - virtual address (may not be aligned)
- * - vma - vma_struct describing address range
+ * __flush_tlb_kernel_pgtable(addr)
+ * Invalidate a single kernel mapping for address 'addr' on all
+ * CPUs, ensuring that any walk-cache entries associated with the
+ * translation are also invalidated.
*
- * flush_kern_tlb_page(kaddr)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
+ * Invalidate the virtual-address range '[start, end)' on all
+ * CPUs for the user address space corresponding to 'vma->mm'.
+ * The invalidation operations are issued at a granularity
+ * determined by 'stride' and only affect any walk-cache entries
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
- * Invalidate the TLB entry for the specified page. The address
- * will be in the kernels virtual memory space. Current uses
- * only require the D-TLB to be invalidated.
- * - kaddr - Kernel virtual memory address
+ * local_flush_tlb_page(vma, addr)
+ * Local variant of flush_tlb_page(). Stale TLB entries may
+ * remain in remote CPUs.
+ *
+ * local_flush_tlb_page_nonotify(vma, addr)
+ * Same as local_flush_tlb_page() except MMU notifier will not be
+ * called.
+ *
+ * local_flush_tlb_contpte(vma, addr)
+ * Invalidate the virtual-address range
+ * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU
+ * for the user address space corresponding to 'vma->mm'. Stale
+ * TLB entries may remain in remote CPUs.
+ *
+ * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
+ * on top of these routines, since that is our interface to the mmu_gather
+ * API as used by munmap() and friends.
*/
+static inline void local_flush_tlb_all(void)
+{
+ dsb(nshst);
+ __tlbi(vmalle1);
+ dsb(nsh);
+ isb();
+}
+
static inline void flush_tlb_all(void)
{
- dsb();
- asm("tlbi vmalle1is");
- dsb();
+ dsb(ishst);
+ __tlbi(vmalle1is);
+ dsb(ish);
isb();
}
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- unsigned long asid = (unsigned long)ASID(mm) << 48;
+ unsigned long asid;
+
+ dsb(ishst);
+ asid = __TLBI_VADDR(0, ASID(mm));
+ __tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
+ dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
+static inline void __local_flush_tlb_page_nonotify_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ unsigned long addr;
+
+ dsb(nshst);
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+}
- dsb();
- asm("tlbi aside1is, %0" : : "r" (asid));
- dsb();
+static inline void local_flush_tlb_page_nonotify(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
+ dsb(nsh);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ __local_flush_tlb_page_nonotify_nosync(vma->vm_mm, uaddr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+ dsb(nsh);
+}
+
+static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ unsigned long addr;
+
+ dsb(ishst);
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+}
+
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
- unsigned long addr = uaddr >> 12 |
- ((unsigned long)ASID(vma->vm_mm) << 48);
+ flush_tlb_page_nosync(vma, uaddr);
+ dsb(ish);
+}
- dsb();
- asm("tlbi vae1is, %0" : : "r" (addr));
- dsb();
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ /*
+ * TLB flush deferral is not required on systems which are affected by
+ * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
+ * will have two consecutive TLBI instructions with a dsb(ish) in between
+ * defeating the purpose (i.e save overall 'dsb ish' cost).
+ */
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
+ return false;
+
+ return true;
}
/*
- * Convert calls to our calling convention.
+ * To support TLB batched flush for multiple pages unmapping, we only send
+ * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the
+ * completion at the end in arch_tlbbatch_flush(). Since we've already issued
+ * TLBI for each page so only a DSB is needed to synchronise its effect on the
+ * other CPUs.
+ *
+ * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence
+ * for each page.
*/
-#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
-#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
+static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ dsb(ish);
+}
/*
- * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
+ * necessarily a performance improvement.
*/
-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+#define MAX_DVM_OPS PTRS_PER_PTE
+
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op: TLBI instruction that operates on a range (has 'r' prefix)
+ * @start: The start address of the range
+ * @pages: Range as the number of pages from 'start'
+ * @stride: Flush granularity
+ * @asid: The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ * (typically for user ASIDs). 'flase' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one until the alignment is reached
+ * using the non-range operations. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
+ *
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride, \
+ asid, tlb_level, tlbi_user, lpa2) \
+do { \
+ typeof(start) __flush_start = start; \
+ typeof(pages) __flush_pages = pages; \
+ int num = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
+ unsigned long addr; \
+ \
+ while (__flush_pages > 0) { \
+ if (!system_supports_tlb_range() || \
+ __flush_pages == 1 || \
+ (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
+ addr = __TLBI_VADDR(__flush_start, asid); \
+ __tlbi_level(op, addr, tlb_level); \
+ if (tlbi_user) \
+ __tlbi_user_level(op, addr, tlb_level); \
+ __flush_start += stride; \
+ __flush_pages -= stride >> PAGE_SHIFT; \
+ continue; \
+ } \
+ \
+ num = __TLBI_RANGE_NUM(__flush_pages, scale); \
+ if (num >= 0) { \
+ addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
+ scale, num, tlb_level); \
+ __tlbi(r##op, addr); \
+ if (tlbi_user) \
+ __tlbi_user(r##op, addr); \
+ __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
+ } \
+ scale--; \
+ } \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
+
+static inline bool __flush_tlb_range_limit_excess(unsigned long start,
+ unsigned long end, unsigned long pages, unsigned long stride)
{
/*
- * set_pte() does not have a DSB, so make sure that the page table
- * write is visible.
+ * When the system does not support TLB range based flush
+ * operation, (MAX_DVM_OPS - 1) pages can be handled. But
+ * with TLB range based operation, MAX_TLBI_RANGE_PAGES
+ * pages can be handled.
*/
- dsb();
+ if ((!system_supports_tlb_range() &&
+ (end - start) >= (MAX_DVM_OPS * stride)) ||
+ pages > MAX_TLBI_RANGE_PAGES)
+ return true;
+
+ return false;
+}
+
+static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ unsigned long asid, pages;
+
+ start = round_down(start, stride);
+ end = round_up(end, stride);
+ pages = (end - start) >> PAGE_SHIFT;
+
+ if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
+ flush_tlb_mm(mm);
+ return;
+ }
+
+ dsb(ishst);
+ asid = ASID(mm);
+
+ if (last_level)
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
+ else
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+{
+ __flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
+ last_level, tlb_level);
+ dsb(ish);
+}
+
+static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ unsigned long asid;
+
+ addr = round_down(addr, CONT_PTE_SIZE);
+
+ dsb(nshst);
+ asid = ASID(vma->vm_mm);
+ __flush_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid,
+ 3, true, lpa2_is_enabled());
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr,
+ addr + CONT_PTE_SIZE);
+ dsb(nsh);
}
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ /*
+ * We cannot use leaf-only invalidation here, since we may be invalidating
+ * table entries as part of collapsing hugepages or moving page tables.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ * information here.
+ */
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ const unsigned long stride = PAGE_SIZE;
+ unsigned long pages;
+
+ start = round_down(start, stride);
+ end = round_up(end, stride);
+ pages = (end - start) >> PAGE_SHIFT;
+
+ if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
+ flush_tlb_all();
+ return;
+ }
+
+ dsb(ishst);
+ __flush_tlb_range_op(vaale1is, start, pages, stride, 0,
+ TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
+ dsb(ish);
+ isb();
+}
+
+/*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+{
+ unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+ dsb(ishst);
+ __tlbi(vaae1is, addr);
+ dsb(ish);
+ isb();
+}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+}
+
+static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
+{
+ ptdesc_t diff = oldval ^ newval;
+
+ /* invalid to valid transition requires no flush */
+ if (!(oldval & PTE_VALID))
+ return false;
+
+ /* Transition in the SW bits requires no flush */
+ diff &= ~PTE_SWBITS_MASK;
+
+ return diff;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
#endif
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 000000000000..b9eaf4ad7085
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_NUMA
+#include <asm/numa.h>
+
+struct pci_bus;
+int pcibus_to_node(struct pci_bus *bus);
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
+#endif /* CONFIG_NUMA */
+
+#include <linux/arch_topology.h>
+
+void update_freq_counters_refs(void);
+
+/* Replace task scheduler's default frequency-invariant accounting */
+#define arch_scale_freq_tick topology_scale_freq_tick
+#define arch_set_freq_scale topology_set_freq_scale
+#define arch_scale_freq_capacity topology_get_freq_scale
+#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
+
+/* Replace task scheduler's default cpu-invariant accounting */
+#define arch_scale_cpu_capacity topology_get_cpu_scale
+
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
+/* Replace task scheduler's default HW pressure API */
+#define arch_scale_hw_pressure topology_get_hw_pressure
+#define arch_update_hw_pressure topology_update_hw_pressure
+
+#undef arch_cpu_is_threaded
+#define arch_cpu_is_threaded() (read_cpuid_mpidr() & MPIDR_MT_BITMASK)
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h
new file mode 100644
index 000000000000..033d400a4ea4
--- /dev/null
+++ b/arch/arm64/include/asm/trans_pgd.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2021, Microsoft Corporation.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _ASM_TRANS_TABLE_H
+#define _ASM_TRANS_TABLE_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+#include <asm/pgtable-types.h>
+
+/*
+ * trans_alloc_page
+ * - Allocator that should return exactly one zeroed page, if this
+ * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page()
+ * return -ENOMEM error.
+ *
+ * trans_alloc_arg
+ * - Passed to trans_alloc_page as an argument
+ */
+
+struct trans_pgd_info {
+ void * (*trans_alloc_page)(void *arg);
+ void *trans_alloc_arg;
+};
+
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
+ unsigned long start, unsigned long end);
+
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+ unsigned long *t0sz, void *page);
+
+int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
+ phys_addr_t *el2_vectors);
+
+extern char trans_pgd_stub_vectors[];
+
+#endif /* _ASM_TRANS_TABLE_H */
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 10ca8ff93cc2..e92e4a0e48fc 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -1,30 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/traps.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_TRAP_H
#define __ASM_TRAP_H
-static inline int in_exception_text(unsigned long ptr)
+#include <linux/list.h>
+#include <asm/esr.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+
+#ifdef CONFIG_ARMV8_DEPRECATED
+bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn);
+#else
+static inline bool
+try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
{
- extern char __exception_text_start[];
- extern char __exception_text_end[];
+ return false;
+}
+#endif /* CONFIG_ARMV8_DEPRECATED */
- return ptr >= (unsigned long)&__exception_text_start &&
- ptr < (unsigned long)&__exception_text_end;
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
+void arm64_notify_segfault(unsigned long addr);
+void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
+
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr);
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr);
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr);
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr);
+
+int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void dump_kernel_instr(unsigned long kaddr);
+
+/*
+ * Move regs->pc to next instruction and do necessary setup before it
+ * is executed.
+ */
+void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size);
+
+static inline int __in_irqentry_text(unsigned long ptr)
+{
+ return ptr >= (unsigned long)&__irqentry_text_start &&
+ ptr < (unsigned long)&__irqentry_text_end;
+}
+
+static inline int in_entry_text(unsigned long ptr)
+{
+ return ptr >= (unsigned long)&__entry_text_start &&
+ ptr < (unsigned long)&__entry_text_end;
}
+/*
+ * CPUs with the RAS extensions have an Implementation-Defined-Syndrome bit
+ * to indicate whether this ESR has a RAS encoding. CPUs without this feature
+ * have a ISS-Valid bit in the same position.
+ * If this bit is set, we know its not a RAS SError.
+ * If its clear, we need to know if the CPU supports RAS. Uncategorized RAS
+ * errors share the same encoding as an all-zeros encoding from a CPU that
+ * doesn't support RAS.
+ */
+static inline bool arm64_is_ras_serror(unsigned long esr)
+{
+ WARN_ON(preemptible());
+
+ if (esr & ESR_ELx_IDS)
+ return false;
+
+ if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN))
+ return true;
+ else
+ return false;
+}
+
+/*
+ * Return the AET bits from a RAS SError's ESR.
+ *
+ * It is implementation defined whether Uncategorized errors are containable.
+ * We treat them as Uncontainable.
+ * Non-RAS SError's are reported as Uncontained/Uncategorized.
+ */
+static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
+{
+ unsigned long aet = esr & ESR_ELx_AET;
+
+ if (!arm64_is_ras_serror(esr)) {
+ /* Not a RAS error, we can't interpret the ESR. */
+ return ESR_ELx_AET_UC;
+ }
+
+ /*
+ * AET is RES0 if 'the value returned in the DFSC field is not
+ * [ESR_ELx_FSC_SERROR]'
+ */
+ if ((esr & ESR_ELx_FSC) != ESR_ELx_FSC_SERROR) {
+ /* No severity information : Uncategorized */
+ return ESR_ELx_AET_UC;
+ }
+
+ return aet;
+}
+
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
+
+static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned long esr)
+{
+ bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+ bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+ int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+ int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+ int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+ unsigned long dst, size;
+
+ dst = regs->regs[dstreg];
+ size = regs->regs[sizereg];
+
+ /*
+ * Put the registers back in the original format suitable for a
+ * prologue instruction, using the generic return routine from the
+ * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+ */
+ if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+ /* SET* instruction */
+ if (option_a ^ wrong_option) {
+ /* Format is from Option A; forward set */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[sizereg] = -size;
+ }
+ } else {
+ /* CPY* instruction */
+ unsigned long src = regs->regs[srcreg];
+ if (!(option_a ^ wrong_option)) {
+ /* Format is from Option B */
+ if (regs->pstate & PSR_N_BIT) {
+ /* Backward copy */
+ regs->regs[dstreg] = dst - size;
+ regs->regs[srcreg] = src - size;
+ }
+ } else {
+ /* Format is from Option A */
+ if (size & BIT(63)) {
+ /* Forward copy */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[srcreg] = src + size;
+ regs->regs[sizereg] = -size;
+ }
+ }
+ }
+
+ if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+ regs->pc -= 8;
+ else
+ regs->pc -= 4;
+}
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index edb3d5c73a32..6490930deef8 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -1,105 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/uaccess.h
*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_UACCESS_H
#define __ASM_UACCESS_H
+#include <asm/alternative.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/sysreg.h>
+
/*
* User space memory access functions
*/
+#include <linux/bitops.h>
+#include <linux/kasan-checks.h>
#include <linux/string.h>
-#include <linux/thread_info.h>
+#include <asm/asm-extable.h>
+#include <asm/cpufeature.h>
+#include <asm/mmu.h>
+#include <asm/mte.h>
#include <asm/ptrace.h>
-#include <asm/errno.h>
#include <asm/memory.h>
-#include <asm/compiler.h>
+#include <asm/extable.h>
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
+static inline int __access_ok(const void __user *ptr, unsigned long size);
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * Test whether a block of memory is a valid user space address.
+ * Returns 1 if the range is valid, 0 otherwise.
*
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
+ * This is equivalent to the following test:
+ * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
*/
+static inline int access_ok(const void __user *addr, unsigned long size)
+{
+ /*
+ * Asynchronous I/O running in a kernel thread does not have the
+ * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag
+ * the user address before checking.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
+ (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR)))
+ addr = untagged_addr(addr);
+
+ return likely(__access_ok(addr, size));
+}
+#define access_ok access_ok
+
+#include <asm-generic/access_ok.h>
-struct exception_table_entry
+/*
+ * User access enabling/disabling.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void __uaccess_ttbr0_disable(void)
{
- unsigned long insn, fixup;
-};
+ unsigned long flags, ttbr;
+
+ local_irq_save(flags);
+ ttbr = read_sysreg(ttbr1_el1);
+ ttbr &= ~TTBR_ASID_MASK;
+ /* reserved_pg_dir placed before swapper_pg_dir */
+ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
+ /* Set reserved ASID */
+ write_sysreg(ttbr, ttbr1_el1);
+ isb();
+ local_irq_restore(flags);
+}
-extern int fixup_exception(struct pt_regs *regs);
+static inline void __uaccess_ttbr0_enable(void)
+{
+ unsigned long flags, ttbr0, ttbr1;
+
+ /*
+ * Disable interrupts to avoid preemption between reading the 'ttbr0'
+ * variable and the MSR. A context switch could trigger an ASID
+ * roll-over and an update of 'ttbr0'.
+ */
+ local_irq_save(flags);
+ ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
+
+ /* Restore active ASID */
+ ttbr1 = read_sysreg(ttbr1_el1);
+ ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
+ ttbr1 |= ttbr0 & TTBR_ASID_MASK;
+ write_sysreg(ttbr1, ttbr1_el1);
+
+ /* Restore user page table */
+ write_sysreg(ttbr0, ttbr0_el1);
+ isb();
+ local_irq_restore(flags);
+}
-#define KERNEL_DS (-1UL)
-#define get_ds() (KERNEL_DS)
+static inline bool uaccess_ttbr0_disable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_disable();
+ return true;
+}
-#define USER_DS TASK_SIZE_64
-#define get_fs() (current_thread_info()->addr_limit)
+static inline bool uaccess_ttbr0_enable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_enable();
+ return true;
+}
+#else
+static inline bool uaccess_ttbr0_disable(void)
+{
+ return false;
+}
-static inline void set_fs(mm_segment_t fs)
+static inline bool uaccess_ttbr0_enable(void)
{
- current_thread_info()->addr_limit = fs;
+ return false;
}
+#endif
-#define segment_eq(a,b) ((a) == (b))
+static inline void __uaccess_disable_hw_pan(void)
+{
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,
+ CONFIG_ARM64_PAN));
+}
-/*
- * Return 1 if addr < current->addr_limit, 0 otherwise.
- */
-#define __addr_ok(addr) \
-({ \
- unsigned long flag; \
- asm("cmp %1, %0; cset %0, lo" \
- : "=&r" (flag) \
- : "r" (addr), "0" (current_thread_info()->addr_limit) \
- : "cc"); \
- flag; \
-})
+static inline void __uaccess_enable_hw_pan(void)
+{
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
+ CONFIG_ARM64_PAN));
+}
+
+static inline void uaccess_disable_privileged(void)
+{
+ mte_disable_tco();
+
+ if (uaccess_ttbr0_disable())
+ return;
+
+ __uaccess_enable_hw_pan();
+}
+
+static inline void uaccess_enable_privileged(void)
+{
+ mte_enable_tco();
+
+ if (uaccess_ttbr0_enable())
+ return;
+
+ __uaccess_disable_hw_pan();
+}
/*
- * Test whether a block of memory is a valid user space address.
- * Returns 1 if the range is valid, 0 otherwise.
+ * Sanitize a uaccess pointer such that it cannot reach any kernel address.
*
- * This is equivalent to the following test:
- * (u65)addr + (u65)size < (u65)current->addr_limit
- *
- * This needs 65-bit arithmetic.
+ * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1
+ * address range (i.e. any kernel address), and either the pointer falls within
+ * the TTBR0 address range or must cause a fault.
*/
-#define __range_ok(addr, size) \
-({ \
- unsigned long flag, roksum; \
- __chk_user_ptr(addr); \
- asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \
- : "=&r" (flag), "=&r" (roksum) \
- : "1" (addr), "Ir" (size), \
- "r" (current_thread_info()->addr_limit) \
- : "cc"); \
- flag; \
-})
+#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
+static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
+{
+ void __user *safe_ptr;
+
+ asm volatile(
+ " bic %0, %1, %2\n"
+ : "=r" (safe_ptr)
+ : "r" (ptr),
+ "i" (BIT(55))
+ );
-#define access_ok(type, addr, size) __range_ok(addr, size)
+ return safe_ptr;
+}
/*
* The "__xxx" versions of the user access functions do not verify the address
@@ -109,189 +184,322 @@ static inline void set_fs(mm_segment_t fs)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_user_asm(instr, reg, x, addr, err) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_mem_asm(load, reg, x, addr, label, type) \
+ asm_goto_output( \
+ "1: " load " " reg "0, [%1]\n" \
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : "=r" (x) \
+ : "r" (addr) : : label)
+#else
+#define __get_mem_asm(load, reg, x, addr, label, type) do { \
+ int __gma_err = 0; \
asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+ "1: " load " " reg "1, [%2]\n" \
"2:\n" \
- " .section .fixup, \"ax\"\n" \
- " .align 2\n" \
- "3: mov %w0, %3\n" \
- " mov %1, #0\n" \
- " b 2b\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=&r" (x) \
- : "r" (addr), "i" (-EFAULT))
-
-#define __get_user_err(x, ptr, err) \
+ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
+ : "+r" (__gma_err), "=r" (x) \
+ : "r" (addr)); \
+ if (__gma_err) goto label; } while (0)
+#endif
+
+#define __raw_get_mem(ldr, x, ptr, label, type) \
+do { \
+ unsigned long __gu_val; \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \
+ break; \
+ case 2: \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \
+ break; \
+ case 4: \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \
+ break; \
+ case 8: \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+} while (0)
+
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
+#define __raw_get_user(x, ptr, label) \
do { \
- unsigned long __gu_val; \
+ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
+ __typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \
+ do { \
+ __label__ __rgu_failed; \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \
+ uaccess_ttbr0_disable(); \
+ (x) = __rgu_val; \
break; \
- case 2: \
- __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \
- break; \
- case 4: \
- __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \
- break; \
- case 8: \
- __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \
- break; \
- default: \
- BUILD_BUG(); \
+ __rgu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
+} while (0)
+
+#define __get_user_error(x, ptr, err) \
+do { \
+ __label__ __gu_failed; \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ if (access_ok(__p, sizeof(*__p))) { \
+ __p = uaccess_mask_ptr(__p); \
+ __raw_get_user((x), __p, __gu_failed); \
+ } else { \
+ __gu_failed: \
+ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \
} \
- (x) = (__typeof__(*(ptr)))__gu_val; \
} while (0)
#define __get_user(x, ptr) \
({ \
int __gu_err = 0; \
- __get_user_err((x), (ptr), __gu_err); \
+ __get_user_error((x), (ptr), __gu_err); \
__gu_err; \
})
-#define __get_user_error(x, ptr, err) \
-({ \
- __get_user_err((x), (ptr), (err)); \
- (void)0; \
-})
-
-#define __get_user_unaligned __get_user
+#define get_user __get_user
-#define get_user(x, ptr) \
-({ \
- might_fault(); \
- access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ? \
- __get_user((x), (ptr)) : \
- ((x) = 0, -EFAULT); \
-})
+/*
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ __typeof__(dst) __gkn_dst = (dst); \
+ __typeof__(src) __gkn_src = (src); \
+ do { \
+ __label__ __gkn_label; \
+ \
+ __mte_enable_tco_async(); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_label, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __gkn_label: \
+ __mte_disable_tco_async(); \
+ goto err_label; \
+ } while (0); \
+} while (0)
-#define __put_user_asm(instr, reg, x, addr, err) \
- asm volatile( \
- "1: " instr " " reg "1, [%2]\n" \
+#define __put_mem_asm(store, reg, x, addr, label, type) \
+ asm goto( \
+ "1: " store " " reg "0, [%1]\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .align 2\n" \
- "3: mov %w0, %3\n" \
- " b 2b\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 3\n" \
- " .quad 1b, 3b\n" \
- " .previous" \
- : "+r" (err) \
- : "r" (x), "r" (addr), "i" (-EFAULT))
-
-#define __put_user_err(x, ptr, err) \
+ _ASM_EXTABLE_##type##ACCESS(1b, %l2) \
+ : : "rZ" (x), "r" (addr) : : label)
+
+#define __raw_put_mem(str, x, ptr, label, type) \
+do { \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \
+ break; \
+ case 2: \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \
+ break; \
+ case 4: \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \
+ break; \
+ case 8: \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+} while (0)
+
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
+#define __raw_put_user(x, ptr, label) \
do { \
- __typeof__(*(ptr)) __pu_val = (x); \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \
- break; \
- case 2: \
- __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \
- break; \
- case 4: \
- __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \
- break; \
- case 8: \
- __put_user_asm("str", "%", __pu_val, (ptr), (err)); \
+ __label__ __rpu_failed; \
+ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
+ __typeof__(*(ptr)) __rpu_val = (x); \
+ __chk_user_ptr(__rpu_ptr); \
+ \
+ do { \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \
+ uaccess_ttbr0_disable(); \
break; \
- default: \
- BUILD_BUG(); \
+ __rpu_failed: \
+ uaccess_ttbr0_disable(); \
+ goto label; \
+ } while (0); \
+} while (0)
+
+#define __put_user_error(x, ptr, err) \
+do { \
+ __label__ __pu_failed; \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ if (access_ok(__p, sizeof(*__p))) { \
+ __p = uaccess_mask_ptr(__p); \
+ __raw_put_user((x), __p, __pu_failed); \
+ } else { \
+ __pu_failed: \
+ (err) = -EFAULT; \
} \
} while (0)
#define __put_user(x, ptr) \
({ \
int __pu_err = 0; \
- __put_user_err((x), (ptr), __pu_err); \
+ __put_user_error((x), (ptr), __pu_err); \
__pu_err; \
})
-#define __put_user_error(x, ptr, err) \
+#define put_user __put_user
+
+/*
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ __typeof__(dst) __pkn_dst = (dst); \
+ __typeof__(src) __pkn_src = (src); \
+ \
+ do { \
+ __label__ __pkn_err; \
+ __mte_enable_tco_async(); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
+ __mte_disable_tco_async(); \
+ break; \
+ __pkn_err: \
+ __mte_disable_tco_async(); \
+ goto err_label; \
+ } while (0); \
+} while(0)
+
+extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
+#define raw_copy_from_user(to, from, n) \
({ \
- __put_user_err((x), (ptr), (err)); \
- (void)0; \
+ unsigned long __acfu_ret; \
+ uaccess_ttbr0_enable(); \
+ __acfu_ret = __arch_copy_from_user((to), \
+ __uaccess_mask_ptr(from), (n)); \
+ uaccess_ttbr0_disable(); \
+ __acfu_ret; \
})
-#define __put_user_unaligned __put_user
-
-#define put_user(x, ptr) \
+extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
+#define raw_copy_to_user(to, from, n) \
({ \
- might_fault(); \
- access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \
- __put_user((x), (ptr)) : \
- -EFAULT; \
+ unsigned long __actu_ret; \
+ uaccess_ttbr0_enable(); \
+ __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
+ (from), (n)); \
+ uaccess_ttbr0_disable(); \
+ __actu_ret; \
})
-extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
-extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
-
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
-static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
- if (access_ok(VERIFY_READ, from, n))
- n = __copy_from_user(to, from, n);
- else /* security hole - plug it */
- memset(to, 0, n);
- return n;
+ if (unlikely(!access_ok(ptr,len)))
+ return 0;
+ uaccess_ttbr0_enable();
+ return 1;
}
+#define user_access_begin(a,b) user_access_begin(a,b)
+#define user_access_end() uaccess_ttbr0_disable()
+#define arch_unsafe_put_user(x, ptr, label) \
+ __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
+#define arch_unsafe_get_user(x, ptr, label) \
+ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
-static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- n = __copy_to_user(to, from, n);
- return n;
-}
+/*
+ * KCSAN uses these to save and restore ttbr state.
+ * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so
+ * they are no-ops.
+ */
+static inline unsigned long user_access_save(void) { return 0; }
+static inline void user_access_restore(unsigned long enabled) { }
-static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
- if (access_ok(VERIFY_READ, from, n) && access_ok(VERIFY_WRITE, to, n))
- n = __copy_in_user(to, from, n);
- return n;
-}
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
-#define __copy_to_user_inatomic __copy_to_user
-#define __copy_from_user_inatomic __copy_from_user
+#define INLINE_COPY_TO_USER
+#define INLINE_COPY_FROM_USER
-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
+static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
- if (access_ok(VERIFY_WRITE, to, n))
- n = __clear_user(to, n);
+ if (access_ok(to, n)) {
+ uaccess_ttbr0_enable();
+ n = __arch_clear_user(__uaccess_mask_ptr(to), n);
+ uaccess_ttbr0_disable();
+ }
return n;
}
+#define clear_user __clear_user
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
- long res = -EFAULT;
- if (access_ok(VERIFY_READ, src, 1))
- res = __strncpy_from_user(dst, src, count);
- return res;
-}
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+
+extern __must_check long strnlen_user(const char __user *str, long n);
-#define strlen_user(s) strnlen_user(s, ~0UL >> 1)
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
-static inline long __must_check strnlen_user(const char __user *s, long n)
+static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
{
- unsigned long res = 0;
+ kasan_check_write(dst, size);
+ return __copy_user_flushcache(dst, __uaccess_mask_ptr(src), size);
+}
+#endif
- if (__addr_ok(s))
- res = __strnlen_user(s, n);
+#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
- return res;
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+static inline size_t probe_subpage_writeable(const char __user *uaddr,
+ size_t size)
+{
+ if (!system_supports_mte())
+ return 0;
+ return mte_probe_user_range(uaddr, size);
}
+#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/asm/ucontext.h
deleted file mode 100644
index 42e04c877428..000000000000
--- a/arch/arm64/include/asm/ucontext.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_UCONTEXT_H
-#define __ASM_UCONTEXT_H
-
-struct ucontext {
- unsigned long uc_flags;
- struct ucontext *uc_link;
- stack_t uc_stack;
- sigset_t uc_sigmask;
- /* glibc uses a 1024-bit sigset_t */
- __u8 __unused[1024 / 8 - sizeof(sigset_t)];
- /* last for future expansion */
- struct sigcontext uc_mcontext;
-};
-
-#endif /* __ASM_UCONTEXT_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 82ce217e94cf..80618c9bbcd8 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -1,30 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_STAT
#define __ARCH_WANT_COMPAT_STAT64
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
+
+/*
+ * The following SVCs are ARM private.
+ */
+#define __ARM_NR_COMPAT_BASE 0x0f0000
+#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE + 2)
+#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
+#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#endif
+
#define __ARCH_WANT_SYS_CLONE
-#include <uapi/asm/unistd.h>
+#define __ARCH_WANT_NEW_STAT
+
+#include <asm/unistd_64.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 58125bf008d3..e0b1a0b57f75 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -1,419 +1,9 @@
-/*
- * AArch32 (compat) system call definitions.
- *
- * Copyright (C) 2001-2005 Russell King
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _UAPI__ASM_ARM_UNISTD_H
+#define _UAPI__ASM_ARM_UNISTD_H
-#ifndef __SYSCALL
-#define __SYSCALL(x, y)
-#endif
+#include <asm/unistd_32.h>
-__SYSCALL(0, sys_restart_syscall)
-__SYSCALL(1, sys_exit)
-__SYSCALL(2, sys_fork)
-__SYSCALL(3, sys_read)
-__SYSCALL(4, sys_write)
-__SYSCALL(5, compat_sys_open)
-__SYSCALL(6, sys_close)
-__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */
-__SYSCALL(8, sys_creat)
-__SYSCALL(9, sys_link)
-__SYSCALL(10, sys_unlink)
-__SYSCALL(11, compat_sys_execve)
-__SYSCALL(12, sys_chdir)
-__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */
-__SYSCALL(14, sys_mknod)
-__SYSCALL(15, sys_chmod)
-__SYSCALL(16, sys_lchown16)
-__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */
-__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */
-__SYSCALL(19, compat_sys_lseek)
-__SYSCALL(20, sys_getpid)
-__SYSCALL(21, compat_sys_mount)
-__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */
-__SYSCALL(23, sys_setuid16)
-__SYSCALL(24, sys_getuid16)
-__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */
-__SYSCALL(26, compat_sys_ptrace)
-__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */
-__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */
-__SYSCALL(29, sys_pause)
-__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */
-__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */
-__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */
-__SYSCALL(33, sys_access)
-__SYSCALL(34, sys_nice)
-__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */
-__SYSCALL(36, sys_sync)
-__SYSCALL(37, sys_kill)
-__SYSCALL(38, sys_rename)
-__SYSCALL(39, sys_mkdir)
-__SYSCALL(40, sys_rmdir)
-__SYSCALL(41, sys_dup)
-__SYSCALL(42, sys_pipe)
-__SYSCALL(43, compat_sys_times)
-__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */
-__SYSCALL(45, sys_brk)
-__SYSCALL(46, sys_setgid16)
-__SYSCALL(47, sys_getgid16)
-__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */
-__SYSCALL(49, sys_geteuid16)
-__SYSCALL(50, sys_getegid16)
-__SYSCALL(51, sys_acct)
-__SYSCALL(52, sys_umount)
-__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */
-__SYSCALL(54, compat_sys_ioctl)
-__SYSCALL(55, compat_sys_fcntl)
-__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */
-__SYSCALL(57, sys_setpgid)
-__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */
-__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */
-__SYSCALL(60, sys_umask)
-__SYSCALL(61, sys_chroot)
-__SYSCALL(62, compat_sys_ustat)
-__SYSCALL(63, sys_dup2)
-__SYSCALL(64, sys_getppid)
-__SYSCALL(65, sys_getpgrp)
-__SYSCALL(66, sys_setsid)
-__SYSCALL(67, compat_sys_sigaction)
-__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */
-__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */
-__SYSCALL(70, sys_setreuid16)
-__SYSCALL(71, sys_setregid16)
-__SYSCALL(72, sys_sigsuspend)
-__SYSCALL(73, compat_sys_sigpending)
-__SYSCALL(74, sys_sethostname)
-__SYSCALL(75, compat_sys_setrlimit)
-__SYSCALL(76, sys_ni_syscall) /* 76 was compat_sys_getrlimit */
-__SYSCALL(77, compat_sys_getrusage)
-__SYSCALL(78, compat_sys_gettimeofday)
-__SYSCALL(79, compat_sys_settimeofday)
-__SYSCALL(80, sys_getgroups16)
-__SYSCALL(81, sys_setgroups16)
-__SYSCALL(82, sys_ni_syscall) /* 82 was compat_sys_select */
-__SYSCALL(83, sys_symlink)
-__SYSCALL(84, sys_ni_syscall) /* 84 was sys_lstat */
-__SYSCALL(85, sys_readlink)
-__SYSCALL(86, sys_uselib)
-__SYSCALL(87, sys_swapon)
-__SYSCALL(88, sys_reboot)
-__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */
-__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */
-__SYSCALL(91, sys_munmap)
-__SYSCALL(92, compat_sys_truncate)
-__SYSCALL(93, compat_sys_ftruncate)
-__SYSCALL(94, sys_fchmod)
-__SYSCALL(95, sys_fchown16)
-__SYSCALL(96, sys_getpriority)
-__SYSCALL(97, sys_setpriority)
-__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */
-__SYSCALL(99, compat_sys_statfs)
-__SYSCALL(100, compat_sys_fstatfs)
-__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */
-__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */
-__SYSCALL(103, sys_syslog)
-__SYSCALL(104, compat_sys_setitimer)
-__SYSCALL(105, compat_sys_getitimer)
-__SYSCALL(106, compat_sys_newstat)
-__SYSCALL(107, compat_sys_newlstat)
-__SYSCALL(108, compat_sys_newfstat)
-__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */
-__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */
-__SYSCALL(111, sys_vhangup)
-__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */
-__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */
-__SYSCALL(114, compat_sys_wait4)
-__SYSCALL(115, sys_swapoff)
-__SYSCALL(116, compat_sys_sysinfo)
-__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */
-__SYSCALL(118, sys_fsync)
-__SYSCALL(119, compat_sys_sigreturn_wrapper)
-__SYSCALL(120, sys_clone)
-__SYSCALL(121, sys_setdomainname)
-__SYSCALL(122, sys_newuname)
-__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */
-__SYSCALL(124, compat_sys_adjtimex)
-__SYSCALL(125, sys_mprotect)
-__SYSCALL(126, compat_sys_sigprocmask)
-__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */
-__SYSCALL(128, sys_init_module)
-__SYSCALL(129, sys_delete_module)
-__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */
-__SYSCALL(131, sys_quotactl)
-__SYSCALL(132, sys_getpgid)
-__SYSCALL(133, sys_fchdir)
-__SYSCALL(134, sys_bdflush)
-__SYSCALL(135, sys_sysfs)
-__SYSCALL(136, sys_personality)
-__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */
-__SYSCALL(138, sys_setfsuid16)
-__SYSCALL(139, sys_setfsgid16)
-__SYSCALL(140, sys_llseek)
-__SYSCALL(141, compat_sys_getdents)
-__SYSCALL(142, compat_sys_select)
-__SYSCALL(143, sys_flock)
-__SYSCALL(144, sys_msync)
-__SYSCALL(145, compat_sys_readv)
-__SYSCALL(146, compat_sys_writev)
-__SYSCALL(147, sys_getsid)
-__SYSCALL(148, sys_fdatasync)
-__SYSCALL(149, compat_sys_sysctl)
-__SYSCALL(150, sys_mlock)
-__SYSCALL(151, sys_munlock)
-__SYSCALL(152, sys_mlockall)
-__SYSCALL(153, sys_munlockall)
-__SYSCALL(154, sys_sched_setparam)
-__SYSCALL(155, sys_sched_getparam)
-__SYSCALL(156, sys_sched_setscheduler)
-__SYSCALL(157, sys_sched_getscheduler)
-__SYSCALL(158, sys_sched_yield)
-__SYSCALL(159, sys_sched_get_priority_max)
-__SYSCALL(160, sys_sched_get_priority_min)
-__SYSCALL(161, compat_sys_sched_rr_get_interval)
-__SYSCALL(162, compat_sys_nanosleep)
-__SYSCALL(163, sys_mremap)
-__SYSCALL(164, sys_setresuid16)
-__SYSCALL(165, sys_getresuid16)
-__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */
-__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */
-__SYSCALL(168, sys_poll)
-__SYSCALL(169, sys_ni_syscall)
-__SYSCALL(170, sys_setresgid16)
-__SYSCALL(171, sys_getresgid16)
-__SYSCALL(172, sys_prctl)
-__SYSCALL(173, compat_sys_rt_sigreturn_wrapper)
-__SYSCALL(174, compat_sys_rt_sigaction)
-__SYSCALL(175, compat_sys_rt_sigprocmask)
-__SYSCALL(176, compat_sys_rt_sigpending)
-__SYSCALL(177, compat_sys_rt_sigtimedwait)
-__SYSCALL(178, compat_sys_rt_sigqueueinfo)
-__SYSCALL(179, compat_sys_rt_sigsuspend)
-__SYSCALL(180, compat_sys_pread64_wrapper)
-__SYSCALL(181, compat_sys_pwrite64_wrapper)
-__SYSCALL(182, sys_chown16)
-__SYSCALL(183, sys_getcwd)
-__SYSCALL(184, sys_capget)
-__SYSCALL(185, sys_capset)
-__SYSCALL(186, compat_sys_sigaltstack)
-__SYSCALL(187, compat_sys_sendfile)
-__SYSCALL(188, sys_ni_syscall) /* 188 reserved */
-__SYSCALL(189, sys_ni_syscall) /* 189 reserved */
-__SYSCALL(190, sys_vfork)
-__SYSCALL(191, compat_sys_getrlimit) /* SuS compliant getrlimit */
-__SYSCALL(192, sys_mmap_pgoff)
-__SYSCALL(193, compat_sys_truncate64_wrapper)
-__SYSCALL(194, compat_sys_ftruncate64_wrapper)
-__SYSCALL(195, sys_stat64)
-__SYSCALL(196, sys_lstat64)
-__SYSCALL(197, sys_fstat64)
-__SYSCALL(198, sys_lchown)
-__SYSCALL(199, sys_getuid)
-__SYSCALL(200, sys_getgid)
-__SYSCALL(201, sys_geteuid)
-__SYSCALL(202, sys_getegid)
-__SYSCALL(203, sys_setreuid)
-__SYSCALL(204, sys_setregid)
-__SYSCALL(205, sys_getgroups)
-__SYSCALL(206, sys_setgroups)
-__SYSCALL(207, sys_fchown)
-__SYSCALL(208, sys_setresuid)
-__SYSCALL(209, sys_getresuid)
-__SYSCALL(210, sys_setresgid)
-__SYSCALL(211, sys_getresgid)
-__SYSCALL(212, sys_chown)
-__SYSCALL(213, sys_setuid)
-__SYSCALL(214, sys_setgid)
-__SYSCALL(215, sys_setfsuid)
-__SYSCALL(216, sys_setfsgid)
-__SYSCALL(217, compat_sys_getdents64)
-__SYSCALL(218, sys_pivot_root)
-__SYSCALL(219, sys_mincore)
-__SYSCALL(220, sys_madvise)
-__SYSCALL(221, compat_sys_fcntl64)
-__SYSCALL(222, sys_ni_syscall) /* 222 for tux */
-__SYSCALL(223, sys_ni_syscall) /* 223 is unused */
-__SYSCALL(224, sys_gettid)
-__SYSCALL(225, compat_sys_readahead_wrapper)
-__SYSCALL(226, sys_setxattr)
-__SYSCALL(227, sys_lsetxattr)
-__SYSCALL(228, sys_fsetxattr)
-__SYSCALL(229, sys_getxattr)
-__SYSCALL(230, sys_lgetxattr)
-__SYSCALL(231, sys_fgetxattr)
-__SYSCALL(232, sys_listxattr)
-__SYSCALL(233, sys_llistxattr)
-__SYSCALL(234, sys_flistxattr)
-__SYSCALL(235, sys_removexattr)
-__SYSCALL(236, sys_lremovexattr)
-__SYSCALL(237, sys_fremovexattr)
-__SYSCALL(238, sys_tkill)
-__SYSCALL(239, sys_sendfile64)
-__SYSCALL(240, compat_sys_futex)
-__SYSCALL(241, compat_sys_sched_setaffinity)
-__SYSCALL(242, compat_sys_sched_getaffinity)
-__SYSCALL(243, compat_sys_io_setup)
-__SYSCALL(244, sys_io_destroy)
-__SYSCALL(245, compat_sys_io_getevents)
-__SYSCALL(246, compat_sys_io_submit)
-__SYSCALL(247, sys_io_cancel)
-__SYSCALL(248, sys_exit_group)
-__SYSCALL(249, compat_sys_lookup_dcookie)
-__SYSCALL(250, sys_epoll_create)
-__SYSCALL(251, sys_epoll_ctl)
-__SYSCALL(252, sys_epoll_wait)
-__SYSCALL(253, sys_remap_file_pages)
-__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */
-__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */
-__SYSCALL(256, sys_set_tid_address)
-__SYSCALL(257, compat_sys_timer_create)
-__SYSCALL(258, compat_sys_timer_settime)
-__SYSCALL(259, compat_sys_timer_gettime)
-__SYSCALL(260, sys_timer_getoverrun)
-__SYSCALL(261, sys_timer_delete)
-__SYSCALL(262, compat_sys_clock_settime)
-__SYSCALL(263, compat_sys_clock_gettime)
-__SYSCALL(264, compat_sys_clock_getres)
-__SYSCALL(265, compat_sys_clock_nanosleep)
-__SYSCALL(266, compat_sys_statfs64_wrapper)
-__SYSCALL(267, compat_sys_fstatfs64_wrapper)
-__SYSCALL(268, sys_tgkill)
-__SYSCALL(269, compat_sys_utimes)
-__SYSCALL(270, compat_sys_fadvise64_64_wrapper)
-__SYSCALL(271, sys_pciconfig_iobase)
-__SYSCALL(272, sys_pciconfig_read)
-__SYSCALL(273, sys_pciconfig_write)
-__SYSCALL(274, compat_sys_mq_open)
-__SYSCALL(275, sys_mq_unlink)
-__SYSCALL(276, compat_sys_mq_timedsend)
-__SYSCALL(277, compat_sys_mq_timedreceive)
-__SYSCALL(278, compat_sys_mq_notify)
-__SYSCALL(279, compat_sys_mq_getsetattr)
-__SYSCALL(280, compat_sys_waitid)
-__SYSCALL(281, sys_socket)
-__SYSCALL(282, sys_bind)
-__SYSCALL(283, sys_connect)
-__SYSCALL(284, sys_listen)
-__SYSCALL(285, sys_accept)
-__SYSCALL(286, sys_getsockname)
-__SYSCALL(287, sys_getpeername)
-__SYSCALL(288, sys_socketpair)
-__SYSCALL(289, sys_send)
-__SYSCALL(290, sys_sendto)
-__SYSCALL(291, compat_sys_recv)
-__SYSCALL(292, compat_sys_recvfrom)
-__SYSCALL(293, sys_shutdown)
-__SYSCALL(294, compat_sys_setsockopt)
-__SYSCALL(295, compat_sys_getsockopt)
-__SYSCALL(296, compat_sys_sendmsg)
-__SYSCALL(297, compat_sys_recvmsg)
-__SYSCALL(298, sys_semop)
-__SYSCALL(299, sys_semget)
-__SYSCALL(300, compat_sys_semctl)
-__SYSCALL(301, compat_sys_msgsnd)
-__SYSCALL(302, compat_sys_msgrcv)
-__SYSCALL(303, sys_msgget)
-__SYSCALL(304, compat_sys_msgctl)
-__SYSCALL(305, compat_sys_shmat)
-__SYSCALL(306, sys_shmdt)
-__SYSCALL(307, sys_shmget)
-__SYSCALL(308, compat_sys_shmctl)
-__SYSCALL(309, sys_add_key)
-__SYSCALL(310, sys_request_key)
-__SYSCALL(311, compat_sys_keyctl)
-__SYSCALL(312, compat_sys_semtimedop)
-__SYSCALL(313, sys_ni_syscall)
-__SYSCALL(314, sys_ioprio_set)
-__SYSCALL(315, sys_ioprio_get)
-__SYSCALL(316, sys_inotify_init)
-__SYSCALL(317, sys_inotify_add_watch)
-__SYSCALL(318, sys_inotify_rm_watch)
-__SYSCALL(319, compat_sys_mbind)
-__SYSCALL(320, compat_sys_get_mempolicy)
-__SYSCALL(321, compat_sys_set_mempolicy)
-__SYSCALL(322, compat_sys_openat)
-__SYSCALL(323, sys_mkdirat)
-__SYSCALL(324, sys_mknodat)
-__SYSCALL(325, sys_fchownat)
-__SYSCALL(326, compat_sys_futimesat)
-__SYSCALL(327, sys_fstatat64)
-__SYSCALL(328, sys_unlinkat)
-__SYSCALL(329, sys_renameat)
-__SYSCALL(330, sys_linkat)
-__SYSCALL(331, sys_symlinkat)
-__SYSCALL(332, sys_readlinkat)
-__SYSCALL(333, sys_fchmodat)
-__SYSCALL(334, sys_faccessat)
-__SYSCALL(335, compat_sys_pselect6)
-__SYSCALL(336, compat_sys_ppoll)
-__SYSCALL(337, sys_unshare)
-__SYSCALL(338, compat_sys_set_robust_list)
-__SYSCALL(339, compat_sys_get_robust_list)
-__SYSCALL(340, sys_splice)
-__SYSCALL(341, compat_sys_sync_file_range2_wrapper)
-__SYSCALL(342, sys_tee)
-__SYSCALL(343, compat_sys_vmsplice)
-__SYSCALL(344, compat_sys_move_pages)
-__SYSCALL(345, sys_getcpu)
-__SYSCALL(346, compat_sys_epoll_pwait)
-__SYSCALL(347, compat_sys_kexec_load)
-__SYSCALL(348, compat_sys_utimensat)
-__SYSCALL(349, compat_sys_signalfd)
-__SYSCALL(350, sys_timerfd_create)
-__SYSCALL(351, sys_eventfd)
-__SYSCALL(352, compat_sys_fallocate_wrapper)
-__SYSCALL(353, compat_sys_timerfd_settime)
-__SYSCALL(354, compat_sys_timerfd_gettime)
-__SYSCALL(355, compat_sys_signalfd4)
-__SYSCALL(356, sys_eventfd2)
-__SYSCALL(357, sys_epoll_create1)
-__SYSCALL(358, sys_dup3)
-__SYSCALL(359, sys_pipe2)
-__SYSCALL(360, sys_inotify_init1)
-__SYSCALL(361, compat_sys_preadv)
-__SYSCALL(362, compat_sys_pwritev)
-__SYSCALL(363, compat_sys_rt_tgsigqueueinfo)
-__SYSCALL(364, sys_perf_event_open)
-__SYSCALL(365, compat_sys_recvmmsg)
-__SYSCALL(366, sys_accept4)
-__SYSCALL(367, sys_fanotify_init)
-__SYSCALL(368, compat_sys_fanotify_mark)
-__SYSCALL(369, sys_prlimit64)
-__SYSCALL(370, sys_name_to_handle_at)
-__SYSCALL(371, compat_sys_open_by_handle_at)
-__SYSCALL(372, compat_sys_clock_adjtime)
-__SYSCALL(373, sys_syncfs)
-__SYSCALL(374, compat_sys_sendmmsg)
-__SYSCALL(375, sys_setns)
-__SYSCALL(376, compat_sys_process_vm_readv)
-__SYSCALL(377, compat_sys_process_vm_writev)
-__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */
+#define __NR_sync_file_range2 __NR_arm_sync_file_range
-#define __NR_compat_syscalls 379
-
-/*
- * Compat syscall numbers used by the AArch64 kernel.
- */
-#define __NR_compat_restart_syscall 0
-#define __NR_compat_sigreturn 119
-#define __NR_compat_rt_sigreturn 173
-
-
-/*
- * The following SVCs are ARM private.
- */
-#define __ARM_NR_COMPAT_BASE 0x0f0000
-#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
-#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
+#endif /* _UAPI__ASM_ARM_UNISTD_H */
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
new file mode 100644
index 000000000000..89bfb0213a50
--- /dev/null
+++ b/arch/arm64/include/asm/uprobes.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com>
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/probes.h>
+
+#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES)
+#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
+#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE
+
+typedef __le32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+};
+
+struct arch_uprobe {
+ union {
+ __le32 insn;
+ __le32 ixol;
+ };
+ struct arch_probe_insn api;
+ bool simulate;
+};
+
+int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_UPROBES
+int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int uprobe_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
+#endif
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 839ce0031bd5..232b46969088 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -1,41 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_VDSO_H
#define __ASM_VDSO_H
-#ifdef __KERNEL__
+#define __VDSO_PAGES 4
-/*
- * Default link address for the vDSO.
- * Since we randomise the VDSO mapping, there's little point in trying
- * to prelink this.
- */
-#define VDSO_LBASE 0x0
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <generated/vdso-offsets.h>
#define VDSO_SYMBOL(base, name) \
({ \
- (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \
+ (void *)(vdso_offset_##name + (unsigned long)(base)); \
})
-#endif /* !__ASSEMBLY__ */
+extern char vdso_start[], vdso_end[];
+extern char vdso32_start[], vdso32_end[];
-#endif /* __KERNEL__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_H */
diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..b054d9febfb5
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/clocksource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSOCLOCKSOURCE_H
+#define __ASM_VDSOCLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ /* vdso clocksource for both 32 and 64bit tasks */ \
+ VDSO_CLOCKMODE_ARCHTIMER, \
+ /* vdso clocksource for 64bit tasks only */ \
+ VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT
+
+#endif
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
new file mode 100644
index 000000000000..d7ebe7ceefa0
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __COMPAT_BARRIER_H
+#define __COMPAT_BARRIER_H
+
+#ifndef __ASSEMBLER__
+/*
+ * Warning: This code is meant to be used from the compat vDSO only.
+ */
+#ifdef __arch64__
+#error This header is meant to be used with from the compat vDSO only
+#endif
+
+#ifdef dmb
+#undef dmb
+#endif
+
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() dmb(ishld)
+#define aarch32_smp_wmb() dmb(ishst)
+
+#undef smp_mb
+#undef smp_rmb
+#undef smp_wmb
+
+#define smp_mb() aarch32_smp_mb()
+#define smp_rmb() aarch32_smp_rmb()
+#define smp_wmb() aarch32_smp_wmb()
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __COMPAT_BARRIER_H */
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
new file mode 100644
index 000000000000..0d513f924321
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
+#define __ASM_VDSO_COMPAT_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/barrier.h>
+#include <asm/unistd_compat_32.h>
+#include <asm/errno.h>
+
+#include <asm/vdso/compat_barrier.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#define BUILD_VDSO32 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("r1") = _tz;
+ register struct __kernel_old_timeval *tv asm("r0") = _tv;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat32_gettimeofday;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat32_clock_gettime64;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ register struct old_timespec32 *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat32_clock_gettime;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat32_clock_getres_time64;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ register struct old_timespec32 *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat32_clock_getres;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_time_data *vd)
+{
+ u64 res;
+
+ /*
+ * Core checks for mode already, so this raced against a concurrent
+ * update. Return something. Core will do another round and then
+ * see the mode change and fallback to the syscall.
+ */
+ if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER)
+ return 0;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
+ asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
+{
+ const struct vdso_time_data *ret;
+
+ /*
+ * This simply puts &_vdso_time_data into ret. The reason why we don't use
+ * `ret = _vdso_time_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_time_data in a register,
+ * it goes through a relocation almost every time _vdso_time_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __aarch64_get_vdso_u_time_data() is called, and then keep the
+ * result in a register.
+ */
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(&vdso_u_time_data));
+
+ return ret;
+}
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
+
+static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
+{
+ return vc->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
+}
+#define vdso_clocksource_ok vdso_clocksource_ok
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_COMPAT_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..da1d58bbfabe
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/getrandom.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/unistd.h>
+#include <asm/vdso/vsyscall.h>
+#include <vdso/datapage.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
+{
+ register void *buffer asm ("x0") = _buffer;
+ register size_t len asm ("x1") = _len;
+ register unsigned int flags asm ("x2") = _flags;
+ register long ret asm ("x0");
+ register long nr asm ("x8") = __NR_getrandom;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (buffer), "r" (len), "r" (flags), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..3658a757e255
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifdef __aarch64__
+
+#ifndef __ASSEMBLER__
+
+#include <asm/alternative.h>
+#include <asm/arch_timer.h>
+#include <asm/barrier.h>
+#include <asm/unistd.h>
+#include <asm/sysreg.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_time_data *vd)
+{
+ /*
+ * Core checks for mode already, so this raced against a concurrent
+ * update. Return something. Core will do another round and then
+ * see the mode change and fallback to the syscall.
+ */
+ if (clock_mode == VDSO_CLOCKMODE_NONE)
+ return 0;
+
+ return __arch_counter_get_cntvct();
+}
+
+#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB)
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
+{
+ const struct vdso_time_data *ret = &vdso_u_time_data;
+
+ /* Work around invalid absolute relocations */
+ OPTIMIZER_HIDE_VAR(ret);
+
+ return ret;
+}
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
+#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */
+
+#endif /* !__ASSEMBLER__ */
+
+#else /* !__aarch64__ */
+
+#include "compat_gettimeofday.h"
+
+#endif /* __aarch64__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/processor.h b/arch/arm64/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..7abb0cc81cd6
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/processor.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..3f3c8eb74e2e
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLER__
+
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+void __arch_update_vdso_clock(struct vdso_clock *vc)
+{
+ vc->mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vdso_clock __arch_update_vdso_clock
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
deleted file mode 100644
index de66199673d7..000000000000
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
- __u64 cs_cycle_last; /* Timebase at clocksource init */
- __u64 xtime_clock_sec; /* Kernel time */
- __u64 xtime_clock_nsec;
- __u64 xtime_coarse_sec; /* Coarse time */
- __u64 xtime_coarse_nsec;
- __u64 wtm_clock_sec; /* Wall to monotonic time */
- __u64 wtm_clock_nsec;
- __u32 tb_seq_count; /* Timebase sequence counter */
- __u32 cs_mult; /* Clocksource multiplier */
- __u32 cs_shift; /* Clocksource shift */
- __u32 tz_minuteswest; /* Whacky timezone stuff */
- __u32 tz_dsttime;
- __u32 use_syscall;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
new file mode 100644
index 000000000000..b815d8f2c0dc
--- /dev/null
+++ b/arch/arm64/include/asm/vectors.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 ARM Ltd.
+ */
+#ifndef __ASM_VECTORS_H
+#define __ASM_VECTORS_H
+
+#include <linux/bug.h>
+#include <linux/percpu.h>
+
+#include <asm/fixmap.h>
+
+extern char vectors[];
+extern char tramp_vectors[];
+extern char __bp_harden_el1_vectors[];
+
+/*
+ * Note: the order of this enum corresponds to two arrays in entry.S:
+ * tramp_vecs and __bp_harden_el1_vectors. By default the canonical
+ * 'full fat' vectors are used directly.
+ */
+enum arm64_bp_harden_el1_vectors {
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ /*
+ * Perform the BHB loop mitigation, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_LOOP,
+
+ /*
+ * Make the SMC call for firmware mitigation, before branching to the
+ * canonical vectors.
+ */
+ EL1_VECTOR_BHB_FW,
+
+ /*
+ * Use the ClearBHB instruction, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_CLEAR_INSN,
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+ /*
+ * Remap the kernel before branching to the canonical vectors.
+ */
+ EL1_VECTOR_KPTI,
+};
+
+#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+#define EL1_VECTOR_BHB_LOOP -1
+#define EL1_VECTOR_BHB_FW -1
+#define EL1_VECTOR_BHB_CLEAR_INSN -1
+#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+/* The vectors to use on return from EL0. e.g. to remap the kernel */
+DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
+
+#ifndef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_VALIAS 0ul
+#endif
+
+static inline const char *
+arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
+{
+ if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ return (char *)(TRAMP_VALIAS + SZ_2K * slot);
+
+ WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
+
+ return __bp_harden_el1_vectors + SZ_2K * slot;
+}
+
+#endif /* __ASM_VECTORS_H */
diff --git a/arch/arm64/include/asm/vermagic.h b/arch/arm64/include/asm/vermagic.h
new file mode 100644
index 000000000000..a1eec6a000f1
--- /dev/null
+++ b/arch/arm64/include/asm/vermagic.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#define MODULE_ARCH_VERMAGIC "aarch64"
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 439827271e3d..b51ab6840f9c 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -1,44 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2012 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM__VIRT_H
#define __ASM__VIRT_H
-#define BOOT_CPU_MODE_EL2 (0x0e12b007)
+/*
+ * The arm64 hcall implementation uses x0 to specify the hcall
+ * number. A value less than HVC_STUB_HCALL_NR indicates a special
+ * hcall, such as set vector. Any other value is handled in a
+ * hypervisor specific way.
+ *
+ * The hypercall is allowed to clobber any of the caller-saved
+ * registers (x0-x18), so it is advisable to use it through the
+ * indirection of a function call (as implemented in hyp-stub.S).
+ */
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x1: Physical address of the new vector table.
+ */
+#define HVC_SET_VECTORS 0
+
+/*
+ * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine.
+ */
+#define HVC_SOFT_RESTART 1
+
+/*
+ * HVC_RESET_VECTORS - Restore the vectors to the original HYP stubs
+ */
+#define HVC_RESET_VECTORS 2
+
+/*
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
+ */
+#define HVC_FINALISE_EL2 3
+
+/*
+ * HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value
+ */
+#define HVC_GET_ICH_VTR_EL2 4
+
+/* Max number of HYP stub hypercalls */
+#define HVC_STUB_HCALL_NR 5
+
+/* Error returned when an invalid stub number is passed into x0 */
+#define HVC_STUB_ERR 0xbadca11
+
+#define BOOT_CPU_MODE_EL1 (0xe11)
+#define BOOT_CPU_MODE_EL2 (0xe12)
+
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H BIT_ULL(32)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
+#include <asm/cpufeature.h>
/*
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
* In this case, both 32bit halves of __boot_cpu_mode will contain the
- * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ * same value (either BOOT_CPU_MODE_EL1 if booted in EL1, BOOT_CPU_MODE_EL2 if
+ * booted in EL2).
*
* Should the bootloader fail to do this, the two values will be different.
* This allows the kernel to flag an error when the secondaries have come up.
*/
extern u32 __boot_cpu_mode[2];
+#define ARM64_VECTOR_TABLE_LEN SZ_2K
+
void __hyp_set_vectors(phys_addr_t phys_vector_base);
-phys_addr_t __hyp_get_vectors(void);
+void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
+
+DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
+static inline bool is_pkvm_initialized(void)
+{
+ return IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized);
+}
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (is_pkvm_initialized())
+ return true;
+
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
}
@@ -46,9 +111,61 @@ static inline bool is_hyp_mode_available(void)
/* Check if the bootloader has booted CPUs in different modes */
static inline bool is_hyp_mode_mismatched(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (is_pkvm_initialized())
+ return false;
+
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
-#endif /* __ASSEMBLY__ */
+static __always_inline bool is_kernel_in_hyp_mode(void)
+{
+ BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
+ __is_defined(__KVM_VHE_HYPERVISOR__));
+ return read_sysreg(CurrentEL) == CurrentEL_EL2;
+}
+
+static __always_inline bool has_vhe(void)
+{
+ /*
+ * Code only run in VHE/NVHE hyp context can assume VHE is present or
+ * absent. Otherwise fall back to caps.
+ * This allows the compiler to discard VHE-specific code from the
+ * nVHE object, reducing the number of external symbol references
+ * needed to link.
+ */
+ if (is_vhe_hyp_code())
+ return true;
+ else if (is_nvhe_hyp_code())
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
+}
+
+static __always_inline bool is_protected_kvm_enabled(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
+}
+
+static __always_inline bool has_hvhe(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+
+ return cpus_have_final_cap(ARM64_KVM_HVHE);
+}
+
+static inline bool is_hyp_nvhe(void)
+{
+ return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
+}
+
+#endif /* __ASSEMBLER__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..4ec1acd3c1b3
--- /dev/null
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -0,0 +1,74 @@
+#ifndef _ASM_ARM64_VMALLOC_H
+#define _ASM_ARM64_VMALLOC_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ return pud_sect_supported();
+}
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return true;
+}
+
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr,
+ unsigned long end, u64 pfn,
+ unsigned int max_page_shift)
+{
+ /*
+ * If the block is at least CONT_PTE_SIZE in size, and is naturally
+ * aligned in both virtual and physical space, then we can pte-map the
+ * block using the PTE_CONT bit for more efficient use of the TLB.
+ */
+ if (max_page_shift < CONT_PTE_SHIFT)
+ return PAGE_SIZE;
+
+ if (end - addr < CONT_PTE_SIZE)
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(addr, CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ return CONT_PTE_SIZE;
+}
+
+#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+ pte_t *ptep)
+{
+ /*
+ * The caller handles alignment so it's sufficient just to check
+ * PTE_CONT.
+ */
+ return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= CONT_PTE_SIZE)
+ return CONT_PTE_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
+#endif
+
+#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
+static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
+{
+ return pgprot_tagged(prot);
+}
+
+#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
new file mode 100644
index 000000000000..75daee1a07e9
--- /dev/null
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2017 Arm Ltd.
+#ifndef __ASM_VMAP_STACK_H
+#define __ASM_VMAP_STACK_H
+
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <asm/memory.h>
+#include <asm/thread_info.h>
+
+/*
+ * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
+ * stacks need to have the same alignment.
+ */
+static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
+{
+ void *p;
+
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
+ return kasan_reset_tag(p);
+}
+
+#endif /* __ASM_VMAP_STACK_H */
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
new file mode 100644
index 000000000000..c2485a862e69
--- /dev/null
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System register offsets in the VNCR page
+ * All offsets are *byte* displacements!
+ */
+
+#ifndef __ARM64_VNCR_MAPPING_H__
+#define __ARM64_VNCR_MAPPING_H__
+
+#define VNCR_VTTBR_EL2 0x020
+#define VNCR_VTCR_EL2 0x040
+#define VNCR_VMPIDR_EL2 0x050
+#define VNCR_CNTVOFF_EL2 0x060
+#define VNCR_HCR_EL2 0x078
+#define VNCR_HSTR_EL2 0x080
+#define VNCR_VPIDR_EL2 0x088
+#define VNCR_TPIDR_EL2 0x090
+#define VNCR_HCRX_EL2 0x0A0
+#define VNCR_VNCR_EL2 0x0B0
+#define VNCR_CPACR_EL1 0x100
+#define VNCR_CONTEXTIDR_EL1 0x108
+#define VNCR_SCTLR_EL1 0x110
+#define VNCR_ACTLR_EL1 0x118
+#define VNCR_TCR_EL1 0x120
+#define VNCR_AFSR0_EL1 0x128
+#define VNCR_AFSR1_EL1 0x130
+#define VNCR_ESR_EL1 0x138
+#define VNCR_MAIR_EL1 0x140
+#define VNCR_AMAIR_EL1 0x148
+#define VNCR_MDSCR_EL1 0x158
+#define VNCR_SPSR_EL1 0x160
+#define VNCR_CNTV_CVAL_EL0 0x168
+#define VNCR_CNTV_CTL_EL0 0x170
+#define VNCR_CNTP_CVAL_EL0 0x178
+#define VNCR_CNTP_CTL_EL0 0x180
+#define VNCR_SCXTNUM_EL1 0x188
+#define VNCR_TFSR_EL1 0x190
+#define VNCR_HDFGRTR2_EL2 0x1A0
+#define VNCR_HDFGWTR2_EL2 0x1B0
+#define VNCR_HFGRTR_EL2 0x1B8
+#define VNCR_HFGWTR_EL2 0x1C0
+#define VNCR_HFGITR_EL2 0x1C8
+#define VNCR_HDFGRTR_EL2 0x1D0
+#define VNCR_HDFGWTR_EL2 0x1D8
+#define VNCR_ZCR_EL1 0x1E0
+#define VNCR_HAFGRTR_EL2 0x1E8
+#define VNCR_TTBR0_EL1 0x200
+#define VNCR_TTBR1_EL1 0x210
+#define VNCR_FAR_EL1 0x220
+#define VNCR_ELR_EL1 0x230
+#define VNCR_SP_EL1 0x240
+#define VNCR_VBAR_EL1 0x250
+#define VNCR_TCR2_EL1 0x270
+#define VNCR_SCTLR2_EL1 0x278
+#define VNCR_PIRE0_EL1 0x290
+#define VNCR_PIR_EL1 0x2A0
+#define VNCR_POR_EL1 0x2A8
+#define VNCR_HFGRTR2_EL2 0x2C0
+#define VNCR_HFGWTR2_EL2 0x2C8
+#define VNCR_HFGITR2_EL2 0x310
+#define VNCR_ICH_LR0_EL2 0x400
+#define VNCR_ICH_LR1_EL2 0x408
+#define VNCR_ICH_LR2_EL2 0x410
+#define VNCR_ICH_LR3_EL2 0x418
+#define VNCR_ICH_LR4_EL2 0x420
+#define VNCR_ICH_LR5_EL2 0x428
+#define VNCR_ICH_LR6_EL2 0x430
+#define VNCR_ICH_LR7_EL2 0x438
+#define VNCR_ICH_LR8_EL2 0x440
+#define VNCR_ICH_LR9_EL2 0x448
+#define VNCR_ICH_LR10_EL2 0x450
+#define VNCR_ICH_LR11_EL2 0x458
+#define VNCR_ICH_LR12_EL2 0x460
+#define VNCR_ICH_LR13_EL2 0x468
+#define VNCR_ICH_LR14_EL2 0x470
+#define VNCR_ICH_LR15_EL2 0x478
+#define VNCR_ICH_AP0R0_EL2 0x480
+#define VNCR_ICH_AP0R1_EL2 0x488
+#define VNCR_ICH_AP0R2_EL2 0x490
+#define VNCR_ICH_AP0R3_EL2 0x498
+#define VNCR_ICH_AP1R0_EL2 0x4A0
+#define VNCR_ICH_AP1R1_EL2 0x4A8
+#define VNCR_ICH_AP1R2_EL2 0x4B0
+#define VNCR_ICH_AP1R3_EL2 0x4B8
+#define VNCR_ICH_HCR_EL2 0x4C0
+#define VNCR_ICH_VMCR_EL2 0x4C8
+#define VNCR_VDISR_EL2 0x500
+#define VNCR_VSESR_EL2 0x508
+#define VNCR_PMBLIMITR_EL1 0x800
+#define VNCR_PMBPTR_EL1 0x810
+#define VNCR_PMBSR_EL1 0x820
+#define VNCR_PMSCR_EL1 0x828
+#define VNCR_PMSEVFR_EL1 0x830
+#define VNCR_PMSICR_EL1 0x838
+#define VNCR_PMSIRR_EL1 0x840
+#define VNCR_PMSLATFR_EL1 0x848
+#define VNCR_PMSNEVFR_EL1 0x850
+#define VNCR_PMSDSFR_EL1 0x858
+#define VNCR_TRFCR_EL1 0x880
+#define VNCR_MPAM1_EL1 0x900
+#define VNCR_MPAMHCR_EL2 0x930
+#define VNCR_MPAMVPMV_EL2 0x938
+#define VNCR_MPAMVPM0_EL2 0x940
+#define VNCR_MPAMVPM1_EL2 0x948
+#define VNCR_MPAMVPM2_EL2 0x950
+#define VNCR_MPAMVPM3_EL2 0x958
+#define VNCR_MPAMVPM4_EL2 0x960
+#define VNCR_MPAMVPM5_EL2 0x968
+#define VNCR_MPAMVPM6_EL2 0x970
+#define VNCR_MPAMVPM7_EL2 0x978
+
+#endif /* __ARM64_VNCR_MAPPING_H__ */
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..824ca6987a51
--- /dev/null
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ */
+#ifndef __ASM_WORD_AT_A_TIME_H
+#define __ASM_WORD_AT_A_TIME_H
+
+#include <linux/uaccess.h>
+
+#ifndef __AARCH64EB__
+
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
+
+struct word_at_a_time {
+ const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+ const struct word_at_a_time *c)
+{
+ unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+ *bits = mask;
+ return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+#define create_zero_mask(bits) (bits)
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#else /* __AARCH64EB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret;
+
+ __mte_enable_tco_async();
+
+ /* Load word from unaligned pointer addr */
+ asm(
+ "1: ldr %0, %2\n"
+ "2:\n"
+ _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+ : "=&r" (ret)
+ : "r" (addr), "Q" (*(unsigned long *)addr));
+
+ __mte_disable_tco_async();
+
+ return ret;
+}
+
+#endif /* __ASM_WORD_AT_A_TIME_H */
diff --git a/arch/arm64/include/asm/xen/events.h b/arch/arm64/include/asm/xen/events.h
index 86553213c132..2977b5fe068d 100644
--- a/arch/arm64/include/asm/xen/events.h
+++ b/arch/arm64/include/asm/xen/events.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_XEN_EVENTS_H
#define _ASM_ARM64_XEN_EVENTS_H
@@ -13,9 +14,15 @@ enum ipi_vector {
static inline int xen_irqs_disabled(struct pt_regs *regs)
{
- return raw_irqs_disabled_flags((unsigned long) regs->pstate);
+ return regs_irqs_disabled(regs);
}
#define xchg_xen_ulong(ptr, val) xchg((ptr), (val))
+/* Rebind event channel is supported by default */
+static inline bool xen_support_evtchn_rebind(void)
+{
+ return true;
+}
+
#endif /* _ASM_ARM64_XEN_EVENTS_H */
diff --git a/arch/arm64/include/asm/xen/hypercall.h b/arch/arm64/include/asm/xen/hypercall.h
index 74b0c423ff5b..3522cbaed316 100644
--- a/arch/arm64/include/asm/xen/hypercall.h
+++ b/arch/arm64/include/asm/xen/hypercall.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/hypercall.h>
+#include <xen/arm/hypercall.h>
diff --git a/arch/arm64/include/asm/xen/hypervisor.h b/arch/arm64/include/asm/xen/hypervisor.h
index f263da8e8769..d6e7709d0688 100644
--- a/arch/arm64/include/asm/xen/hypervisor.h
+++ b/arch/arm64/include/asm/xen/hypervisor.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/hypervisor.h>
+#include <xen/arm/hypervisor.h>
diff --git a/arch/arm64/include/asm/xen/interface.h b/arch/arm64/include/asm/xen/interface.h
index 44457aebeed4..88c0d75da190 100644
--- a/arch/arm64/include/asm/xen/interface.h
+++ b/arch/arm64/include/asm/xen/interface.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/interface.h>
+#include <xen/arm/interface.h>
diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h
index bed87ec36780..dffdc773221b 100644
--- a/arch/arm64/include/asm/xen/page.h
+++ b/arch/arm64/include/asm/xen/page.h
@@ -1 +1,7 @@
-#include <../../arm/include/asm/xen/page.h>
+#include <xen/arm/page.h>
+#include <asm/mmu.h>
+
+static inline bool xen_kernel_unmapped_at_usr(void)
+{
+ return arm64_kernel_unmapped_at_el0();
+}
diff --git a/arch/arm64/include/asm/xen/swiotlb-xen.h b/arch/arm64/include/asm/xen/swiotlb-xen.h
new file mode 100644
index 000000000000..455ade5d5320
--- /dev/null
+++ b/arch/arm64/include/asm/xen/swiotlb-xen.h
@@ -0,0 +1 @@
+#include <xen/arm/swiotlb-xen.h>
diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h
new file mode 100644
index 000000000000..7ebb7eb0bd93
--- /dev/null
+++ b/arch/arm64/include/asm/xen/xen-ops.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <xen/arm/xen-ops.h>
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
new file mode 100644
index 000000000000..c38e3d017a79
--- /dev/null
+++ b/arch/arm64/include/asm/xor.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm64/include/asm/xor.h
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+extern struct xor_block_template const xor_block_inner_neon;
+
+static void
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
+{
+ scoped_ksimd()
+ xor_block_inner_neon.do_2(bytes, p1, p2);
+}
+
+static void
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
+{
+ scoped_ksimd()
+ xor_block_inner_neon.do_3(bytes, p1, p2, p3);
+}
+
+static void
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
+{
+ scoped_ksimd()
+ xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
+}
+
+static void
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
+{
+ scoped_ksimd()
+ xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
+}
+
+static struct xor_block_template xor_block_arm64 = {
+ .name = "arm64_neon",
+ .do_2 = xor_neon_2,
+ .do_3 = xor_neon_3,
+ .do_4 = xor_neon_4,
+ .do_5 = xor_neon_5
+};
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (cpu_has_neon()) { \
+ xor_speed(&xor_block_arm64);\
+ } \
+ } while (0)
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */