summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--arch/arm64/include/asm/Kbuild25
-rw-r--r--arch/arm64/include/asm/acpi.h42
-rw-r--r--arch/arm64/include/asm/alternative-macros.h264
-rw-r--r--arch/arm64/include/asm/alternative.h262
-rw-r--r--arch/arm64/include/asm/apple_m1_pmu.h64
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h66
-rw-r--r--arch/arm64/include/asm/arch_timer.h108
-rw-r--r--arch/arm64/include/asm/archrandom.h134
-rw-r--r--arch/arm64/include/asm/arm_pmuv3.h140
-rw-r--r--arch/arm64/include/asm/asm-bug.h4
-rw-r--r--arch/arm64/include/asm/asm-extable.h126
-rw-r--r--arch/arm64/include/asm/asm-prototypes.h6
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h44
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h92
-rw-r--r--arch/arm64/include/asm/assembler.h495
-rw-r--r--arch/arm64/include/asm/atomic.h46
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h184
-rw-r--r--arch/arm64/include/asm/atomic_lse.h353
-rw-r--r--arch/arm64/include/asm/barrier.h99
-rw-r--r--arch/arm64/include/asm/bitops.h1
-rw-r--r--arch/arm64/include/asm/boot.h3
-rw-r--r--arch/arm64/include/asm/brk-imm.h11
-rw-r--r--arch/arm64/include/asm/cache.h69
-rw-r--r--arch/arm64/include/asm/cacheflush.h140
-rw-r--r--arch/arm64/include/asm/checksum.h17
-rw-r--r--arch/arm64/include/asm/clocksource.h4
-rw-r--r--arch/arm64/include/asm/cmpxchg.h57
-rw-r--r--arch/arm64/include/asm/compat.h142
-rw-r--r--arch/arm64/include/asm/compiler.h40
-rw-r--r--arch/arm64/include/asm/cpu.h54
-rw-r--r--arch/arm64/include/asm/cpu_ops.h19
-rw-r--r--arch/arm64/include/asm/cpucaps.h115
-rw-r--r--arch/arm64/include/asm/cpufeature.h453
-rw-r--r--arch/arm64/include/asm/cpuidle.h42
-rw-r--r--arch/arm64/include/asm/cputype.h86
-rw-r--r--arch/arm64/include/asm/crash_core.h10
-rw-r--r--arch/arm64/include/asm/daifflags.h27
-rw-r--r--arch/arm64/include/asm/debug-monitors.h22
-rw-r--r--arch/arm64/include/asm/device.h3
-rw-r--r--arch/arm64/include/asm/efi.h106
-rw-r--r--arch/arm64/include/asm/el2_setup.h344
-rw-r--r--arch/arm64/include/asm/elf.h79
-rw-r--r--arch/arm64/include/asm/esr.h103
-rw-r--r--arch/arm64/include/asm/exception.h75
-rw-r--r--arch/arm64/include/asm/exec.h1
-rw-r--r--arch/arm64/include/asm/extable.h25
-rw-r--r--arch/arm64/include/asm/fb.h15
-rw-r--r--arch/arm64/include/asm/fixmap.h39
-rw-r--r--arch/arm64/include/asm/fpsimd.h326
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h167
-rw-r--r--arch/arm64/include/asm/ftrace.h147
-rw-r--r--arch/arm64/include/asm/futex.h38
-rw-r--r--arch/arm64/include/asm/gpr-num.h26
-rw-r--r--arch/arm64/include/asm/hardirq.h94
-rw-r--r--arch/arm64/include/asm/hugetlb.h54
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h12
-rw-r--r--arch/arm64/include/asm/hwcap.h58
-rw-r--r--arch/arm64/include/asm/hyp_image.h68
-rw-r--r--arch/arm64/include/asm/hyperv-tlfs.h78
-rw-r--r--arch/arm64/include/asm/hypervisor.h3
-rw-r--r--arch/arm64/include/asm/image.h2
-rw-r--r--arch/arm64/include/asm/insn-def.h23
-rw-r--r--arch/arm64/include/asm/insn.h330
-rw-r--r--arch/arm64/include/asm/io.h92
-rw-r--r--arch/arm64/include/asm/irq.h9
-rw-r--r--arch/arm64/include/asm/irq_work.h4
-rw-r--r--arch/arm64/include/asm/irqflags.h198
-rw-r--r--arch/arm64/include/asm/jump_label.h8
-rw-r--r--arch/arm64/include/asm/kasan.h24
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h119
-rw-r--r--arch/arm64/include/asm/kexec.h29
-rw-r--r--arch/arm64/include/asm/kfence.h32
-rw-r--r--arch/arm64/include/asm/kprobes.h13
-rw-r--r--arch/arm64/include/asm/kvm_arm.h188
-rw-r--r--arch/arm64/include/asm/kvm_asm.h341
-rw-r--r--arch/arm64/include/asm/kvm_coproc.h46
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h428
-rw-r--r--arch/arm64/include/asm/kvm_host.h1186
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h130
-rw-r--r--arch/arm64/include/asm/kvm_mmio.h29
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h540
-rw-r--r--arch/arm64/include/asm/kvm_mte.h66
-rw-r--r--arch/arm64/include/asm/kvm_nested.h67
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h787
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h131
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h40
-rw-r--r--arch/arm64/include/asm/kvm_ras.h2
-rw-r--r--arch/arm64/include/asm/kvm_types.h8
-rw-r--r--arch/arm64/include/asm/linkage.h43
-rw-r--r--arch/arm64/include/asm/lse.h27
-rw-r--r--arch/arm64/include/asm/memory.h202
-rw-r--r--arch/arm64/include/asm/mman.h63
-rw-r--r--arch/arm64/include/asm/mmu.h128
-rw-r--r--arch/arm64/include/asm/mmu_context.h162
-rw-r--r--arch/arm64/include/asm/module.h28
-rw-r--r--arch/arm64/include/asm/module.lds.h26
-rw-r--r--arch/arm64/include/asm/mshyperv.h54
-rw-r--r--arch/arm64/include/asm/mte-def.h18
-rw-r--r--arch/arm64/include/asm/mte-kasan.h258
-rw-r--r--arch/arm64/include/asm/mte.h219
-rw-r--r--arch/arm64/include/asm/numa.h45
-rw-r--r--arch/arm64/include/asm/page-def.h5
-rw-r--r--arch/arm64/include/asm/page.h31
-rw-r--r--arch/arm64/include/asm/paravirt.h14
-rw-r--r--arch/arm64/include/asm/paravirt_api_clock.h1
-rw-r--r--arch/arm64/include/asm/patching.h15
-rw-r--r--arch/arm64/include/asm/pci.h19
-rw-r--r--arch/arm64/include/asm/percpu.h65
-rw-r--r--arch/arm64/include/asm/perf_event.h199
-rw-r--r--arch/arm64/include/asm/pgalloc.h65
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h85
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h160
-rw-r--r--arch/arm64/include/asm/pgtable-types.h5
-rw-r--r--arch/arm64/include/asm/pgtable.h453
-rw-r--r--arch/arm64/include/asm/pointer_auth.h134
-rw-r--r--arch/arm64/include/asm/preempt.h25
-rw-r--r--arch/arm64/include/asm/probes.h2
-rw-r--r--arch/arm64/include/asm/proc-fns.h2
-rw-r--r--arch/arm64/include/asm/processor.h208
-rw-r--r--arch/arm64/include/asm/ptdump.h11
-rw-r--r--arch/arm64/include/asm/ptrace.h44
-rw-r--r--arch/arm64/include/asm/rwonce.h73
-rw-r--r--arch/arm64/include/asm/scs.h80
-rw-r--r--arch/arm64/include/asm/sdei.h25
-rw-r--r--arch/arm64/include/asm/seccomp.h9
-rw-r--r--arch/arm64/include/asm/sections.h10
-rw-r--r--arch/arm64/include/asm/semihost.h24
-rw-r--r--arch/arm64/include/asm/set_memory.h17
-rw-r--r--arch/arm64/include/asm/setup.h47
-rw-r--r--arch/arm64/include/asm/signal.h25
-rw-r--r--arch/arm64/include/asm/simd.h15
-rw-r--r--arch/arm64/include/asm/smp.h39
-rw-r--r--arch/arm64/include/asm/smp_plat.h1
-rw-r--r--arch/arm64/include/asm/sparsemem.h22
-rw-r--r--arch/arm64/include/asm/spectre.h120
-rw-r--r--arch/arm64/include/asm/spinlock.h15
-rw-r--r--arch/arm64/include/asm/spinlock_types.h2
-rw-r--r--arch/arm64/include/asm/stackprotector.h15
-rw-r--r--arch/arm64/include/asm/stacktrace.h201
-rw-r--r--arch/arm64/include/asm/stacktrace/common.h159
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h55
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h206
-rw-r--r--arch/arm64/include/asm/string.h5
-rw-r--r--arch/arm64/include/asm/suspend.h2
-rw-r--r--arch/arm64/include/asm/syscall.h40
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h9
-rw-r--r--arch/arm64/include/asm/sysreg.h1128
-rw-r--r--arch/arm64/include/asm/system_misc.h10
-rw-r--r--arch/arm64/include/asm/thread_info.h39
-rw-r--r--arch/arm64/include/asm/tlb.h55
-rw-r--r--arch/arm64/include/asm/tlbbatch.h12
-rw-r--r--arch/arm64/include/asm/tlbflush.h309
-rw-r--r--arch/arm64/include/asm/topology.h14
-rw-r--r--arch/arm64/include/asm/trans_pgd.h41
-rw-r--r--arch/arm64/include/asm/traps.h93
-rw-r--r--arch/arm64/include/asm/uaccess.h363
-rw-r--r--arch/arm64/include/asm/unistd.h6
-rw-r--r--arch/arm64/include/asm/unistd32.h86
-rw-r--r--arch/arm64/include/asm/uprobes.h4
-rw-r--r--arch/arm64/include/asm/vdso.h5
-rw-r--r--arch/arm64/include/asm/vdso/clocksource.h11
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h7
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h46
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h49
-rw-r--r--arch/arm64/include/asm/vdso/processor.h17
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h9
-rw-r--r--arch/arm64/include/asm/vectors.h73
-rw-r--r--arch/arm64/include/asm/vermagic.h10
-rw-r--r--arch/arm64/include/asm/virt.h75
-rw-r--r--arch/arm64/include/asm/vmalloc.h34
-rw-r--r--arch/arm64/include/asm/vmap_stack.h11
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h103
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h25
-rw-r--r--arch/arm64/include/asm/xen/page.h6
-rw-r--r--arch/arm64/include/asm/xen/swiotlb-xen.h1
-rw-r--r--arch/arm64/include/asm/xen/xen-ops.h (renamed from arch/arm64/include/asm/xen/page-coherent.h)2
-rw-r--r--arch/arm64/include/asm/xor.h21
177 files changed, 12027 insertions, 5269 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index bd23f87d6c55..4b6d2d52053e 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,27 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-generic-y += bugs.h
-generic-y += delay.h
-generic-y += div64.h
-generic-y += dma.h
-generic-y += dma-contiguous.h
-generic-y += dma-mapping.h
generic-y += early_ioremap.h
-generic-y += emergency-restart.h
-generic-y += hw_irq.h
-generic-y += irq_regs.h
-generic-y += kdebug.h
-generic-y += kmap_types.h
-generic-y += local.h
-generic-y += local64.h
generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += serial.h
-generic-y += set_memory.h
-generic-y += switch_to.h
-generic-y += trace_clock.h
-generic-y += unaligned.h
+generic-y += parport.h
generic-y += user.h
-generic-y += vga.h
+
+generated-y += cpucap-defs.h
+generated-y += sysreg-defs.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index b263e239cb59..6792a1f83f2a 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -9,9 +9,11 @@
#ifndef _ASM_ACPI_H
#define _ASM_ACPI_H
+#include <linux/cpuidle.h>
#include <linux/efi.h>
#include <linux/memblock.h>
#include <linux/psci.h>
+#include <linux/stddef.h>
#include <asm/cputype.h>
#include <asm/io.h>
@@ -31,35 +33,43 @@
* is therefore used to delimit the MADT GICC structure minimum length
* appropriately.
*/
-#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \
+#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \
struct acpi_madt_generic_interrupt, efficiency_class)
#define BAD_MADT_GICC_ENTRY(entry, end) \
(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
(unsigned long)(entry) + (entry)->header.length > (end))
-#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
+#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \
spe_interrupt) + sizeof(u16))
+#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \
+ trbe_interrupt) + sizeof(u16))
+/*
+ * Arm® Functional Fixed Hardware Specification Version 1.2.
+ * Table 2: Arm Architecture context loss flags
+ */
+#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */
+
+static inline unsigned int arch_get_idle_state_flags(u32 arch_flags)
+{
+ if (arch_flags & CPUIDLE_CORE_CTXT)
+ return CPUIDLE_FLAG_TIMER_STOP;
+
+ return 0;
+}
+#define arch_get_idle_state_flags arch_get_idle_state_flags
+
+#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */
+#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */
+#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */
+
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
/* ACPI table mapping after acpi_permanent_mmap is set */
-static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
- acpi_size size)
-{
- /* For normal memory we already have a cacheable mapping. */
- if (memblock_is_map_memory(phys))
- return (void __iomem *)__phys_to_virt(phys);
-
- /*
- * We should still honor the memory's attribute here because
- * crash dump kernel possibly excludes some ACPI (reclaim)
- * regions from memblock list.
- */
- return __ioremap(phys, size, __acpi_get_mem_attribute(phys));
-}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
#define acpi_os_ioremap acpi_os_ioremap
typedef u64 phys_cpuid_t;
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
new file mode 100644
index 000000000000..210bb43cff2c
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#include <linux/const.h>
+#include <vdso/bits.h>
+
+#include <asm/cpucaps.h>
+#include <asm/insn-def.h>
+
+/*
+ * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly
+ * macros below we must use we must use `(1 << ARM64_CB_SHIFT)`.
+ */
+#define ARM64_CB_SHIFT 15
+#define ARM64_CB_BIT BIT(ARM64_CB_SHIFT)
+
+#if ARM64_NCAPS >= ARM64_CB_BIT
+#error "cpucaps have overflown ARM64_CB_BIT"
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+#define ALTINSTR_ENTRY(cpucap) \
+ " .word 661b - .\n" /* label */ \
+ " .word 663f - .\n" /* new instruction */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+#define ALTINSTR_ENTRY_CB(cpucap, cb) \
+ " .word 661b - .\n" /* label */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
+ " .byte 662b-661b\n" /* source len */ \
+ " .byte 664f-663f\n" /* replacement len */
+
+/*
+ * alternative assembly primitive:
+ *
+ * If any of these .org directive fail, it means that insn1 and insn2
+ * don't have the same length. This used to be written as
+ *
+ * .if ((664b-663b) != (662b-661b))
+ * .error "Alternatives instruction length mismatch"
+ * .endif
+ *
+ * but most assemblers die if insn1 or insn2 have a .inst. This should
+ * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
+ * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
+ */
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(cpucap) \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "663:\n\t" \
+ newinstr "\n" \
+ "664:\n\t" \
+ ".org . - (664b-663b) + (662b-661b)\n\t" \
+ ".org . - (662b-661b) + (664b-663b)\n\t" \
+ ".previous\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \
+ ".if "__stringify(cfg_enabled)" == 1\n" \
+ "661:\n\t" \
+ oldinstr "\n" \
+ "662:\n" \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY_CB(cpucap, cb) \
+ ".popsection\n" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n"
+
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg))
+
+#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb)
+#else
+
+#include <asm/assembler.h>
+
+.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len
+ .word \orig_offset - .
+ .word \alt_offset - .
+ .hword (\cpucap)
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+.macro alternative_insn insn1, insn2, cap, enable = 1
+ .if \enable
+661: \insn1
+662: .pushsection .altinstructions, "a"
+ altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+ .popsection
+ .subsection 1
+663: \insn2
+664: .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .previous
+ .endif
+.endm
+
+/*
+ * Alternative sequences
+ *
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
+ *
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
+ *
+ * 1. Be exactly the same length (in bytes) as the default code
+ * sequence.
+ *
+ * 2. Not contain a branch target that is used outside of the
+ * alternative sequence it is defined in (branches into an
+ * alternative sequence are not fixed up).
+ */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+ .popsection
+661:
+.endm
+
+.macro alternative_if cap
+ .set .Lasm_alt_mode, 1
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+ .popsection
+ .subsection 1
+ .align 2 /* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+.macro alternative_cb cap, cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0
+ .popsection
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
+.macro alternative_else
+662:
+ .if .Lasm_alt_mode==0
+ .subsection 1
+ .else
+ .previous
+ .endif
+663:
+.endm
+
+/*
+ * Complete an alternative code sequence.
+ */
+.macro alternative_endif
+664:
+ .org . - (664b-663b) + (662b-661b)
+ .org . - (662b-661b) + (664b-663b)
+ .if .Lasm_alt_mode==0
+ .previous
+ .endif
+.endm
+
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+ nops (662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
+#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
+ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
+ *
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO));
+ * N.B. If CONFIG_FOO is specified, but not selected, the whole block
+ * will be omitted, including oldinstr.
+ */
+#define ALTERNATIVE(oldinstr, newinstr, ...) \
+ _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+static __always_inline bool
+alternative_has_cap_likely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm_volatile_goto(
+ ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_no);
+
+ return true;
+l_no:
+ return false;
+}
+
+static __always_inline bool
+alternative_has_cap_unlikely(const unsigned long cpucap)
+{
+ if (!cpucap_is_possible(cpucap))
+ return false;
+
+ asm_volatile_goto(
+ ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
+ :
+ : [cpucap] "i" (cpucap)
+ :
+ : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index b9f8d787eea9..00d97b8a757f 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,22 +2,18 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
-#include <asm/cpucaps.h>
-#include <asm/insn.h>
-
-#define ARM64_CB_PATCH ARM64_NCAPS
+#include <asm/alternative-macros.h>
#ifndef __ASSEMBLY__
#include <linux/init.h>
#include <linux/types.h>
#include <linux/stddef.h>
-#include <linux/stringify.h>
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
- u16 cpufeature; /* cpufeature bit set for replacement */
+ u16 cpucap; /* cpucap bit set for replacement */
u8 orig_len; /* size of original instruction(s) */
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
@@ -27,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
void __init apply_boot_alternatives(void);
void __init apply_alternatives_all(void);
-bool alternative_is_applied(u16 cpufeature);
+bool alternative_is_applied(u16 cpucap);
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length);
@@ -35,254 +31,8 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
-#define ALTINSTR_ENTRY(feature,cb) \
- " .word 661b - .\n" /* label */ \
- " .if " __stringify(cb) " == 0\n" \
- " .word 663f - .\n" /* new instruction */ \
- " .else\n" \
- " .word " __stringify(cb) "- .\n" /* callback */ \
- " .endif\n" \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* source len */ \
- " .byte 664f-663f\n" /* replacement len */
-
-/*
- * alternative assembly primitive:
- *
- * If any of these .org directive fail, it means that insn1 and insn2
- * don't have the same length. This used to be written as
- *
- * .if ((664b-663b) != (662b-661b))
- * .error "Alternatives instruction length mismatch"
- * .endif
- *
- * but most assemblers die if insn1 or insn2 have a .inst. This should
- * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
- * containing commit 4e4d08cf7399b606 or c1baaddf8861).
- *
- * Alternatives with callbacks do not generate replacement instructions.
- */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
- ".if "__stringify(cfg_enabled)" == 1\n" \
- "661:\n\t" \
- oldinstr "\n" \
- "662:\n" \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature,cb) \
- ".popsection\n" \
- " .if " __stringify(cb) " == 0\n" \
- ".pushsection .altinstr_replacement, \"a\"\n" \
- "663:\n\t" \
- newinstr "\n" \
- "664:\n\t" \
- ".popsection\n\t" \
- ".org . - (664b-663b) + (662b-661b)\n\t" \
- ".org . - (662b-661b) + (664b-663b)\n" \
- ".else\n\t" \
- "663:\n\t" \
- "664:\n\t" \
- ".endif\n" \
- ".endif\n"
-
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
-
-#define ALTERNATIVE_CB(oldinstr, cb) \
- __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
-#else
-
-#include <asm/assembler.h>
-
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
- .word \orig_offset - .
- .word \alt_offset - .
- .hword \feature
- .byte \orig_len
- .byte \alt_len
-.endm
-
-.macro alternative_insn insn1, insn2, cap, enable = 1
- .if \enable
-661: \insn1
-662: .pushsection .altinstructions, "a"
- altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
- .popsection
- .pushsection .altinstr_replacement, "ax"
-663: \insn2
-664: .popsection
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
- .endif
-.endm
-
-/*
- * Alternative sequences
- *
- * The code for the case where the capability is not present will be
- * assembled and linked as normal. There are no restrictions on this
- * code.
- *
- * The code for the case where the capability is present will be
- * assembled into a special section to be used for dynamic patching.
- * Code for that case must:
- *
- * 1. Be exactly the same length (in bytes) as the default code
- * sequence.
- *
- * 2. Not contain a branch target that is used outside of the
- * alternative sequence it is defined in (branches into an
- * alternative sequence are not fixed up).
- */
-
-/*
- * Begin an alternative code sequence.
- */
-.macro alternative_if_not cap
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
- .popsection
-661:
-.endm
-
-.macro alternative_if cap
- .set .Lasm_alt_mode, 1
- .pushsection .altinstructions, "a"
- altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
- .popsection
- .pushsection .altinstr_replacement, "ax"
- .align 2 /* So GAS knows label 661 is suitably aligned */
-661:
-.endm
-
-.macro alternative_cb cb
- .set .Lasm_alt_mode, 0
- .pushsection .altinstructions, "a"
- altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
- .popsection
-661:
-.endm
-
-/*
- * Provide the other half of the alternative code sequence.
- */
-.macro alternative_else
-662:
- .if .Lasm_alt_mode==0
- .pushsection .altinstr_replacement, "ax"
- .else
- .popsection
- .endif
-663:
-.endm
-
-/*
- * Complete an alternative code sequence.
- */
-.macro alternative_endif
-664:
- .if .Lasm_alt_mode==0
- .popsection
- .endif
- .org . - (664b-663b) + (662b-661b)
- .org . - (662b-661b) + (664b-663b)
-.endm
-
-/*
- * Callback-based alternative epilogue
- */
-.macro alternative_cb_end
-662:
-.endm
-
-/*
- * Provides a trivial alternative or default sequence consisting solely
- * of NOPs. The number of NOPs is chosen automatically to match the
- * previous case.
- */
-.macro alternative_else_nop_endif
-alternative_else
- nops (662b-661b) / AARCH64_INSN_SIZE
-alternative_endif
-.endm
-
-#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \
- alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
-
-.macro user_alt, label, oldinstr, newinstr, cond
-9999: alternative_insn "\oldinstr", "\newinstr", \cond
- _ASM_EXTABLE 9999b, \label
-.endm
-
-/*
- * Generate the assembly for UAO alternatives with exception table entries.
- * This is complicated as there is no post-increment or pair versions of the
- * unprivileged instructions, and USER() only works for single instructions.
- */
-#ifdef CONFIG_ARM64_UAO
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: ldp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- ldtr \reg1, [\addr];
- ldtr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_stp l, reg1, reg2, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: stp \reg1, \reg2, [\addr], \post_inc;
-8889: nop;
- nop;
- alternative_else
- sttr \reg1, [\addr];
- sttr \reg2, [\addr, #8];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- _asm_extable 8889b,\l;
- .endm
-
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- alternative_if_not ARM64_HAS_UAO
-8888: \inst \reg, [\addr], \post_inc;
- nop;
- alternative_else
- \alt_inst \reg, [\addr];
- add \addr, \addr, \post_inc;
- alternative_endif
-
- _asm_extable 8888b,\l;
- .endm
-#else
- .macro uao_ldp l, reg1, reg2, addr, post_inc
- USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_stp l, reg1, reg2, addr, post_inc
- USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
- .endm
- .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
- USER(\l, \inst \reg, [\addr], \post_inc)
- .endm
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
- *
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
- * N.B. If CONFIG_FOO is specified, but not selected, the whole block
- * will be omitted, including oldinstr.
- */
-#define ALTERNATIVE(oldinstr, newinstr, ...) \
- _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
+void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
new file mode 100644
index 000000000000..99483b19b99f
--- /dev/null
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ASM_APPLE_M1_PMU_h
+#define __ASM_APPLE_M1_PMU_h
+
+#include <linux/bits.h>
+#include <asm/sysreg.h>
+
+/* Counters */
+#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0)
+#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0)
+#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0)
+#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0)
+#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0)
+#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0)
+#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0)
+#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0)
+#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0)
+#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0)
+
+/* Core PMC control register */
+#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
+#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0)
+#define PMCR0_IMODE GENMASK(10, 8)
+#define PMCR0_IMODE_OFF 0
+#define PMCR0_IMODE_PMI 1
+#define PMCR0_IMODE_AIC 2
+#define PMCR0_IMODE_HALT 3
+#define PMCR0_IMODE_FIQ 4
+#define PMCR0_IACT BIT(11)
+#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12)
+#define PMCR0_STOP_CNT_ON_PMI BIT(20)
+#define PMCR0_CNT_GLOB_L2C_EVT BIT(21)
+#define PMCR0_DEFER_PMI_TO_ERET BIT(22)
+#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30)
+#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32)
+#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
+
+#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
+#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
+#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
+#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48)
+
+#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0)
+#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0)
+#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0)
+
+#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0)
+#define PMESR0_EVT_CNT_2 GENMASK(7, 0)
+#define PMESR0_EVT_CNT_3 GENMASK(15, 8)
+#define PMESR0_EVT_CNT_4 GENMASK(23, 16)
+#define PMESR0_EVT_CNT_5 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0)
+#define PMESR1_EVT_CNT_6 GENMASK(7, 0)
+#define PMESR1_EVT_CNT_7 GENMASK(15, 8)
+#define PMESR1_EVT_CNT_8 GENMASK(23, 16)
+#define PMESR1_EVT_CNT_9 GENMASK(31, 24)
+
+#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0)
+#define PMSR_OVERFLOW GENMASK(9, 0)
+
+#endif /* __ASM_APPLE_M1_PMU_h */
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 89e4c8b79349..5f172611654b 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -26,13 +26,7 @@
* sets the GP register's most significant bits to 0 with an explicit cast.
*/
-static inline void gic_write_eoir(u32 irq)
-{
- write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
- isb();
-}
-
-static inline void gic_write_dir(u32 irq)
+static __always_inline void gic_write_dir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
isb();
@@ -53,17 +47,44 @@ static inline u64 gic_read_iar_common(void)
* The gicv3 of ThunderX requires a modified version for reading the
* IAR status to ensure data synchronization (access to icc_iar1_el1
* is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
+ * Common function used for both the workarounds since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ * erratum 23154 is not applicable.
*/
static inline u64 gic_read_iar_cavium_thunderx(void)
{
- u64 irqstat;
+ u64 irqstat, apr;
+ apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
nops(8);
irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
nops(4);
mb();
- return irqstat;
+ /* Max priority groups implemented is only 32 */
+ if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+ return irqstat;
+
+ return 0x3ff;
+}
+
+static u64 __maybe_unused gic_read_iar(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_CAVIUM_23154))
+ return gic_read_iar_cavium_thunderx();
+ else
+ return gic_read_iar_common();
}
static inline void gic_write_ctlr(u32 val)
@@ -109,7 +130,7 @@ static inline u32 gic_read_pmr(void)
return read_sysreg_s(SYS_ICC_PMR_EL1);
}
-static inline void gic_write_pmr(u32 val)
+static __always_inline void gic_write_pmr(u32 val)
{
write_sysreg_s(val, SYS_ICC_PMR_EL1);
}
@@ -124,7 +145,8 @@ static inline u32 gic_read_rpr(void)
#define gic_read_lpir(c) readq_relaxed(c)
#define gic_write_lpir(v, c) writeq_relaxed(v, c)
-#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
#define gits_read_baser(c) readq_relaxed(c)
#define gits_write_baser(v, c) writeq_relaxed(v, c)
@@ -140,10 +162,11 @@ static inline u32 gic_read_rpr(void)
#define gicr_write_pendbaser(v, c) writeq_relaxed(v, c)
#define gicr_read_pendbaser(c) readq_relaxed(c)
-#define gits_write_vpropbaser(v, c) writeq_relaxed(v, c)
+#define gicr_write_vpropbaser(v, c) writeq_relaxed(v, c)
+#define gicr_read_vpropbaser(c) readq_relaxed(c)
-#define gits_write_vpendbaser(v, c) writeq_relaxed(v, c)
-#define gits_read_vpendbaser(c) readq_relaxed(c)
+#define gicr_write_vpendbaser(v, c) writeq_relaxed(v, c)
+#define gicr_read_vpendbaser(c) readq_relaxed(c)
static inline bool gic_prio_masking_enabled(void)
{
@@ -152,7 +175,7 @@ static inline bool gic_prio_masking_enabled(void)
static inline void gic_pmr_mask_irqs(void)
{
- BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
+ BUILD_BUG_ON(GICD_INT_DEF_PRI < (__GIC_PRIO_IRQOFF |
GIC_PRIO_PSR_I_SET));
BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
/*
@@ -161,12 +184,23 @@ static inline void gic_pmr_mask_irqs(void)
* are applied to IRQ priorities
*/
BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) >= GIC_PRIO_IRQON);
+ /*
+ * Same situation as above, but now we make sure that we can mask
+ * regular interrupts.
+ */
+ BUILD_BUG_ON((0x80 | (GICD_INT_DEF_PRI >> 1)) < (__GIC_PRIO_IRQOFF_NS |
+ GIC_PRIO_PSR_I_SET));
gic_write_pmr(GIC_PRIO_IRQOFF);
}
static inline void gic_arch_enable_irqs(void)
{
- asm volatile ("msr daifclr, #2" : : : "memory");
+ asm volatile ("msr daifclr, #3" : : : "memory");
+}
+
+static inline bool gic_has_relaxed_pmr_sync(void)
+{
+ return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 7ae54d7d333a..934c658ee947 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -32,7 +32,7 @@
({ \
const struct arch_timer_erratum_workaround *__wa; \
__wa = __this_cpu_read(timer_unstable_counter_workaround); \
- (__wa && __wa->h) ? __wa->h : arch_timer_##h; \
+ (__wa && __wa->h) ? ({ isb(); __wa->h;}) : arch_timer_##h; \
})
#else
@@ -52,47 +52,43 @@ struct arch_timer_erratum_workaround {
enum arch_timer_erratum_match_type match_type;
const void *id;
const char *desc;
- u32 (*read_cntp_tval_el0)(void);
- u32 (*read_cntv_tval_el0)(void);
u64 (*read_cntpct_el0)(void);
u64 (*read_cntvct_el0)(void);
int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
int (*set_next_event_virt)(unsigned long, struct clock_event_device *);
+ bool disable_compat_vdso;
};
DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
timer_unstable_counter_workaround);
-/* inline sysreg accessors that make erratum_handler() work */
-static inline notrace u32 arch_timer_read_cntp_tval_el0(void)
+static inline notrace u64 arch_timer_read_cntpct_el0(void)
{
- return read_sysreg(cntp_tval_el0);
-}
+ u64 cnt;
-static inline notrace u32 arch_timer_read_cntv_tval_el0(void)
-{
- return read_sysreg(cntv_tval_el0);
-}
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
-static inline notrace u64 arch_timer_read_cntpct_el0(void)
-{
- return read_sysreg(cntpct_el0);
+ return cnt;
}
static inline notrace u64 arch_timer_read_cntvct_el0(void)
{
- return read_sysreg(cntvct_el0);
+ u64 cnt;
+
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
+
+ return cnt;
}
#define arch_timer_reg_read_stable(reg) \
({ \
- u64 _val; \
- \
- preempt_disable_notrace(); \
- _val = erratum_handler(read_ ## reg)(); \
- preempt_enable_notrace(); \
- \
- _val; \
+ erratum_handler(read_ ## reg)(); \
})
/*
@@ -101,51 +97,58 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
* the code.
*/
static __always_inline
-void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntp_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- write_sysreg(val, cntp_tval_el0);
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntp_cval_el0);
break;
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntv_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- write_sysreg(val, cntv_tval_el0);
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntv_cval_el0);
break;
+ default:
+ BUILD_BUG();
}
+ } else {
+ BUILD_BUG();
}
-
- isb();
}
static __always_inline
-u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
+u64 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
return read_sysreg(cntp_ctl_el0);
- case ARCH_TIMER_REG_TVAL:
- return arch_timer_reg_read_stable(cntp_tval_el0);
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
return read_sysreg(cntv_ctl_el0);
- case ARCH_TIMER_REG_TVAL:
- return arch_timer_reg_read_stable(cntv_tval_el0);
+ default:
+ BUILD_BUG();
}
}
- BUG();
+ BUILD_BUG();
+ unreachable();
}
static inline u32 arch_timer_get_cntfrq(void)
@@ -164,30 +167,10 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
isb();
}
-/*
- * Ensure that reads of the counter are treated the same as memory reads
- * for the purposes of ordering by subsequent memory barriers.
- *
- * This insanity brought to you by speculative system register reads,
- * out-of-order memory accesses, sequence locks and Thomas Gleixner.
- *
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
- */
-#define arch_counter_enforce_ordering(val) do { \
- u64 tmp, _val = (val); \
- \
- asm volatile( \
- " eor %0, %1, %1\n" \
- " add %0, sp, %0\n" \
- " ldr xzr, [%0]" \
- : "=r" (tmp) : "r" (_val)); \
-} while (0)
-
static __always_inline u64 __arch_counter_get_cntpct_stable(void)
{
u64 cnt;
- isb();
cnt = arch_timer_reg_read_stable(cntpct_el0);
arch_counter_enforce_ordering(cnt);
return cnt;
@@ -197,8 +180,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void)
{
u64 cnt;
- isb();
- cnt = read_sysreg(cntpct_el0);
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
arch_counter_enforce_ordering(cnt);
return cnt;
}
@@ -207,7 +192,6 @@ static __always_inline u64 __arch_counter_get_cntvct_stable(void)
{
u64 cnt;
- isb();
cnt = arch_timer_reg_read_stable(cntvct_el0);
arch_counter_enforce_ordering(cnt);
return cnt;
@@ -217,14 +201,14 @@ static __always_inline u64 __arch_counter_get_cntvct(void)
{
u64 cnt;
- isb();
- cnt = read_sysreg(cntvct_el0);
+ asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0",
+ "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (cnt));
arch_counter_enforce_ordering(cnt);
return cnt;
}
-#undef arch_counter_enforce_ordering
-
static inline int arch_timer_arch_init(void)
{
return 0;
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
new file mode 100644
index 000000000000..ecdb3cfcd0f8
--- /dev/null
+++ b/arch/arm64/include/asm/archrandom.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARCHRANDOM_H
+#define _ASM_ARCHRANDOM_H
+
+#include <linux/arm-smccc.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+#include <asm/cpufeature.h>
+
+#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL
+
+extern bool smccc_trng_available;
+
+static inline bool __init smccc_probe_trng(void)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res);
+ if ((s32)res.a0 < 0)
+ return false;
+
+ return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION;
+}
+
+static inline bool __arm64_rndr(unsigned long *v)
+{
+ bool ok;
+
+ /*
+ * Reads of RNDR set PSTATE.NZCV to 0b0000 on success,
+ * and set PSTATE.NZCV to 0b0100 otherwise.
+ */
+ asm volatile(
+ __mrs_s("%0", SYS_RNDR_EL0) "\n"
+ " cset %w1, ne\n"
+ : "=r" (*v), "=r" (ok)
+ :
+ : "cc");
+
+ return ok;
+}
+
+static inline bool __arm64_rndrrs(unsigned long *v)
+{
+ bool ok;
+
+ /*
+ * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success,
+ * and set PSTATE.NZCV to 0b0100 otherwise.
+ */
+ asm volatile(
+ __mrs_s("%0", SYS_RNDRRS_EL0) "\n"
+ " cset %w1, ne\n"
+ : "=r" (*v), "=r" (ok)
+ :
+ : "cc");
+
+ return ok;
+}
+
+static __always_inline bool __cpu_has_rng(void)
+{
+ if (unlikely(!system_capabilities_finalized() && !preemptible()))
+ return this_cpu_has_cap(ARM64_HAS_RNG);
+ return alternative_has_cap_unlikely(ARM64_HAS_RNG);
+}
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+ /*
+ * Only support the generic interface after we have detected
+ * the system wide capability, avoiding complexity with the
+ * cpufeature code and with potential scheduling between CPUs
+ * with and without the feature.
+ */
+ if (max_longs && __cpu_has_rng() && __arm64_rndr(v))
+ return 1;
+ return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+ if (!max_longs)
+ return 0;
+
+ /*
+ * We prefer the SMCCC call, since its semantics (return actual
+ * hardware backed entropy) is closer to the idea behind this
+ * function here than what even the RNDRSS register provides
+ * (the output of a pseudo RNG freshly seeded by a TRNG).
+ */
+ if (smccc_trng_available) {
+ struct arm_smccc_res res;
+
+ max_longs = min_t(size_t, 3, max_longs);
+ arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, max_longs * 64, &res);
+ if ((int)res.a0 >= 0) {
+ switch (max_longs) {
+ case 3:
+ *v++ = res.a1;
+ fallthrough;
+ case 2:
+ *v++ = res.a2;
+ fallthrough;
+ case 1:
+ *v++ = res.a3;
+ break;
+ }
+ return max_longs;
+ }
+ }
+
+ /*
+ * RNDRRS is not backed by an entropy source but by a DRBG that is
+ * reseeded after each invocation. This is not a 100% fit but good
+ * enough to implement this API if no other entropy source exists.
+ */
+ if (__cpu_has_rng() && __arm64_rndrrs(v))
+ return 1;
+
+ return 0;
+}
+
+static inline bool __init __early_cpu_has_rndr(void)
+{
+ /* Open code as we run prior to the first call to cpufeature. */
+ unsigned long ftr = read_sysreg_s(SYS_ID_AA64ISAR0_EL1);
+ return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
+}
+
+u64 kaslr_early_init(void *fdt);
+
+#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
new file mode 100644
index 000000000000..c27404fa4418
--- /dev/null
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __ASM_PMUV3_H
+#define __ASM_PMUV3_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static inline unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static inline void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static inline void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+}
+
+static inline unsigned long read_pmmir(void)
+{
+ return read_cpuid(PMMIR_EL1);
+}
+
+static inline u32 read_pmuver(void)
+{
+ u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ return cpuid_feature_extract_unsigned_field(dfr0,
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+}
+
+static inline void write_pmcr(u64 val)
+{
+ write_sysreg(val, pmcr_el0);
+}
+
+static inline u64 read_pmcr(void)
+{
+ return read_sysreg(pmcr_el0);
+}
+
+static inline void write_pmselr(u32 val)
+{
+ write_sysreg(val, pmselr_el0);
+}
+
+static inline void write_pmccntr(u64 val)
+{
+ write_sysreg(val, pmccntr_el0);
+}
+
+static inline u64 read_pmccntr(void)
+{
+ return read_sysreg(pmccntr_el0);
+}
+
+static inline void write_pmcntenset(u32 val)
+{
+ write_sysreg(val, pmcntenset_el0);
+}
+
+static inline void write_pmcntenclr(u32 val)
+{
+ write_sysreg(val, pmcntenclr_el0);
+}
+
+static inline void write_pmintenset(u32 val)
+{
+ write_sysreg(val, pmintenset_el1);
+}
+
+static inline void write_pmintenclr(u32 val)
+{
+ write_sysreg(val, pmintenclr_el1);
+}
+
+static inline void write_pmccfiltr(u64 val)
+{
+ write_sysreg(val, pmccfiltr_el0);
+}
+
+static inline void write_pmovsclr(u32 val)
+{
+ write_sysreg(val, pmovsclr_el0);
+}
+
+static inline u32 read_pmovsclr(void)
+{
+ return read_sysreg(pmovsclr_el0);
+}
+
+static inline void write_pmuserenr(u32 val)
+{
+ write_sysreg(val, pmuserenr_el0);
+}
+
+static inline u64 read_pmceid0(void)
+{
+ return read_sysreg(pmceid0_el0);
+}
+
+static inline u64 read_pmceid1(void)
+{
+ return read_sysreg(pmceid1_el0);
+}
+
+static inline bool pmuv3_implemented(int pmuver)
+{
+ return !(pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF ||
+ pmuver == ID_AA64DFR0_EL1_PMUVer_NI);
+}
+
+static inline bool is_pmuv3p4(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4;
+}
+
+static inline bool is_pmuv3p5(int pmuver)
+{
+ return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;
+}
+
+#endif
diff --git a/arch/arm64/include/asm/asm-bug.h b/arch/arm64/include/asm/asm-bug.h
index 03f52f84a4f3..c762038ba400 100644
--- a/arch/arm64/include/asm/asm-bug.h
+++ b/arch/arm64/include/asm/asm-bug.h
@@ -14,7 +14,7 @@
14472: .string file; \
.popsection; \
\
- .long 14472b - 14470b; \
+ .long 14472b - .; \
.short line;
#else
#define _BUGVERBOSE_LOCATION(file, line)
@@ -25,7 +25,7 @@
#define __BUG_ENTRY(flags) \
.pushsection __bug_table,"aw"; \
.align 2; \
- 14470: .long 14471f - 14470b; \
+ 14470: .long 14471f - .; \
_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \
.short flags; \
.popsection; \
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
new file mode 100644
index 000000000000..980d1dd8e1a3
--- /dev/null
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#include <linux/bits.h>
+#include <asm/gpr-num.h>
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_BPF 1
+#define EX_TYPE_UACCESS_ERR_ZERO 2
+#define EX_TYPE_KACCESS_ERR_ZERO 3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
+
+/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+/* Data fields for EX_TYPE_LOAD_UNALIGNED_ZEROPAD */
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR GENMASK(9, 5)
+
+#ifdef __ASSEMBLY__
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ .pushsection __ex_table, "a"; \
+ .align 2; \
+ .long ((insn) - .); \
+ .long ((fixup) - .); \
+ .short (type); \
+ .short (data); \
+ .popsection;
+
+#define EX_DATA_REG(reg, gpr) \
+ (.L__gpr_num_##gpr << EX_DATA_REG_##reg##_SHIFT)
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __ASM_EXTABLE_RAW(insn, fixup, \
+ EX_TYPE_UACCESS_ERR_ZERO, \
+ ( \
+ EX_DATA_REG(ERR, err) | \
+ EX_DATA_REG(ZERO, zero) \
+ ))
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+/*
+ * Create an exception table entry for uaccess `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
+ */
+ .macro _asm_extable_uaccess, insn, fixup
+ _ASM_EXTABLE_UACCESS(\insn, \fixup)
+ .endm
+
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+ .macro _cond_uaccess_extable, insn, fixup
+ .ifnc \fixup,
+ _asm_extable_uaccess \insn, \fixup
+ .endif
+ .endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/stringify.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ ".pushsection __ex_table, \"a\"\n" \
+ ".align 2\n" \
+ ".long ((" insn ") - .)\n" \
+ ".long ((" fixup ") - .)\n" \
+ ".short (" type ")\n" \
+ ".short (" data ")\n" \
+ ".popsection\n"
+
+#define EX_DATA_REG(reg, gpr) \
+ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_UACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_KACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_UACCESS(insn, fixup) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
+#define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
+ "(" \
+ EX_DATA_REG(DATA, data) " | " \
+ EX_DATA_REG(ADDR, addr) \
+ ")")
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
index 1c9a3a0c5fa5..ec1d9655f885 100644
--- a/arch/arm64/include/asm/asm-prototypes.h
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -23,4 +23,10 @@ long long __ashlti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __lshrti3(long long a, int b);
+/*
+ * This function uses a custom calling convention and cannot be called from C so
+ * this prototype is not entirely accurate.
+ */
+void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info);
+
#endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f68a0e64482a..5b6efe8abeeb 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -2,11 +2,12 @@
#ifndef __ASM_ASM_UACCESS_H
#define __ASM_ASM_UACCESS_H
-#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
+#include <asm/asm-extable.h>
+#include <asm/assembler.h>
#include <asm/kernel-pgtable.h>
#include <asm/mmu.h>
#include <asm/sysreg.h>
-#include <asm/assembler.h>
/*
* User access enabling/disabling macros.
@@ -15,10 +16,9 @@
.macro __uaccess_ttbr0_disable, tmp1
mrs \tmp1, ttbr1_el1 // swapper_pg_dir
bic \tmp1, \tmp1, #TTBR_ASID_MASK
- sub \tmp1, \tmp1, #RESERVED_TTBR0_SIZE // reserved_ttbr0 just before swapper_pg_dir
+ sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
- isb
- add \tmp1, \tmp1, #RESERVED_TTBR0_SIZE
+ add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
.endm
@@ -30,7 +30,6 @@
extr \tmp2, \tmp2, \tmp1, #48
ror \tmp2, \tmp2, #16
msr ttbr1_el1, \tmp2 // set the active ASID
- isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
@@ -58,4 +57,37 @@ alternative_else_nop_endif
.endm
#endif
+#define USER(l, x...) \
+9999: x; \
+ _asm_extable_uaccess 9999b, l
+
+/*
+ * Generate the assembly for LDTR/STTR with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+ .macro user_ldp l, reg1, reg2, addr, post_inc
+8888: ldtr \reg1, [\addr];
+8889: ldtr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b, \l;
+ _asm_extable_uaccess 8889b, \l;
+ .endm
+
+ .macro user_stp l, reg1, reg2, addr, post_inc
+8888: sttr \reg1, [\addr];
+8889: sttr \reg2, [\addr, #8];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b,\l;
+ _asm_extable_uaccess 8889b,\l;
+ .endm
+
+ .macro user_ldst l, inst, reg, addr, post_inc
+8888: \inst \reg, [\addr];
+ add \addr, \addr, \post_inc;
+
+ _asm_extable_uaccess 8888b, \l;
+ .endm
#endif
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
new file mode 100644
index 000000000000..13ecc79854ee
--- /dev/null
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_POINTER_AUTH_H
+#define __ASM_ASM_POINTER_AUTH_H
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ mov \tmp1, #THREAD_KEYS_KERNEL
+ add \tmp1, \tsk, \tmp1
+ ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA]
+ msr_s SYS_APIAKEYLO_EL1, \tmp2
+ msr_s SYS_APIAKEYHI_EL1, \tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+alternative_else_nop_endif
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+ isb
+alternative_else_nop_endif
+ .endm
+
+#else /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+ .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+ .endm
+
+ .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+ .endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * thread.keys_user.ap* as offset exceeds the #imm offset range
+ * so use the base value of ldp as thread.keys_user and offset as
+ * thread.keys_user.ap*.
+ */
+ .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ mov \tmp1, #THREAD_KEYS_USER
+ add \tmp1, \tsk, \tmp1
+ ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
+ msr_s SYS_APIAKEYLO_EL1, \tmp2
+ msr_s SYS_APIAKEYHI_EL1, \tmp3
+ .endm
+
+ .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+ mrs \tmp1, id_aa64isar1_el1
+ ubfx \tmp1, \tmp1, #ID_AA64ISAR1_EL1_APA_SHIFT, #8
+ mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1
+ ubfx \tmp2, \tmp2, #ID_AA64ISAR2_EL1_APA3_SHIFT, #4
+ orr \tmp1, \tmp1, \tmp2
+ cbz \tmp1, .Lno_addr_auth\@
+ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
+ mrs \tmp2, sctlr_el1
+ orr \tmp2, \tmp2, \tmp1
+ msr sctlr_el1, \tmp2
+ __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3
+ isb
+.Lno_addr_auth\@:
+ .endm
+
+ .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .Lno_addr_auth\@
+alternative_else_nop_endif
+ __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3
+.Lno_addr_auth\@:
+ .endm
+
+#else /* !CONFIG_ARM64_PTR_AUTH */
+
+ .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ .endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#endif /* __ASM_ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b8cf7c85ffa2..513787e43329 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -12,8 +12,11 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
-#include <asm-generic/export.h>
+#include <linux/export.h>
+#include <asm/alternative.h>
+#include <asm/asm-bug.h>
+#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
@@ -23,10 +26,13 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
- .macro save_and_disable_daif, flags
- mrs \flags, daif
- msr daifset, #0xf
- .endm
+ /*
+ * Provide a wxN alias for each wN register so what we can paste a xN
+ * reference after a 'w' to obtain the 32-bit version.
+ */
+ .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ wx\n .req w\n
+ .endr
.macro disable_daif
msr daifset, #0xf
@@ -36,27 +42,12 @@
msr daifclr, #0xf
.endm
- .macro restore_daif, flags:req
- msr daif, \flags
- .endm
-
- /* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
- .macro inherit_daif, pstate:req, tmp:req
- and \tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
- msr daif, \tmp
- .endm
-
- /* IRQ is the lowest priority flag, unconditionally unmask the rest. */
- .macro enable_da_f
- msr daifclr, #(8 | 4 | 1)
- .endm
-
/*
* Save/restore interrupts.
*/
.macro save_and_disable_irq, flags
mrs \flags, daif
- msr daifset, #2
+ msr daifset, #3
.endm
.macro restore_irq, flags
@@ -86,13 +77,6 @@
.endm
/*
- * SMP data memory barrier
- */
- .macro smp_dmb, opt
- dmb \opt
- .endm
-
-/*
* RAS Error Synchronization barrier
*/
.macro esb
@@ -111,6 +95,13 @@
.endm
/*
+ * Clear Branch History instruction
+ */
+ .macro clearbhb
+ hint #22
+ .endm
+
+/*
* Speculation barrier
*/
.macro sb
@@ -133,20 +124,6 @@ alternative_endif
.endm
/*
- * Emit an entry into the exception table
- */
- .macro _asm_extable, from, to
- .pushsection __ex_table, "a"
- .align 3
- .long (\from - .), (\to - .)
- .popsection
- .endm
-
-#define USER(l, x...) \
-9999: x; \
- _asm_extable 9999b, l
-
-/*
* Register aliases.
*/
lr .req x30 // link register
@@ -232,6 +209,31 @@ lr .req x30 // link register
.endm
/*
+ * @dst: destination register
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+ .macro get_this_cpu_offset, dst
+ mrs \dst, tpidr_el2
+ .endm
+#else
+ .macro get_this_cpu_offset, dst
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ mrs \dst, tpidr_el1
+alternative_else
+ mrs \dst, tpidr_el2
+alternative_endif
+ .endm
+
+ .macro set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ msr tpidr_el1, \src
+alternative_else
+ msr tpidr_el2, \src
+alternative_endif
+ .endm
+#endif
+
+ /*
* @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
* @sym: The name of the per-cpu variable
* @tmp: scratch register
@@ -239,11 +241,7 @@ lr .req x30 // link register
.macro adr_this_cpu, dst, sym, tmp
adrp \tmp, \sym
add \dst, \tmp, #:lo12:\sym
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs \tmp, tpidr_el1
-alternative_else
- mrs \tmp, tpidr_el2
-alternative_endif
+ get_this_cpu_offset \tmp
add \dst, \dst, \tmp
.endm
@@ -254,11 +252,7 @@ alternative_endif
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs \tmp, tpidr_el1
-alternative_else
- mrs \tmp, tpidr_el2
-alternative_endif
+ get_this_cpu_offset \tmp
ldr \dst, [\dst, \tmp]
.endm
@@ -270,22 +264,28 @@ alternative_endif
.endm
/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
- .macro mmid, rd, rn
- ldr \rd, [\rn, #MM_CONTEXT_ID]
- .endm
-/*
* read_ctr - read CTR_EL0. If the system has mismatched register fields,
* provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
*/
.macro read_ctr, reg
+#ifndef __KVM_NVHE_HYPERVISOR__
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
mrs \reg, ctr_el0 // read CTR
nop
alternative_else
ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
+#else
+alternative_if_not ARM64_KVM_PROTECTED_MODE
+ ASM_BUG()
+alternative_else_nop_endif
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0
+ movz \reg, #0
+ movk \reg, #0, lsl #16
+ movk \reg, #0, lsl #32
+ movk \reg, #0, lsl #48
+alternative_cb_end
+#endif
.endm
@@ -346,6 +346,20 @@ alternative_endif
.endm
/*
+ * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of _end.
+ */
+ .macro idmap_get_t0sz, reg
+ adrp \reg, _end
+ orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
+ clz \reg, \reg
+ .endm
+
+/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
*
@@ -356,58 +370,76 @@ alternative_endif
.macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
mrs \tmp0, ID_AA64MMFR0_EL1
// Narrow PARange to fit the PS field in TCR_ELx
- ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
- mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX
+ ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
cmp \tmp0, \tmp1
csel \tmp0, \tmp1, \tmp0, hi
bfi \tcr, \tmp0, \pos, #3
.endm
-/*
- * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
- *
- * op: operation passed to dc instruction
- * domain: domain used in dsb instruciton
- * kaddr: starting virtual address of the region
- * size: size of the region
- * Corrupts: kaddr, size, tmp1, tmp2
- */
- .macro __dcache_op_workaround_clean_cache, op, kaddr
+ .macro __dcache_op_workaround_clean_cache, op, addr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
+ dc \op, \addr
alternative_else
- dc civac, \kaddr
+ dc civac, \addr
alternative_endif
.endm
- .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
- dcache_line_size \tmp1, \tmp2
- add \size, \kaddr, \size
- sub \tmp2, \tmp1, #1
- bic \kaddr, \kaddr, \tmp2
-9998:
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end) with dcache line size explicitly provided.
+ *
+ * op: operation passed to dc instruction
+ * domain: domain used in dsb instruciton
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * linesz: dcache line size
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp
+ */
+ .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
+ sub \tmp, \linesz, #1
+ bic \start, \start, \tmp
+.Ldcache_op\@:
.ifc \op, cvau
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvac
- __dcache_op_workaround_clean_cache \op, \kaddr
+ __dcache_op_workaround_clean_cache \op, \start
.else
.ifc \op, cvap
- sys 3, c7, c12, 1, \kaddr // dc cvap
+ sys 3, c7, c12, 1, \start // dc cvap
.else
.ifc \op, cvadp
- sys 3, c7, c13, 1, \kaddr // dc cvadp
+ sys 3, c7, c13, 1, \start // dc cvadp
.else
- dc \op, \kaddr
+ dc \op, \start
.endif
.endif
.endif
.endif
- add \kaddr, \kaddr, \tmp1
- cmp \kaddr, \size
- b.lo 9998b
+ add \start, \start, \linesz
+ cmp \start, \end
+ b.lo .Ldcache_op\@
dsb \domain
+
+ _cond_uaccess_extable .Ldcache_op\@, \fixup
+ .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [start, end)
+ *
+ * op: operation passed to dc instruction
+ * domain: domain used in dsb instruciton
+ * start: starting virtual address of the region
+ * end: end virtual address of the region
+ * fixup: optional label to branch to on user fault
+ * Corrupts: start, end, tmp1, tmp2
+ */
+ .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
+ dcache_line_size \tmp1, \tmp2
+ dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup
.endm
/*
@@ -415,20 +447,50 @@ alternative_endif
* [start, end)
*
* start, end: virtual addresses describing the region
- * label: A label to branch to on user fault.
+ * fixup: optional label to branch to on user fault
* Corrupts: tmp1, tmp2
*/
- .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
icache_line_size \tmp1, \tmp2
sub \tmp2, \tmp1, #1
bic \tmp2, \start, \tmp2
-9997:
-USER(\label, ic ivau, \tmp2) // invalidate I line PoU
+.Licache_op\@:
+ ic ivau, \tmp2 // invalidate I line PoU
add \tmp2, \tmp2, \tmp1
cmp \tmp2, \end
- b.lo 9997b
+ b.lo .Licache_op\@
dsb ish
isb
+
+ _cond_uaccess_extable .Licache_op\@, \fixup
+ .endm
+
+/*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ * pgtbl preserved
+ * tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+ .macro load_ttbr1, pgtbl, tmp1, tmp2
+ phys_to_ttbr \tmp1, \pgtbl
+ offset_ttbr1 \tmp1, \tmp2
+ msr ttbr1_el1, \tmp1
+ isb
+ .endm
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the tlb, switch the ttbr to a zero page when we invalidate the old
+ * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
+ * Even switching to our copied tables will cause a changed output address at
+ * each stage of the walk.
+ */
+ .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2
+ phys_to_ttbr \tmp, \zero_page
+ msr ttbr1_el1, \tmp
+ isb
+ tlbi vmalle1
+ dsb nsh
+ load_ttbr1 \page_table, \tmp, \tmp2
.endm
/*
@@ -436,7 +498,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
*/
.macro reset_pmuserenr_el0, tmpreg
mrs \tmpreg, id_aa64dfr0_el1
- sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
+ sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp \tmpreg, #1 // Skip if no PMU present
b.lt 9000f
msr pmuserenr_el0, xzr // Disable PMU access from EL0
@@ -444,6 +506,16 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
+ * reset_amuserenr_el0 - reset AMUSERENR_EL0 if AMUv1 present
+ */
+ .macro reset_amuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1
+ ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
+ cbz \tmpreg, .Lskip_\@ // Skip if no AMU present
+ msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0
+.Lskip_\@:
+ .endm
+/*
* copy_page - copy src to dest using temp registers t1-t8
*/
.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
@@ -462,17 +534,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
- * Annotate a function as position independent, i.e., safe to be called before
- * the kernel virtual mapping is activated.
- */
-#define ENDPIPROC(x) \
- .globl __pi_##x; \
- .type __pi_##x, %function; \
- .set __pi_##x, x; \
- .size __pi_##x, . - x; \
- ENDPROC(x)
-
-/*
* Annotate a function as being unsuitable for kprobes.
*/
#ifdef CONFIG_KPROBES
@@ -484,7 +545,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
#define NOKPROBE(x)
#endif
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define EXPORT_SYMBOL_NOKASAN(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
@@ -537,7 +598,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.macro offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
- and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
cbnz \tmp, .Lskipoffs_\@
orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
@@ -545,17 +606,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
- * Perform the reverse of offset_ttbr1.
- * bic is used as it can cover the immediate value and, in future, won't need
- * to be nop'ed out when dealing with 52-bit kernel VAs.
- */
- .macro restore_ttbr1, ttbr
-#ifdef CONFIG_ARM64_VA_BITS_52
- bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
-#endif
- .endm
-
-/*
* Arrange a physical address in a TTBR register, taking care of 52-bit
* addresses.
*
@@ -585,12 +635,10 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
.macro pte_to_phys, phys, pte
-#ifdef CONFIG_ARM64_PA_BITS_52
- ubfiz \phys, \pte, #(48 - 16 - 12), #16
- bfxil \phys, \pte, #16, #32
- lsl \phys, \phys, #16
-#else
and \phys, \pte, #PTE_ADDR_MASK
+#ifdef CONFIG_ARM64_PA_BITS_52
+ orr \phys, \phys, \phys, lsl #PTE_ADDR_HIGH_SHIFT
+ and \phys, \phys, GENMASK_ULL(PHYS_MASK_SHIFT - 1, PAGE_SHIFT)
#endif
.endm
@@ -687,73 +735,156 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
- * Check whether to yield to another runnable task from kernel mode NEON code
- * (which runs with preemption disabled).
- *
- * if_will_cond_yield_neon
- * // pre-yield patchup code
- * do_cond_yield_neon
- * // post-yield patchup code
- * endif_yield_neon <label>
- *
- * where <label> is optional, and marks the point where execution will resume
- * after a yield has been performed. If omitted, execution resumes right after
- * the endif_yield_neon invocation. Note that the entire sequence, including
- * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
- * is not defined.
- *
- * As a convenience, in the case where no patchup code is required, the above
- * sequence may be abbreviated to
- *
- * cond_yield_neon <label>
- *
- * Note that the patchup code does not support assembler directives that change
- * the output section, any use of such directives is undefined.
- *
- * The yield itself consists of the following:
- * - Check whether the preempt count is exactly 1 and a reschedule is also
- * needed. If so, calling of preempt_enable() in kernel_neon_end() will
- * trigger a reschedule. If it is not the case, yielding is pointless.
- * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
- * code.
- *
- * This macro sequence may clobber all CPU state that is not guaranteed by the
- * AAPCS to be preserved across an ordinary function call.
+ * Set SCTLR_ELx to the @reg value, and invalidate the local icache
+ * in the process. This is called when setting the MMU on.
*/
+.macro set_sctlr, sreg, reg
+ msr \sreg, \reg
+ isb
+ /*
+ * Invalidate the local I-cache so that any instructions fetched
+ * speculatively from the PoC are discarded, since they may have
+ * been dynamically patched at the PoU.
+ */
+ ic iallu
+ dsb nsh
+ isb
+.endm
- .macro cond_yield_neon, lbl
- if_will_cond_yield_neon
- do_cond_yield_neon
- endif_yield_neon \lbl
- .endm
+.macro set_sctlr_el1, reg
+ set_sctlr sctlr_el1, \reg
+.endm
- .macro if_will_cond_yield_neon
-#ifdef CONFIG_PREEMPT
- get_current_task x0
- ldr x0, [x0, #TSK_TI_PREEMPT]
- sub x0, x0, #PREEMPT_DISABLE_OFFSET
- cbz x0, .Lyield_\@
- /* fall through to endif_yield_neon */
- .subsection 1
-.Lyield_\@ :
-#else
- .section ".discard.cond_yield_neon", "ax"
+.macro set_sctlr_el2, reg
+ set_sctlr sctlr_el2, \reg
+.endm
+
+ /*
+ * Check whether asm code should yield as soon as it is able. This is
+ * the case if we are currently running in task context, and the
+ * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
+ * is stored negated in the top word of the thread_info::preempt_count
+ * field)
+ */
+ .macro cond_yield, lbl:req, tmp:req, tmp2
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+ get_current_task \tmp
+ ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
+ /*
+ * If we are serving a softirq, there is no point in yielding: the
+ * softirq will not be preempted no matter what we do, so we should
+ * run to completion as quickly as we can. The preempt_count field will
+ * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
+ * catch this case too.
+ */
+ cbz \tmp, \lbl
#endif
.endm
- .macro do_cond_yield_neon
- bl kernel_neon_end
- bl kernel_neon_begin
+/*
+ * Branch Target Identifier (BTI)
+ */
+ .macro bti, targets
+ .equ .L__bti_targets_c, 34
+ .equ .L__bti_targets_j, 36
+ .equ .L__bti_targets_jc,38
+ hint #.L__bti_targets_\targets
.endm
- .macro endif_yield_neon, lbl
- .ifnb \lbl
- b \lbl
- .else
- b .Lyield_out_\@
- .endif
- .previous
-.Lyield_out_\@ :
- .endm
+/*
+ * This macro emits a program property note section identifying
+ * architecture features which require special handling, mainly for
+ * use in assembly files included in the VDSO.
+ */
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+#ifdef CONFIG_ARM64_BTI_KERNEL
+#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \
+ ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_PAC))
+#endif
+
+#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+ /*
+ * This is described with an array of char in the Linux API
+ * spec but the text and all other usage (including binutils,
+ * clang and GCC) treat this as a 32 bit value so no swizzling
+ * is required for big endian.
+ */
+ .long \feat
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+#else
+.macro emit_aarch64_feature_1_and, feat=0
+.endm
+
+#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */
+
+ .macro __mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter
+ mov \tmp, #32 // Patched to correct the immediate
+alternative_cb_end
+.Lspectre_bhb_loop\@:
+ b . + 4
+ subs \tmp, \tmp, #1
+ b.ne .Lspectre_bhb_loop\@
+ sb
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_loop tmp
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable
+ b .L_spectre_bhb_loop_done\@ // Patched to NOP
+alternative_cb_end
+ __mitigate_spectre_bhb_loop \tmp
+.L_spectre_bhb_loop_done\@:
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ /* Save/restores x0-x3 to the stack */
+ .macro __mitigate_spectre_bhb_fw
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ stp x0, x1, [sp, #-16]!
+ stp x2, x3, [sp, #-16]!
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3
+alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
+ nop // Patched to SMC/HVC #0
+alternative_cb_end
+ ldp x2, x3, [sp], #16
+ ldp x0, x1, [sp], #16
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
+
+ .macro mitigate_spectre_bhb_clear_insn
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb
+ /* Patched to NOP when not supported */
+ clearbhb
+ isb
+alternative_cb_end
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+ .endm
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 9543b5e0534d..400d279e0f8d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -17,7 +17,7 @@
#include <asm/lse.h>
#define ATOMIC_OP(op) \
-static inline void arch_##op(int i, atomic_t *v) \
+static __always_inline void arch_##op(int i, atomic_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub)
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, op) \
-static inline int arch_##op##name(int i, atomic_t *v) \
+static __always_inline int arch_##op##name(int i, atomic_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return)
#undef ATOMIC_FETCH_OPS
#define ATOMIC64_OP(op) \
-static inline void arch_##op(long i, atomic64_t *v) \
+static __always_inline void arch_##op(long i, atomic64_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, op) \
-static inline long arch_##op##name(long i, atomic64_t *v) \
+static __always_inline long arch_##op##name(long i, atomic64_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@@ -94,15 +94,13 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_FETCH_OPS
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}
-#define ATOMIC_INIT(i) { (i) }
-
-#define arch_atomic_read(v) READ_ONCE((v)->counter)
-#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_read(v) __READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i))
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_add_return_acquire arch_atomic_add_return_acquire
@@ -144,24 +142,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-#define arch_atomic_xchg_acquire(v, new) \
- arch_xchg_acquire(&((v)->counter), (new))
-#define arch_atomic_xchg_release(v, new) \
- arch_xchg_release(&((v)->counter), (new))
-#define arch_atomic_xchg(v, new) \
- arch_xchg(&((v)->counter), (new))
-
-#define arch_atomic_cmpxchg_relaxed(v, old, new) \
- arch_cmpxchg_relaxed(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_acquire(v, old, new) \
- arch_cmpxchg_acquire(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_release(v, old, new) \
- arch_cmpxchg_release(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg(v, old, new) \
- arch_cmpxchg(&((v)->counter), (old), (new))
-
#define arch_atomic_andnot arch_atomic_andnot
/*
@@ -211,20 +191,8 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic64_xchg_release arch_atomic_xchg_release
-#define arch_atomic64_xchg arch_atomic_xchg
-
-#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic64_cmpxchg arch_atomic_cmpxchg
-
#define arch_atomic64_andnot arch_atomic64_andnot
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-#include <asm-generic/atomic-instrumented.h>
-
#endif /* __ASM_ATOMIC_H */
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 7b012148bfd6..89d2ba272359 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -12,19 +12,6 @@
#include <linux/stringify.h>
-#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
-#define __LL_SC_FALLBACK(asm_ops) \
-" b 3f\n" \
-" .subsection 1\n" \
-"3:\n" \
-asm_ops "\n" \
-" b 4f\n" \
-" .previous\n" \
-"4:\n"
-#else
-#define __LL_SC_FALLBACK(asm_ops) asm_ops
-#endif
-
#ifndef CONFIG_CC_HAS_K_CONSTRAINT
#define K
#endif
@@ -36,38 +23,36 @@ asm_ops "\n" \
*/
#define ATOMIC_OP(op, asm_op, constraint) \
-static inline void \
+static __always_inline void \
__ll_sc_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %2\n" \
-"1: ldxr %w0, %2\n" \
-" " #asm_op " %w0, %w0, %w3\n" \
-" stxr %w1, %w0, %2\n" \
-" cbnz %w1, 1b\n") \
+ " prfm pstl1strm, %2\n" \
+ "1: ldxr %w0, %2\n" \
+ " " #asm_op " %w0, %w0, %w3\n" \
+ " stxr %w1, %w0, %2\n" \
+ " cbnz %w1, 1b\n" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i)); \
}
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline int \
+static __always_inline int \
__ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "_return" #name "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %2\n" \
-"1: ld" #acq "xr %w0, %2\n" \
-" " #asm_op " %w0, %w0, %w3\n" \
-" st" #rel "xr %w1, %w0, %2\n" \
-" cbnz %w1, 1b\n" \
-" " #mb ) \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr %w0, %2\n" \
+ " " #asm_op " %w0, %w0, %w3\n" \
+ " st" #rel "xr %w1, %w0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -76,20 +61,19 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
}
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
-static inline int \
+static __always_inline int \
__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
unsigned long tmp; \
int val, result; \
\
asm volatile("// atomic_fetch_" #op #name "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %3\n" \
-"1: ld" #acq "xr %w0, %3\n" \
-" " #asm_op " %w1, %w0, %w4\n" \
-" st" #rel "xr %w2, %w1, %3\n" \
-" cbnz %w2, 1b\n" \
-" " #mb ) \
+ " prfm pstl1strm, %3\n" \
+ "1: ld" #acq "xr %w0, %3\n" \
+ " " #asm_op " %w1, %w0, %w4\n" \
+ " st" #rel "xr %w2, %w1, %3\n" \
+ " cbnz %w2, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -135,38 +119,36 @@ ATOMIC_OPS(andnot, bic, )
#undef ATOMIC_OP
#define ATOMIC64_OP(op, asm_op, constraint) \
-static inline void \
+static __always_inline void \
__ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %2\n" \
-"1: ldxr %0, %2\n" \
-" " #asm_op " %0, %0, %3\n" \
-" stxr %w1, %0, %2\n" \
-" cbnz %w1, 1b") \
+ " prfm pstl1strm, %2\n" \
+ "1: ldxr %0, %2\n" \
+ " " #asm_op " %0, %0, %3\n" \
+ " stxr %w1, %0, %2\n" \
+ " cbnz %w1, 1b" \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i)); \
}
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long \
+static __always_inline long \
__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
{ \
s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return" #name "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %2\n" \
-"1: ld" #acq "xr %0, %2\n" \
-" " #asm_op " %0, %0, %3\n" \
-" st" #rel "xr %w1, %0, %2\n" \
-" cbnz %w1, 1b\n" \
-" " #mb ) \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr %0, %2\n" \
+ " " #asm_op " %0, %0, %3\n" \
+ " st" #rel "xr %w1, %0, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -175,20 +157,19 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long \
-__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
+static __always_inline long \
+__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
s64 result, val; \
unsigned long tmp; \
\
asm volatile("// atomic64_fetch_" #op #name "\n" \
- __LL_SC_FALLBACK( \
-" prfm pstl1strm, %3\n" \
-"1: ld" #acq "xr %0, %3\n" \
-" " #asm_op " %1, %0, %4\n" \
-" st" #rel "xr %w2, %1, %3\n" \
-" cbnz %w2, 1b\n" \
-" " #mb ) \
+ " prfm pstl1strm, %3\n" \
+ "1: ld" #acq "xr %0, %3\n" \
+ " " #asm_op " %1, %0, %4\n" \
+ " st" #rel "xr %w2, %1, %3\n" \
+ " cbnz %w2, 1b\n" \
+ " " #mb \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: __stringify(constraint) "r" (i) \
: cl); \
@@ -233,22 +214,21 @@ ATOMIC64_OPS(andnot, bic, )
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline s64
+static __always_inline s64
__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
{
s64 result;
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
- __LL_SC_FALLBACK(
-" prfm pstl1strm, %2\n"
-"1: ldxr %0, %2\n"
-" subs %0, %0, #1\n"
-" b.lt 2f\n"
-" stlxr %w1, %0, %2\n"
-" cbnz %w1, 1b\n"
-" dmb ish\n"
-"2:")
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " subs %0, %0, #1\n"
+ " b.lt 2f\n"
+ " stlxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ " dmb ish\n"
+ "2:"
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
:
: "cc", "memory");
@@ -257,7 +237,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
}
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
-static inline u##sz \
+static __always_inline u##sz \
__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
unsigned long old, \
u##sz new) \
@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
old = (u##sz)old; \
\
asm volatile( \
- __LL_SC_FALLBACK( \
" prfm pstl1strm, %[v]\n" \
"1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
" st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
- "2:") \
+ "2:" \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
[v] "+Q" (*(u##sz *)ptr) \
: [old] __stringify(constraint) "r" (old), [new] "r" (new) \
@@ -315,39 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, rel, cl) \
-static inline long \
-__ll_sc__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __CMPXCHG128(name, mb, rel, cl...) \
+static __always_inline u128 \
+__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long tmp, ret; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ unsigned int tmp; \
\
- asm volatile("// __cmpxchg_double" #name "\n" \
- __LL_SC_FALLBACK( \
- " prfm pstl1strm, %2\n" \
- "1: ldxp %0, %1, %2\n" \
- " eor %0, %0, %3\n" \
- " eor %1, %1, %4\n" \
- " orr %1, %0, %1\n" \
- " cbnz %1, 2f\n" \
- " st" #rel "xp %w0, %5, %6, %2\n" \
- " cbnz %w0, 1b\n" \
+ asm volatile("// __cmpxchg128" #name "\n" \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ldxp %[rl], %[rh], %[v]\n" \
+ " cmp %[rl], %[ol]\n" \
+ " ccmp %[rh], %[oh], 0, eq\n" \
+ " b.ne 2f\n" \
+ " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
- "2:") \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
- : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
- : cl); \
+ "2:" \
+ : [v] "+Q" (*(u128 *)ptr), \
+ [rl] "=&r" (r.low), [rh] "=&r" (r.high), \
+ [tmp] "=&r" (tmp) \
+ : [ol] "r" (o.low), [oh] "r" (o.high), \
+ [nl] "r" (n.low), [nh] "r" (n.high) \
+ : "cc", ##cl); \
\
- return ret; \
+ return r.full; \
}
-__CMPXCHG_DBL( , , , )
-__CMPXCHG_DBL(_mb, dmb ish, l, "memory")
+__CMPXCHG128( , , )
+__CMPXCHG128(_mb, dmb ish, l, "memory")
+
+#undef __CMPXCHG128
-#undef __CMPXCHG_DBL
#undef K
#endif /* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 574808b9df4c..87f568a94e55 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -11,12 +11,14 @@
#define __ASM_ATOMIC_LSE_H
#define ATOMIC_OP(op, asm_op) \
-static inline void __lse_atomic_##op(int i, atomic_t *v) \
+static __always_inline void \
+__lse_atomic_##op(int i, atomic_t *v) \
{ \
asm volatile( \
-" " #asm_op " %w[i], %[v]\n" \
- : [i] "+r" (i), [v] "+Q" (v->counter) \
- : "r" (v)); \
+ __LSE_PREAMBLE \
+ " " #asm_op " %w[i], %[v]\n" \
+ : [v] "+Q" (v->counter) \
+ : [i] "r" (i)); \
}
ATOMIC_OP(andnot, stclr)
@@ -24,18 +26,28 @@ ATOMIC_OP(or, stset)
ATOMIC_OP(xor, steor)
ATOMIC_OP(add, stadd)
+static __always_inline void __lse_atomic_sub(int i, atomic_t *v)
+{
+ __lse_atomic_add(-i, v);
+}
+
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \
+ int old; \
+ \
asm volatile( \
-" " #asm_op #mb " %w[i], %w[i], %[v]" \
- : [i] "+r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
+ __LSE_PREAMBLE \
+ " " #asm_op #mb " %w[i], %w[old], %[v]" \
+ : [v] "+Q" (v->counter), \
+ [old] "=r" (old) \
+ : [i] "r" (i) \
: cl); \
\
- return i; \
+ return old; \
}
#define ATOMIC_FETCH_OPS(op, asm_op) \
@@ -52,48 +64,50 @@ ATOMIC_FETCH_OPS(add, ldadd)
#undef ATOMIC_FETCH_OP
#undef ATOMIC_FETCH_OPS
-#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
-static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP_SUB(name) \
+static __always_inline int \
+__lse_atomic_fetch_sub##name(int i, atomic_t *v) \
{ \
- u32 tmp; \
- \
- asm volatile( \
- " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
- " add %w[i], %w[i], %w[tmp]" \
- : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
- : "r" (v) \
- : cl); \
+ return __lse_atomic_fetch_add##name(-i, v); \
+}
+
+ATOMIC_FETCH_OP_SUB(_relaxed)
+ATOMIC_FETCH_OP_SUB(_acquire)
+ATOMIC_FETCH_OP_SUB(_release)
+ATOMIC_FETCH_OP_SUB( )
+
+#undef ATOMIC_FETCH_OP_SUB
+
+#define ATOMIC_OP_ADD_SUB_RETURN(name) \
+static __always_inline int \
+__lse_atomic_add_return##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_add##name(i, v) + i; \
+} \
\
- return i; \
+static __always_inline int \
+__lse_atomic_sub_return##name(int i, atomic_t *v) \
+{ \
+ return __lse_atomic_fetch_sub(i, v) - i; \
}
-ATOMIC_OP_ADD_RETURN(_relaxed, )
-ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
-ATOMIC_OP_ADD_RETURN(_release, l, "memory")
-ATOMIC_OP_ADD_RETURN( , al, "memory")
+ATOMIC_OP_ADD_SUB_RETURN(_relaxed)
+ATOMIC_OP_ADD_SUB_RETURN(_acquire)
+ATOMIC_OP_ADD_SUB_RETURN(_release)
+ATOMIC_OP_ADD_SUB_RETURN( )
-#undef ATOMIC_OP_ADD_RETURN
+#undef ATOMIC_OP_ADD_SUB_RETURN
-static inline void __lse_atomic_and(int i, atomic_t *v)
+static __always_inline void __lse_atomic_and(int i, atomic_t *v)
{
- asm volatile(
- " mvn %w[i], %w[i]\n"
- " stclr %w[i], %[v]"
- : [i] "+&r" (i), [v] "+Q" (v->counter)
- : "r" (v));
+ return __lse_atomic_andnot(~i, v);
}
#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
-static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \
+static __always_inline int \
+__lse_atomic_fetch_and##name(int i, atomic_t *v) \
{ \
- asm volatile( \
- " mvn %w[i], %w[i]\n" \
- " ldclr" #mb " %w[i], %w[i], %[v]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
- : cl); \
- \
- return i; \
+ return __lse_atomic_fetch_andnot##name(~i, v); \
}
ATOMIC_FETCH_OP_AND(_relaxed, )
@@ -103,65 +117,15 @@ ATOMIC_FETCH_OP_AND( , al, "memory")
#undef ATOMIC_FETCH_OP_AND
-static inline void __lse_atomic_sub(int i, atomic_t *v)
-{
- asm volatile(
- " neg %w[i], %w[i]\n"
- " stadd %w[i], %[v]"
- : [i] "+&r" (i), [v] "+Q" (v->counter)
- : "r" (v));
-}
-
-#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
-static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
-{ \
- u32 tmp; \
- \
- asm volatile( \
- " neg %w[i], %w[i]\n" \
- " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
- " add %w[i], %w[i], %w[tmp]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
- : "r" (v) \
- : cl); \
- \
- return i; \
-}
-
-ATOMIC_OP_SUB_RETURN(_relaxed, )
-ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
-ATOMIC_OP_SUB_RETURN(_release, l, "memory")
-ATOMIC_OP_SUB_RETURN( , al, "memory")
-
-#undef ATOMIC_OP_SUB_RETURN
-
-#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
-static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \
-{ \
- asm volatile( \
- " neg %w[i], %w[i]\n" \
- " ldadd" #mb " %w[i], %w[i], %[v]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
- : cl); \
- \
- return i; \
-}
-
-ATOMIC_FETCH_OP_SUB(_relaxed, )
-ATOMIC_FETCH_OP_SUB(_acquire, a, "memory")
-ATOMIC_FETCH_OP_SUB(_release, l, "memory")
-ATOMIC_FETCH_OP_SUB( , al, "memory")
-
-#undef ATOMIC_FETCH_OP_SUB
-
#define ATOMIC64_OP(op, asm_op) \
-static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \
+static __always_inline void \
+__lse_atomic64_##op(s64 i, atomic64_t *v) \
{ \
asm volatile( \
-" " #asm_op " %[i], %[v]\n" \
- : [i] "+r" (i), [v] "+Q" (v->counter) \
- : "r" (v)); \
+ __LSE_PREAMBLE \
+ " " #asm_op " %[i], %[v]\n" \
+ : [v] "+Q" (v->counter) \
+ : [i] "r" (i)); \
}
ATOMIC64_OP(andnot, stclr)
@@ -169,18 +133,28 @@ ATOMIC64_OP(or, stset)
ATOMIC64_OP(xor, steor)
ATOMIC64_OP(add, stadd)
+static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+{
+ __lse_atomic64_add(-i, v);
+}
+
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
+static __always_inline long \
+__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
+ s64 old; \
+ \
asm volatile( \
-" " #asm_op #mb " %[i], %[i], %[v]" \
- : [i] "+r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
+ __LSE_PREAMBLE \
+ " " #asm_op #mb " %[i], %[old], %[v]" \
+ : [v] "+Q" (v->counter), \
+ [old] "=r" (old) \
+ : [i] "r" (i) \
: cl); \
\
- return i; \
+ return old; \
}
#define ATOMIC64_FETCH_OPS(op, asm_op) \
@@ -197,48 +171,50 @@ ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_FETCH_OPS
-#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
-static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
+#define ATOMIC64_FETCH_OP_SUB(name) \
+static __always_inline long \
+__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \
- unsigned long tmp; \
- \
- asm volatile( \
- " ldadd" #mb " %[i], %x[tmp], %[v]\n" \
- " add %[i], %[i], %x[tmp]" \
- : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
- : "r" (v) \
- : cl); \
+ return __lse_atomic64_fetch_add##name(-i, v); \
+}
+
+ATOMIC64_FETCH_OP_SUB(_relaxed)
+ATOMIC64_FETCH_OP_SUB(_acquire)
+ATOMIC64_FETCH_OP_SUB(_release)
+ATOMIC64_FETCH_OP_SUB( )
+
+#undef ATOMIC64_FETCH_OP_SUB
+
+#define ATOMIC64_OP_ADD_SUB_RETURN(name) \
+static __always_inline long \
+__lse_atomic64_add_return##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_add##name(i, v) + i; \
+} \
\
- return i; \
+static __always_inline long \
+__lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
+{ \
+ return __lse_atomic64_fetch_sub##name(i, v) - i; \
}
-ATOMIC64_OP_ADD_RETURN(_relaxed, )
-ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
-ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
-ATOMIC64_OP_ADD_RETURN( , al, "memory")
+ATOMIC64_OP_ADD_SUB_RETURN(_relaxed)
+ATOMIC64_OP_ADD_SUB_RETURN(_acquire)
+ATOMIC64_OP_ADD_SUB_RETURN(_release)
+ATOMIC64_OP_ADD_SUB_RETURN( )
-#undef ATOMIC64_OP_ADD_RETURN
+#undef ATOMIC64_OP_ADD_SUB_RETURN
-static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
+static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v)
{
- asm volatile(
- " mvn %[i], %[i]\n"
- " stclr %[i], %[v]"
- : [i] "+&r" (i), [v] "+Q" (v->counter)
- : "r" (v));
+ return __lse_atomic64_andnot(~i, v);
}
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
-static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
+static __always_inline long \
+__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
- asm volatile( \
- " mvn %[i], %[i]\n" \
- " ldclr" #mb " %[i], %[i], %[v]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
- : cl); \
- \
- return i; \
+ return __lse_atomic64_fetch_andnot##name(~i, v); \
}
ATOMIC64_FETCH_OP_AND(_relaxed, )
@@ -248,63 +224,12 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND
-static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
-{
- asm volatile(
- " neg %[i], %[i]\n"
- " stadd %[i], %[v]"
- : [i] "+&r" (i), [v] "+Q" (v->counter)
- : "r" (v));
-}
-
-#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
-{ \
- unsigned long tmp; \
- \
- asm volatile( \
- " neg %[i], %[i]\n" \
- " ldadd" #mb " %[i], %x[tmp], %[v]\n" \
- " add %[i], %[i], %x[tmp]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
- : "r" (v) \
- : cl); \
- \
- return i; \
-}
-
-ATOMIC64_OP_SUB_RETURN(_relaxed, )
-ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
-ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
-ATOMIC64_OP_SUB_RETURN( , al, "memory")
-
-#undef ATOMIC64_OP_SUB_RETURN
-
-#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
-static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
-{ \
- asm volatile( \
- " neg %[i], %[i]\n" \
- " ldadd" #mb " %[i], %[i], %[v]" \
- : [i] "+&r" (i), [v] "+Q" (v->counter) \
- : "r" (v) \
- : cl); \
- \
- return i; \
-}
-
-ATOMIC64_FETCH_OP_SUB(_relaxed, )
-ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory")
-ATOMIC64_FETCH_OP_SUB(_release, l, "memory")
-ATOMIC64_FETCH_OP_SUB( , al, "memory")
-
-#undef ATOMIC64_FETCH_OP_SUB
-
-static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long tmp;
asm volatile(
+ __LSE_PREAMBLE
"1: ldr %x[tmp], %[v]\n"
" subs %[ret], %x[tmp], #1\n"
" b.lt 2f\n"
@@ -326,21 +251,15 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
- register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
- register u##sz x1 asm ("x1") = old; \
- register u##sz x2 asm ("x2") = new; \
- unsigned long tmp; \
- \
asm volatile( \
- " mov %" #w "[tmp], %" #w "[old]\n" \
- " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
- " mov %" #w "[ret], %" #w "[tmp]" \
- : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \
- [tmp] "=&r" (tmp) \
- : [old] "r" (x1), [new] "r" (x2) \
+ __LSE_PREAMBLE \
+ " cas" #mb #sfx " %" #w "[old], %" #w "[new], %[v]\n" \
+ : [v] "+Q" (*(u##sz *)ptr), \
+ [old] "+r" (old) \
+ : [new] "rZ" (new) \
: cl); \
\
- return x0; \
+ return old; \
}
__CMPXCHG_CASE(w, b, , 8, )
@@ -362,39 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, cl...) \
-static __always_inline long \
-__lse__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name, mb, cl...) \
+static __always_inline u128 \
+__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long oldval1 = old1; \
- unsigned long oldval2 = old2; \
- register unsigned long x0 asm ("x0") = old1; \
- register unsigned long x1 asm ("x1") = old2; \
- register unsigned long x2 asm ("x2") = new1; \
- register unsigned long x3 asm ("x3") = new2; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ register unsigned long x0 asm ("x0") = o.low; \
+ register unsigned long x1 asm ("x1") = o.high; \
+ register unsigned long x2 asm ("x2") = n.low; \
+ register unsigned long x3 asm ("x3") = n.high; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\
asm volatile( \
+ __LSE_PREAMBLE \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
- " eor %[old1], %[old1], %[oldval1]\n" \
- " eor %[old2], %[old2], %[oldval2]\n" \
- " orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(unsigned long *)ptr) \
+ [v] "+Q" (*(u128 *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
- [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
+ [oldval1] "r" (o.low), [oldval2] "r" (o.high) \
: cl); \
\
- return x0; \
+ r.low = x0; r.high = x1; \
+ \
+ return r.full; \
}
-__CMPXCHG_DBL( , )
-__CMPXCHG_DBL(_mb, al, "memory")
+__CMPXCHG128( , )
+__CMPXCHG128(_mb, al, "memory")
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 7d9cc5ec4971..cf2987464c18 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -11,42 +11,70 @@
#include <linux/kasan-checks.h>
+#include <asm/alternative-macros.h>
+
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) asm volatile(__nops(n))
#define sev() asm volatile("sev" : : : "memory")
#define wfe() asm volatile("wfe" : : : "memory")
+#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \
+ : : "r" (val) : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
+#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \
+ : : "r" (val) : "memory")
#define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define psb_csync() asm volatile("hint #17" : : : "memory")
+#define __tsb_csync() asm volatile("hint #18" : : : "memory")
#define csdb() asm volatile("hint #20" : : : "memory")
-#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
- SB_BARRIER_INSN"nop\n", \
- ARM64_HAS_SB))
+/*
+ * Data Gathering Hint:
+ * This instruction prevents merging memory accesses with Normal-NC or
+ * Device-GRE attributes before the hint instruction with any memory accesses
+ * appearing after the hint instruction.
+ */
+#define dgh() asm volatile("hint #6" : : : "memory")
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
- extern struct static_key_false gic_pmr_sync; \
- \
- if (static_branch_unlikely(&gic_pmr_sync)) \
- dsb(sy); \
+ asm volatile( \
+ ALTERNATIVE_CB("dsb sy", \
+ ARM64_HAS_GIC_PRIO_RELAXED_SYNC, \
+ alt_cb_patch_nops) \
+ ); \
} while(0)
#else
#define pmr_sync() do {} while (0)
#endif
-#define mb() dsb(sy)
-#define rmb() dsb(ld)
-#define wmb() dsb(st)
-
-#define dma_rmb() dmb(oshld)
-#define dma_wmb() dmb(oshst)
+#define __mb() dsb(sy)
+#define __rmb() dsb(ld)
+#define __wmb() dsb(st)
+
+#define __dma_mb() dmb(osh)
+#define __dma_rmb() dmb(oshld)
+#define __dma_wmb() dmb(oshst)
+
+#define io_stop_wc() dgh()
+
+#define tsb_csync() \
+ do { \
+ /* \
+ * CPUs affected by Arm Erratum 2054223 or 2067961 needs \
+ * another TSB to ensure the trace is flushed. The barriers \
+ * don't have to be strictly back to back, as long as the \
+ * CPU is in trace prohibited state. \
+ */ \
+ if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \
+ __tsb_csync(); \
+ __tsb_csync(); \
+ } while (0)
/*
* Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz
@@ -69,6 +97,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
return mask;
}
+/*
+ * Ensure that reads of the counter are treated the same as memory reads
+ * for the purposes of ordering by subsequent memory barriers.
+ *
+ * This insanity brought to you by speculative system register reads,
+ * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ *
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
+ */
+#define arch_counter_enforce_ordering(val) do { \
+ u64 tmp, _val = (val); \
+ \
+ asm volatile( \
+ " eor %0, %1, %1\n" \
+ " add %0, sp, %0\n" \
+ " ldr xzr, [%0]" \
+ : "=r" (tmp) : "r" (_val)); \
+} while (0)
+
#define __smp_mb() dmb(ish)
#define __smp_rmb() dmb(ishld)
#define __smp_wmb() dmb(ishst)
@@ -76,33 +123,33 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
#define __smp_store_release(p, v) \
do { \
typeof(p) __p = (p); \
- union { typeof(*p) __val; char __c[1]; } __u = \
- { .__val = (__force typeof(*p)) (v) }; \
+ union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u = \
+ { .__val = (__force __unqual_scalar_typeof(*p)) (v) }; \
compiletime_assert_atomic_type(*p); \
kasan_check_write(__p, sizeof(*p)); \
switch (sizeof(*p)) { \
case 1: \
asm volatile ("stlrb %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u8 *)__u.__c) \
+ : "rZ" (*(__u8 *)__u.__c) \
: "memory"); \
break; \
case 2: \
asm volatile ("stlrh %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u16 *)__u.__c) \
+ : "rZ" (*(__u16 *)__u.__c) \
: "memory"); \
break; \
case 4: \
asm volatile ("stlr %w1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u32 *)__u.__c) \
+ : "rZ" (*(__u32 *)__u.__c) \
: "memory"); \
break; \
case 8: \
- asm volatile ("stlr %1, %0" \
+ asm volatile ("stlr %x1, %0" \
: "=Q" (*__p) \
- : "r" (*(__u64 *)__u.__c) \
+ : "rZ" (*(__u64 *)__u.__c) \
: "memory"); \
break; \
} \
@@ -110,7 +157,7 @@ do { \
#define __smp_load_acquire(p) \
({ \
- union { typeof(*p) __val; char __c[1]; } __u; \
+ union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u; \
typeof(p) __p = (p); \
compiletime_assert_atomic_type(*p); \
kasan_check_read(__p, sizeof(*p)); \
@@ -136,33 +183,33 @@ do { \
: "Q" (*__p) : "memory"); \
break; \
} \
- __u.__val; \
+ (typeof(*p))__u.__val; \
})
#define smp_cond_load_relaxed(ptr, cond_expr) \
({ \
typeof(ptr) __PTR = (ptr); \
- typeof(*ptr) VAL; \
+ __unqual_scalar_typeof(*ptr) VAL; \
for (;;) { \
VAL = READ_ONCE(*__PTR); \
if (cond_expr) \
break; \
__cmpwait_relaxed(__PTR, VAL); \
} \
- VAL; \
+ (typeof(*ptr))VAL; \
})
#define smp_cond_load_acquire(ptr, cond_expr) \
({ \
typeof(ptr) __PTR = (ptr); \
- typeof(*ptr) VAL; \
+ __unqual_scalar_typeof(*ptr) VAL; \
for (;;) { \
VAL = smp_load_acquire(__PTR); \
if (cond_expr) \
break; \
__cmpwait_relaxed(__PTR, VAL); \
} \
- VAL; \
+ (typeof(*ptr))VAL; \
})
#include <asm-generic/barrier.h>
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 81a3e519b07d..9b3c787132d2 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -18,7 +18,6 @@
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index c7f67da13cd9..3e7943fd17a4 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -13,8 +13,7 @@
#define MAX_FDT_SIZE SZ_2M
/*
- * arm64 requires the kernel image to placed
- * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ * arm64 requires the kernel image to placed at a 2 MB aligned base address
*/
#define MIN_KIMG_ALIGN SZ_2M
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index e3d47b52161d..1abdcd508a11 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -10,20 +10,31 @@
* #imm16 values used for BRK instruction generation
* 0x004: for installing kprobes
* 0x005: for installing uprobes
+ * 0x006: for kprobe software single-step
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
* 0x401: for compile time BRK instruction
* 0x800: kernel-mode BUG() and WARN() traps
* 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff)
+ * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8)
+ * 0x8xxx: Control-Flow Integrity traps
*/
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
+#define KPROBES_BRK_SS_IMM 0x006
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
#define BUG_BRK_IMM 0x800
#define KASAN_BRK_IMM 0x900
#define KASAN_BRK_MASK 0x0ff
+#define UBSAN_BRK_IMM 0x5500
+#define UBSAN_BRK_MASK 0x00ff
+
+#define CFI_BRK_IMM_TARGET GENMASK(4, 0)
+#define CFI_BRK_IMM_TYPE GENMASK(9, 5)
+#define CFI_BRK_IMM_BASE 0x8000
+#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE)
#endif
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 806e9dc2a852..06a4670bdb0b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -5,32 +5,9 @@
#ifndef __ASM_CACHE_H
#define __ASM_CACHE_H
-#include <asm/cputype.h>
-
-#define CTR_L1IP_SHIFT 14
-#define CTR_L1IP_MASK 3
-#define CTR_DMINLINE_SHIFT 16
-#define CTR_IMINLINE_SHIFT 0
-#define CTR_IMINLINE_MASK 0xf
-#define CTR_ERG_SHIFT 20
-#define CTR_CWG_SHIFT 24
-#define CTR_CWG_MASK 15
-#define CTR_IDC_SHIFT 28
-#define CTR_DIC_SHIFT 29
-
-#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
-
-#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
-
-#define ICACHE_POLICY_VPIPT 0
-#define ICACHE_POLICY_VIPT 2
-#define ICACHE_POLICY_PIPT 3
-
#define L1_CACHE_SHIFT (6)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-
#define CLIDR_LOUU_SHIFT 27
#define CLIDR_LOC_SHIFT 24
#define CLIDR_LOUIS_SHIFT 21
@@ -39,6 +16,15 @@
#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
+#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
+
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
@@ -47,17 +33,31 @@
* the CPU.
*/
#define ARCH_DMA_MINALIGN (128)
+#define ARCH_KMALLOC_MINALIGN (8)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+#include <linux/kasan-enabled.h>
+
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+#include <asm/sysreg.h>
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
+#elif defined(CONFIG_KASAN_HW_TAGS)
+static inline unsigned int arch_slab_minalign(void)
+{
+ return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
+ __alignof__(unsigned long long);
+}
+#define arch_slab_minalign() arch_slab_minalign()
#endif
-#ifndef __ASSEMBLY__
-
-#include <linux/bitops.h>
+#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
-#define ICACHEF_VPIPT 1
extern unsigned long __icache_flags;
/*
@@ -69,17 +69,12 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static inline int icache_is_vpipt(void)
-{
- return test_bit(ICACHEF_VPIPT, &__icache_flags);
-}
-
static inline u32 cache_type_cwg(void)
{
- return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
+ return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
}
-#define __read_mostly __section(.data..read_mostly)
+#define __read_mostly __section(".data..read_mostly")
static inline int cache_line_size_of_cpu(void)
{
@@ -90,6 +85,8 @@ static inline int cache_line_size_of_cpu(void)
int cache_line_size(void);
+#define dma_get_cache_alignment cache_line_size
+
/*
* Read the effective value of CTR_EL0.
*
@@ -110,12 +107,12 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
{
u32 ctr = read_cpuid_cachetype();
- if (!(ctr & BIT(CTR_IDC_SHIFT))) {
+ if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
u64 clidr = read_sysreg(clidr_el1);
if (CLIDR_LOC(clidr) == 0 ||
(CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
- ctr |= BIT(CTR_IDC_SHIFT);
+ ctr |= BIT(CTR_EL0_IDC_SHIFT);
}
return ctr;
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 665c78e0665a..fefac75fa009 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,56 +30,64 @@
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
* VIPT I-cache.
*
- * flush_cache_mm(mm)
+ * All functions below apply to the interval [start, end)
+ * - start - virtual start address (inclusive)
+ * - end - virtual end address (exclusive)
*
- * Clean and invalidate all user space cache entries
- * before a change of page tables.
+ * caches_clean_inval_pou(start, end)
*
- * flush_icache_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * caches_clean_inval_user_pou(start, end)
*
- * invalidate_icache_range(start, end)
+ * Ensure coherency between the I-cache and the D-cache region to
+ * the Point of Unification.
+ * Use only if the region might access user memory.
*
- * Invalidate the I-cache in the region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * icache_inval_pou(start, end)
*
- * __flush_cache_user_range(start, end)
+ * Invalidate I-cache region to the Point of Unification.
*
- * Ensure coherency between the I-cache and the D-cache in the
- * region described by start, end.
- * - start - virtual start address
- * - end - virtual end address
+ * dcache_clean_inval_poc(start, end)
*
- * __flush_dcache_area(kaddr, size)
+ * Clean and invalidate D-cache region to the Point of Coherency.
*
- * Ensure that the data held in page is written back.
- * - kaddr - page address
- * - size - region size
+ * dcache_inval_poc(start, end)
+ *
+ * Invalidate D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_poc(start, end)
+ *
+ * Clean D-cache region to the Point of Coherency.
+ *
+ * dcache_clean_pop(start, end)
+ *
+ * Clean D-cache region to the Point of Persistence.
+ *
+ * dcache_clean_pou(start, end)
+ *
+ * Clean D-cache region to the Point of Unification.
*/
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
static inline void flush_icache_range(unsigned long start, unsigned long end)
{
- __flush_icache_range(start, end);
+ caches_clean_inval_pou(start, end);
/*
* IPI all online CPUs so that they undergo a context synchronization
* event and are forced to refetch the new instructions.
*/
-#ifdef CONFIG_KGDB
+
/*
* KGDB performs cache maintenance with interrupts disabled, so we
* will deadlock trying to IPI the secondary CPUs. In theory, we can
@@ -89,32 +97,12 @@ static inline void flush_icache_range(unsigned long start, unsigned long end)
* the patching operation, so we don't need extra IPIs here anyway.
* In which case, add a KGDB-specific bodge and return early.
*/
- if (kgdb_connected && irqs_disabled())
+ if (in_dbg_master())
return;
-#endif
- kick_all_cpus_sync();
-}
-static inline void flush_cache_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_cache_page(struct vm_area_struct *vma,
- unsigned long user_addr, unsigned long pfn)
-{
-}
-
-static inline void flush_cache_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
+ kick_all_cpus_sync();
}
-
-/*
- * Cache maintenance functions used by the DMA API. No to be used directly.
- */
-extern void __dma_map_area(const void *, size_t, int);
-extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_area(const void *, size_t);
+#define flush_icache_range flush_icache_range
/*
* Copy user data from/to a page which is mapped into a different
@@ -123,15 +111,10 @@ extern void __dma_flush_area(const void *, size_t);
*/
extern void copy_to_user_page(struct vm_area_struct *, struct page *,
unsigned long, void *, const void *, unsigned long);
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- } while (0)
-
-#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define copy_to_user_page copy_to_user_page
/*
- * flush_dcache_page is used when the kernel has written to the page
+ * flush_dcache_folio is used when the kernel has written to the page
* cache page at virtual address page->virtual.
*
* If this page isn't mapped (ie, page_mapping == NULL), or it might
@@ -144,39 +127,18 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
+#define flush_dcache_folio flush_dcache_folio
-static inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
{
- if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+ if (alternative_has_cap_unlikely(ARM64_HAS_CACHE_DIC))
return;
asm("ic ialluis");
dsb(ish);
}
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-
-/*
- * We don't appear to need to do anything here. In fact, if we did, we'd
- * duplicate cache flushing elsewhere performed by flush_dcache_page().
- */
-#define flush_icache_page(vma,page) do { } while (0)
-
-/*
- * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
- */
-static inline void flush_cache_vmap(unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-}
-
-int set_memory_valid(unsigned long addr, int numpages, int enable);
-
-int set_direct_map_invalid_noflush(struct page *page);
-int set_direct_map_default_noflush(struct page *page);
+#include <asm-generic/cacheflush.h>
-#endif
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index d064a50deb5f..dc52b733675d 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -5,7 +5,12 @@
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H
-#include <linux/types.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
static inline __sum16 csum_fold(__wsum csum)
{
@@ -19,22 +24,26 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
__uint128_t tmp;
u64 sum;
+ int n = ihl; /* we want it signed */
tmp = *(const __uint128_t *)iph;
iph += 16;
- ihl -= 4;
+ n -= 4;
tmp += ((tmp >> 64) | (tmp << 64));
sum = tmp >> 64;
do {
sum += *(const u32 *)iph;
iph += 4;
- } while (--ihl);
+ } while (--n > 0);
sum += ((sum >> 32) | (sum << 32));
- return csum_fold((__force u32)(sum >> 32));
+ return csum_fold((__force __wsum)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
#include <asm-generic/checksum.h>
#endif /* __ASM_CHECKSUM_H */
diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h
index 0ece64a26c8c..482185566b0c 100644
--- a/arch/arm64/include/asm/clocksource.h
+++ b/arch/arm64/include/asm/clocksource.h
@@ -2,8 +2,6 @@
#ifndef _ASM_CLOCKSOURCE_H
#define _ASM_CLOCKSOURCE_H
-struct arch_clocksource_data {
- bool vdso_direct; /* Usable for direct VDSO access? */
-};
+#include <asm/vdso/clocksource.h>
#endif
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f9bef42c1411..d7a540736741 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -62,9 +62,8 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE
#define __XCHG_GEN(sfx) \
-static __always_inline unsigned long __xchg##sfx(unsigned long x, \
- volatile void *ptr, \
- int size) \
+static __always_inline unsigned long \
+__arch_xchg##sfx(unsigned long x, volatile void *ptr, int size) \
{ \
switch (size) { \
case 1: \
@@ -93,7 +92,7 @@ __XCHG_GEN(_mb)
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
- __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __arch_xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
})
@@ -131,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name) \
-static inline long __cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name) \
+static inline u128 __cmpxchg128##name(volatile u128 *ptr, \
+ u128 old, u128 new) \
{ \
- return __lse_ll_sc_body(_cmpxchg_double##name, \
- old1, old2, new1, new2, ptr); \
+ return __lse_ll_sc_body(_cmpxchg128##name, \
+ ptr, old, new); \
}
-__CMPXCHG_DBL( )
-__CMPXCHG_DBL(_mb)
+__CMPXCHG128( )
+__CMPXCHG128(_mb)
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#define __CMPXCHG_GEN(sfx) \
static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
@@ -199,34 +195,17 @@ __CMPXCHG_GEN(_mb)
#define arch_cmpxchg64 arch_cmpxchg
#define arch_cmpxchg64_local arch_cmpxchg_local
-/* cmpxchg_double */
-#define system_has_cmpxchg_double() 1
-
-#define __cmpxchg_double_check(ptr1, ptr2) \
-({ \
- if (sizeof(*(ptr1)) != 8) \
- BUILD_BUG(); \
- VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \
-})
+/* cmpxchg128 */
+#define system_has_cmpxchg128() 1
-#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128_mb((ptr), (o), (n)); \
})
-#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128_local(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128((ptr), (o), (n)); \
})
#define __CMPWAIT_CASE(w, sfx, sz) \
@@ -243,7 +222,7 @@ static inline void __cmpwait_case_##sz(volatile void *ptr, \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
"1:" \
- : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \
: [val] "r" (val)); \
}
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index b0d53a265f1d..ae904a1ad529 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -4,6 +4,21 @@
*/
#ifndef __ASM_COMPAT_H
#define __ASM_COMPAT_H
+
+#define compat_mode_t compat_mode_t
+typedef u16 compat_mode_t;
+
+#define __compat_uid_t __compat_uid_t
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_statfs compat_statfs
+
+#include <asm-generic/compat.h>
+
#ifdef CONFIG_COMPAT
/*
@@ -13,29 +28,15 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <asm-generic/compat.h>
-
-#define COMPAT_USER_HZ 100
#ifdef __AARCH64EB__
#define COMPAT_UTS_MACHINE "armv8b\0\0"
#else
#define COMPAT_UTS_MACHINE "armv8l\0\0"
#endif
-typedef u16 __compat_uid_t;
-typedef u16 __compat_gid_t;
typedef u16 __compat_uid16_t;
typedef u16 __compat_gid16_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u16 compat_mode_t;
-typedef u32 compat_dev_t;
typedef s32 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef u32 compat_caddr_t;
-typedef __kernel_fsid_t compat_fsid_t;
-typedef s64 compat_s64;
-typedef u64 compat_u64;
struct compat_stat {
#ifdef __AARCH64EB__
@@ -67,26 +68,6 @@ struct compat_stat {
compat_ulong_t __unused4[2];
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-};
-
struct compat_statfs {
int f_type;
int f_bsize;
@@ -102,100 +83,9 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
#define COMPAT_MINSIGSTKSZ 2048
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
- return (void __user *)compat_user_stack_pointer() - len;
-}
-
-struct compat_ipc64_perm {
- compat_key_t key;
- __compat_uid32_t uid;
- __compat_gid32_t gid;
- __compat_uid32_t cuid;
- __compat_gid32_t cgid;
- unsigned short mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- compat_ulong_t unused1;
- compat_ulong_t unused2;
-};
-
-struct compat_semid64_ds {
- struct compat_ipc64_perm sem_perm;
- compat_ulong_t sem_otime;
- compat_ulong_t sem_otime_high;
- compat_ulong_t sem_ctime;
- compat_ulong_t sem_ctime_high;
- compat_ulong_t sem_nsems;
- compat_ulong_t __unused3;
- compat_ulong_t __unused4;
-};
-
-struct compat_msqid64_ds {
- struct compat_ipc64_perm msg_perm;
- compat_ulong_t msg_stime;
- compat_ulong_t msg_stime_high;
- compat_ulong_t msg_rtime;
- compat_ulong_t msg_rtime_high;
- compat_ulong_t msg_ctime;
- compat_ulong_t msg_ctime_high;
- compat_ulong_t msg_cbytes;
- compat_ulong_t msg_qnum;
- compat_ulong_t msg_qbytes;
- compat_pid_t msg_lspid;
- compat_pid_t msg_lrpid;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
-struct compat_shmid64_ds {
- struct compat_ipc64_perm shm_perm;
- compat_size_t shm_segsz;
- compat_ulong_t shm_atime;
- compat_ulong_t shm_atime_high;
- compat_ulong_t shm_dtime;
- compat_ulong_t shm_dtime_high;
- compat_ulong_t shm_ctime;
- compat_ulong_t shm_ctime_high;
- compat_pid_t shm_cpid;
- compat_pid_t shm_lpid;
- compat_ulong_t shm_nattch;
- compat_ulong_t __unused4;
- compat_ulong_t __unused5;
-};
-
static inline int is_compat_task(void)
{
return test_thread_flag(TIF_32BIT);
@@ -206,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread)
return test_ti_thread_flag(thread, TIF_32BIT);
}
+long compat_arm_syscall(struct pt_regs *regs, int scno);
+
#else /* !CONFIG_COMPAT */
static inline int is_compat_thread(struct thread_info *thread)
diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h
new file mode 100644
index 000000000000..9bbd7b7097ff
--- /dev/null
+++ b/arch/arm64/include/asm/compiler.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_COMPILER_H
+#define __ASM_COMPILER_H
+
+#ifdef ARM64_ASM_ARCH
+#define ARM64_ASM_PREAMBLE ".arch " ARM64_ASM_ARCH "\n"
+#else
+#define ARM64_ASM_PREAMBLE
+#endif
+
+#define xpaclri(ptr) \
+({ \
+ register unsigned long __xpaclri_ptr asm("x30") = (ptr); \
+ \
+ asm( \
+ ARM64_ASM_PREAMBLE \
+ " hint #7\n" \
+ : "+r" (__xpaclri_ptr)); \
+ \
+ __xpaclri_ptr; \
+})
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_strip_kernel_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_kernel_insn_pac(ptr) (ptr)
+#endif
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define ptrauth_strip_user_insn_pac(ptr) xpaclri(ptr)
+#else
+#define ptrauth_strip_user_insn_pac(ptr) (ptr)
+#endif
+
+#if !defined(CONFIG_BUILTIN_RETURN_ADDRESS_STRIPS_PAC)
+#define __builtin_return_address(val) \
+ (void *)(ptrauth_strip_kernel_insn_pac((unsigned long)__builtin_return_address(val)))
+#endif
+
+#endif /* __ASM_COMPILER_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index d72d995b7e25..b1e43f56ee46 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -12,46 +12,56 @@
/*
* Records attributes of an individual CPU.
*/
-struct cpuinfo_arm64 {
- struct cpu cpu;
- struct kobject kobj;
- u32 reg_ctr;
- u32 reg_cntfrq;
- u32 reg_dczid;
- u32 reg_midr;
- u32 reg_revidr;
-
- u64 reg_id_aa64dfr0;
- u64 reg_id_aa64dfr1;
- u64 reg_id_aa64isar0;
- u64 reg_id_aa64isar1;
- u64 reg_id_aa64mmfr0;
- u64 reg_id_aa64mmfr1;
- u64 reg_id_aa64mmfr2;
- u64 reg_id_aa64pfr0;
- u64 reg_id_aa64pfr1;
- u64 reg_id_aa64zfr0;
-
+struct cpuinfo_32bit {
u32 reg_id_dfr0;
+ u32 reg_id_dfr1;
u32 reg_id_isar0;
u32 reg_id_isar1;
u32 reg_id_isar2;
u32 reg_id_isar3;
u32 reg_id_isar4;
u32 reg_id_isar5;
+ u32 reg_id_isar6;
u32 reg_id_mmfr0;
u32 reg_id_mmfr1;
u32 reg_id_mmfr2;
u32 reg_id_mmfr3;
+ u32 reg_id_mmfr4;
+ u32 reg_id_mmfr5;
u32 reg_id_pfr0;
u32 reg_id_pfr1;
+ u32 reg_id_pfr2;
u32 reg_mvfr0;
u32 reg_mvfr1;
u32 reg_mvfr2;
+};
+
+struct cpuinfo_arm64 {
+ struct kobject kobj;
+ u64 reg_ctr;
+ u64 reg_cntfrq;
+ u64 reg_dczid;
+ u64 reg_midr;
+ u64 reg_revidr;
+ u64 reg_gmid;
+ u64 reg_smidr;
+
+ u64 reg_id_aa64dfr0;
+ u64 reg_id_aa64dfr1;
+ u64 reg_id_aa64isar0;
+ u64 reg_id_aa64isar1;
+ u64 reg_id_aa64isar2;
+ u64 reg_id_aa64mmfr0;
+ u64 reg_id_aa64mmfr1;
+ u64 reg_id_aa64mmfr2;
+ u64 reg_id_aa64mmfr3;
+ u64 reg_id_aa64pfr0;
+ u64 reg_id_aa64pfr1;
+ u64 reg_id_aa64zfr0;
+ u64 reg_id_aa64smfr0;
- /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
- u64 reg_zcr;
+ struct cpuinfo_32bit aarch32;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index 86aabf1e0199..a444c8915e88 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -21,7 +21,7 @@
* mechanism for doing so, tests whether it is possible to boot
* the given CPU.
* @cpu_boot: Boots a cpu into the kernel.
- * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary
* synchronisation. Called from the cpu being booted.
* @cpu_can_disable: Determines whether a CPU can be disabled based on
* mechanism-specific information.
@@ -31,11 +31,6 @@
* @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
* cpu being killed.
* @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu.
- * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for
- * a proposed logical id.
- * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
- * to wrong parameters or error conditions. Called from the
- * CPU being suspended. Must be called with IRQs disabled.
*/
struct cpu_operations {
const char *name;
@@ -49,18 +44,14 @@ struct cpu_operations {
void (*cpu_die)(unsigned int cpu);
int (*cpu_kill)(unsigned int cpu);
#endif
-#ifdef CONFIG_CPU_IDLE
- int (*cpu_init_idle)(unsigned int);
- int (*cpu_suspend)(unsigned long);
-#endif
};
-extern const struct cpu_operations *cpu_ops[NR_CPUS];
-int __init cpu_read_ops(int cpu);
+int __init init_cpu_ops(int cpu);
+extern const struct cpu_operations *get_cpu_ops(int cpu);
-static inline void __init cpu_read_bootcpu_ops(void)
+static inline void __init init_bootcpu_ops(void)
{
- cpu_read_ops(0);
+ init_cpu_ops(0);
}
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b92683871119..270680e2b5c4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -1,62 +1,67 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
+
#ifndef __ASM_CPUCAPS_H
#define __ASM_CPUCAPS_H
-#define ARM64_WORKAROUND_CLEAN_CACHE 0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
-#define ARM64_WORKAROUND_845719 2
-#define ARM64_HAS_SYSREG_GIC_CPUIF 3
-#define ARM64_HAS_PAN 4
-#define ARM64_HAS_LSE_ATOMICS 5
-#define ARM64_WORKAROUND_CAVIUM_23154 6
-#define ARM64_WORKAROUND_834220 7
-#define ARM64_HAS_NO_HW_PREFETCH 8
-#define ARM64_HAS_UAO 9
-#define ARM64_ALT_PAN_NOT_UAO 10
-#define ARM64_HAS_VIRT_HOST_EXTN 11
-#define ARM64_WORKAROUND_CAVIUM_27456 12
-#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_HARDEN_EL2_VECTORS 14
-#define ARM64_HAS_CNP 15
-#define ARM64_HAS_NO_FPSIMD 16
-#define ARM64_WORKAROUND_REPEAT_TLBI 17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
-#define ARM64_WORKAROUND_858921 19
-#define ARM64_WORKAROUND_CAVIUM_30115 20
-#define ARM64_HAS_DCPOP 21
-#define ARM64_SVE 22
-#define ARM64_UNMAP_KERNEL_AT_EL0 23
-#define ARM64_HARDEN_BRANCH_PREDICTOR 24
-#define ARM64_HAS_RAS_EXTN 25
-#define ARM64_WORKAROUND_843419 26
-#define ARM64_HAS_CACHE_IDC 27
-#define ARM64_HAS_CACHE_DIC 28
-#define ARM64_HW_DBM 29
-#define ARM64_SSBD 30
-#define ARM64_MISMATCHED_CACHE_TYPE 31
-#define ARM64_HAS_STAGE2_FWB 32
-#define ARM64_HAS_CRC32 33
-#define ARM64_SSBS 34
-#define ARM64_WORKAROUND_1418040 35
-#define ARM64_HAS_SB 36
-#define ARM64_WORKAROUND_1165522 37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH 38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39
-#define ARM64_HAS_GENERIC_AUTH_ARCH 40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41
-#define ARM64_HAS_IRQ_PRIO_MASKING 42
-#define ARM64_HAS_DCPODP 43
-#define ARM64_WORKAROUND_1463225 44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
-#define ARM64_WORKAROUND_1542419 47
-#define ARM64_WORKAROUND_1319367 48
+#include <asm/cpucap-defs.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+/*
+ * Check whether a cpucap is possible at compiletime.
+ */
+static __always_inline bool
+cpucap_is_possible(const unsigned int cap)
+{
+ compiletime_assert(__builtin_constant_p(cap),
+ "cap must be a constant");
+ compiletime_assert(cap < ARM64_NCAPS,
+ "cap must be < ARM64_NCAPS");
+
+ switch (cap) {
+ case ARM64_HAS_PAN:
+ return IS_ENABLED(CONFIG_ARM64_PAN);
+ case ARM64_HAS_EPAN:
+ return IS_ENABLED(CONFIG_ARM64_EPAN);
+ case ARM64_SVE:
+ return IS_ENABLED(CONFIG_ARM64_SVE);
+ case ARM64_SME:
+ case ARM64_SME2:
+ case ARM64_SME_FA64:
+ return IS_ENABLED(CONFIG_ARM64_SME);
+ case ARM64_HAS_CNP:
+ return IS_ENABLED(CONFIG_ARM64_CNP);
+ case ARM64_HAS_ADDRESS_AUTH:
+ case ARM64_HAS_GENERIC_AUTH:
+ return IS_ENABLED(CONFIG_ARM64_PTR_AUTH);
+ case ARM64_HAS_GIC_PRIO_MASKING:
+ return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI);
+ case ARM64_MTE:
+ return IS_ENABLED(CONFIG_ARM64_MTE);
+ case ARM64_BTI:
+ return IS_ENABLED(CONFIG_ARM64_BTI);
+ case ARM64_HAS_TLB_RANGE:
+ return IS_ENABLED(CONFIG_ARM64_TLB_RANGE);
+ case ARM64_UNMAP_KERNEL_AT_EL0:
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
+ case ARM64_WORKAROUND_843419:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419);
+ case ARM64_WORKAROUND_1742098:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_1742098);
+ case ARM64_WORKAROUND_2645198:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198);
+ case ARM64_WORKAROUND_2658417:
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_2658417);
+ case ARM64_WORKAROUND_CAVIUM_23154:
+ return IS_ENABLED(CONFIG_CAVIUM_ERRATUM_23154);
+ case ARM64_WORKAROUND_NVIDIA_CARMEL_CNP:
+ return IS_ENABLED(CONFIG_NVIDIA_CARMEL_CNP_ERRATUM);
+ case ARM64_WORKAROUND_REPEAT_TLBI:
+ return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
+ }
-#define ARM64_NCAPS 49
+ return true;
+}
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 4261d55e8506..21c824edf8ce 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -6,19 +6,24 @@
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
+#include <asm/alternative-macros.h>
#include <asm/cpucaps.h>
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/sysreg.h>
-#define MAX_CPU_FEATURES 64
+#define MAX_CPU_FEATURES 128
#define cpu_feature(x) KERNEL_HWCAP_ ## x
+#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
+#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
+
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
+#include <linux/cpumask.h>
/*
* CPU feature register tracking
@@ -64,6 +69,28 @@ struct arm64_ftr_bits {
};
/*
+ * Describe the early feature override to the core override code:
+ *
+ * @val Values that are to be merged into the final
+ * sanitised value of the register. Only the bitfields
+ * set to 1 in @mask are valid
+ * @mask Mask of the features that are overridden by @val
+ *
+ * A @mask field set to full-1 indicates that the corresponding field
+ * in @val is a valid override.
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-0 denotes that this field has no override
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-1 denotes thath this field has an invalid override.
+ */
+struct arm64_ftr_override {
+ u64 val;
+ u64 mask;
+};
+
+/*
* @arm64_ftr_reg - Feature register
* @strict_mask Bits which should match across all CPUs for sanity.
* @sys_val Safe value across the CPUs (system view)
@@ -74,6 +101,7 @@ struct arm64_ftr_reg {
u64 user_mask;
u64 sys_val;
u64 user_val;
+ struct arm64_ftr_override *override;
const struct arm64_ftr_bits *ftr_bits;
};
@@ -83,7 +111,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
* CPU capabilities:
*
* We use arm64_cpu_capabilities to represent system features, errata work
- * arounds (both used internally by kernel and tracked in cpu_hwcaps) and
+ * arounds (both used internally by kernel and tracked in system_cpucaps) and
* ELF HWCAPs (which are exposed to user).
*
* To support systems with heterogeneous CPUs, we need to make sure that we
@@ -208,6 +236,10 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
* In some non-typical cases either both (a) and (b), or neither,
* should be permitted. This can be described by including neither
* or both flags in the capability's type field.
+ *
+ * In case of a conflict, the CPU is prevented from booting. If the
+ * ARM64_CPUCAP_PANIC_ON_CONFLICT flag is specified for the capability,
+ * then a kernel panic is triggered.
*/
@@ -240,6 +272,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU ((u16)BIT(4))
/* Is it safe for a late CPU to miss this capability when system has it */
#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5))
+/* Panic when a conflict is detected */
+#define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6))
/*
* CPU errata workarounds that need to be enabled at boot time if one or
@@ -262,6 +296,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
/*
* CPU feature detected at boot time based on feature of one or more CPUs.
* All possible conflicts for a late CPU are ignored.
+ * NOTE: this means that a late CPU with the feature will *not* cause the
+ * capability to be advertised by cpus_have_*cap()!
*/
#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \
(ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
@@ -279,9 +315,20 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
/*
* CPU feature used early in the boot based on the boot CPU. All secondary
- * CPUs must match the state of the capability as detected by the boot CPU.
+ * CPUs must match the state of the capability as detected by the boot CPU. In
+ * case of a conflict, a kernel panic is triggered.
*/
-#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE ARM64_CPUCAP_SCOPE_BOOT_CPU
+#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_BOOT_CPU | ARM64_CPUCAP_PANIC_ON_CONFLICT)
+
+/*
+ * CPU feature used early in the boot based on the boot CPU. It is safe for a
+ * late CPU to have this feature even though the boot CPU hasn't enabled it,
+ * although the feature will not be used by Linux in this case. If the boot CPU
+ * has enabled this feature already, then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_BOOT_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_BOOT_CPU | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
struct arm64_cpu_capabilities {
const char *desc;
@@ -314,6 +361,7 @@ struct arm64_cpu_capabilities {
struct { /* Feature register checking */
u32 sys_reg;
u8 field_pos;
+ u8 field_width;
u8 min_field_value;
u8 hwcap_type;
bool sign;
@@ -333,6 +381,7 @@ struct arm64_cpu_capabilities {
* method is robust against being called multiple times.
*/
const struct arm64_cpu_capabilities *match_list;
+ const struct cpumask *cpus;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
@@ -340,20 +389,8 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
return cap->type & ARM64_CPUCAP_SCOPE_MASK;
}
-static inline bool
-cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
-{
- return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
-}
-
-static inline bool
-cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
-{
- return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
-}
-
/*
- * Generic helper for handling capabilties with multiple (match,enable) pairs
+ * Generic helper for handling capabilities with multiple (match,enable) pairs
* of call backs, sharing the same capability bit.
* Iterate over each entry to see if at least one matches.
*/
@@ -370,16 +407,29 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
return false;
}
-extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
+static __always_inline bool is_vhe_hyp_code(void)
+{
+ /* Only defined for code run in VHE hyp context */
+ return __is_defined(__KVM_VHE_HYPERVISOR__);
+}
+
+static __always_inline bool is_nvhe_hyp_code(void)
+{
+ /* Only defined for code run in NVHE hyp context */
+ return __is_defined(__KVM_NVHE_HYPERVISOR__);
+}
+
+static __always_inline bool is_hyp_code(void)
+{
+ return is_vhe_hyp_code() || is_nvhe_hyp_code();
+}
+
+extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
-/* ARM64 CAPS + alternative_cb */
-#define ARM64_NPATCHABLE (ARM64_NCAPS + 1)
-extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
#define for_each_available_cap(cap) \
- for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+ for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS)
bool this_cpu_has_cap(unsigned int cap);
void cpu_set_feature(unsigned int num);
@@ -390,37 +440,62 @@ unsigned long cpu_get_elf_hwcap2(void);
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
-/* System capability check for constant caps */
-static __always_inline bool __cpus_have_const_cap(int num)
+static __always_inline bool boot_capabilities_finalized(void)
{
- if (num >= ARM64_NCAPS)
- return false;
- return static_branch_unlikely(&cpu_hwcap_keys[num]);
+ return alternative_has_cap_likely(ARM64_ALWAYS_BOOT);
}
-static inline bool cpus_have_cap(unsigned int num)
+static __always_inline bool system_capabilities_finalized(void)
{
+ return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
+}
+
+/*
+ * Test for a capability with a runtime check.
+ *
+ * Before the capability is detected, this returns false.
+ */
+static __always_inline bool cpus_have_cap(unsigned int num)
+{
+ if (__builtin_constant_p(num) && !cpucap_is_possible(num))
+ return false;
if (num >= ARM64_NCAPS)
return false;
- return test_bit(num, cpu_hwcaps);
+ return arch_test_bit(num, system_cpucaps);
}
-static __always_inline bool cpus_have_const_cap(int num)
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before boot capabilities are finalized, this will BUG().
+ * After boot capabilities are finalized, this is patched to avoid a runtime
+ * check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool cpus_have_final_boot_cap(int num)
{
- if (static_branch_likely(&arm64_const_caps_ready))
- return __cpus_have_const_cap(num);
+ if (boot_capabilities_finalized())
+ return alternative_has_cap_unlikely(num);
else
- return cpus_have_cap(num);
+ BUG();
}
-static inline void cpus_set_cap(unsigned int num)
+/*
+ * Test for a capability without a runtime check.
+ *
+ * Before system capabilities are finalized, this will BUG().
+ * After system capabilities are finalized, this is patched to avoid a runtime
+ * check.
+ *
+ * @num must be a compile-time constant.
+ */
+static __always_inline bool cpus_have_final_cap(int num)
{
- if (num >= ARM64_NCAPS) {
- pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
- num, ARM64_NCAPS);
- } else {
- __set_bit(num, cpu_hwcaps);
- }
+ if (system_capabilities_finalized())
+ return alternative_has_cap_unlikely(num);
+ else
+ BUG();
}
static inline int __attribute_const__
@@ -435,18 +510,41 @@ cpuid_feature_extract_signed_field(u64 features, int field)
return cpuid_feature_extract_signed_field_width(features, field, 4);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
{
return (u64)(features << (64 - width - field)) >> (64 - width);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field(u64 features, int field)
{
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
+/*
+ * Fields that identify the version of the Performance Monitors Extension do
+ * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825,
+ * "Alternative ID scheme used for the Performance Monitors Extension version".
+ */
+static inline u64 __attribute_const__
+cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
+{
+ u64 val = cpuid_feature_extract_unsigned_field(features, field);
+ u64 mask = GENMASK_ULL(field + 3, field);
+
+ /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
+ if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ val = 0;
+
+ if (val > cap) {
+ features &= ~mask;
+ features |= (cap << field) & mask;
+ }
+
+ return features;
+}
+
static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -460,6 +558,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg)
static inline int __attribute_const__
cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign)
{
+ if (WARN_ON_ONCE(!width))
+ width = 4;
return (sign) ?
cpuid_feature_extract_signed_field_width(features, field, width) :
cpuid_feature_extract_unsigned_field_width(features, field, width);
@@ -478,37 +578,97 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
- cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1;
+}
+
+static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
+
+ return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
}
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
- return val == ID_AA64PFR0_EL0_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
}
static inline bool id_aa64pfr0_sve(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT);
return val > 0;
}
-void __init setup_cpu_features(void);
+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT);
+
+ return val > 0;
+}
+
+static inline bool id_aa64pfr1_mte(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
+
+ return val >= ID_AA64PFR1_EL1_MTE_MTE2;
+}
+
+void __init setup_boot_cpu_features(void);
+void __init setup_system_features(void);
+void __init setup_user_features(void);
+
void check_local_cpu_capabilities(void);
u64 read_sanitised_ftr_reg(u32 id);
+u64 __read_sysreg_by_encoding(u32 sys_id);
static inline bool cpu_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
+
+static inline bool supports_csv2p3(int scope)
+{
+ u64 pfr0;
+ u8 csv2_val;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+ else
+ pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_EL1_CSV2_SHIFT);
+ return csv2_val == 3;
+}
+
+static inline bool supports_clearbhb(int scope)
+{
+ u64 isar2;
+
+ if (scope == SCOPE_LOCAL_CPU)
+ isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
+ else
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ return cpuid_feature_extract_unsigned_field(isar2,
+ ID_AA64ISAR2_EL1_CLRBHB_SHIFT);
+}
+
+const struct cpumask *system_32bit_el0_cpumask(void);
+DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
static inline bool system_supports_32bit_el0(void)
{
- return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ return static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+ id_aa64pfr0_32bit_el0(pfr0);
}
static inline bool system_supports_4kb_granule(void)
@@ -518,9 +678,10 @@ static inline bool system_supports_4kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN4_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN4_SHIFT);
- return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+ return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX);
}
static inline bool system_supports_64kb_granule(void)
@@ -530,9 +691,10 @@ static inline bool system_supports_64kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN64_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN64_SHIFT);
- return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+ return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX);
}
static inline bool system_supports_16kb_granule(void)
@@ -542,9 +704,10 @@ static inline bool system_supports_16kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN16_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN16_SHIFT);
- return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+ return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -559,52 +722,80 @@ static inline bool system_supports_mixed_endian(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_BIGENDEL_SHIFT);
+ ID_AA64MMFR0_EL1_BIGEND_SHIFT);
return val == 0x1;
}
-static inline bool system_supports_fpsimd(void)
+static __always_inline bool system_supports_fpsimd(void)
{
- return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+ return alternative_has_cap_likely(ARM64_HAS_FPSIMD);
+}
+
+static inline bool system_uses_hw_pan(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_PAN);
}
static inline bool system_uses_ttbr0_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_const_cap(ARM64_HAS_PAN);
+ !system_uses_hw_pan();
}
-static inline bool system_supports_sve(void)
+static __always_inline bool system_supports_sve(void)
{
- return IS_ENABLED(CONFIG_ARM64_SVE) &&
- cpus_have_const_cap(ARM64_SVE);
+ return alternative_has_cap_unlikely(ARM64_SVE);
}
-static inline bool system_supports_cnp(void)
+static __always_inline bool system_supports_sme(void)
{
- return IS_ENABLED(CONFIG_ARM64_CNP) &&
- cpus_have_const_cap(ARM64_HAS_CNP);
+ return alternative_has_cap_unlikely(ARM64_SME);
+}
+
+static __always_inline bool system_supports_sme2(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME2);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+ return alternative_has_cap_unlikely(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+ return system_supports_sme();
+}
+
+static __always_inline bool system_supports_cnp(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_CNP);
}
static inline bool system_supports_address_auth(void)
{
- return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
- (cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
- cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF));
+ return cpus_have_final_boot_cap(ARM64_HAS_ADDRESS_AUTH);
}
static inline bool system_supports_generic_auth(void)
{
- return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
- (cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
- cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF));
+ return alternative_has_cap_unlikely(ARM64_HAS_GENERIC_AUTH);
+}
+
+static inline bool system_has_full_ptr_auth(void)
+{
+ return system_supports_address_auth() && system_supports_generic_auth();
+}
+
+static __always_inline bool system_uses_irq_prio_masking(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
}
-static inline bool system_uses_irq_prio_masking(void)
+static inline bool system_supports_mte(void)
{
- return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
- cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
+ return alternative_has_cap_unlikely(ARM64_MTE);
}
static inline bool system_has_prio_mask_debugging(void)
@@ -613,42 +804,40 @@ static inline bool system_has_prio_mask_debugging(void)
system_uses_irq_prio_masking();
}
-#define ARM64_BP_HARDEN_UNKNOWN -1
-#define ARM64_BP_HARDEN_WA_NEEDED 0
-#define ARM64_BP_HARDEN_NOT_REQUIRED 1
-
-int get_spectre_v2_workaround_state(void);
+static inline bool system_supports_bti(void)
+{
+ return cpus_have_final_cap(ARM64_BTI);
+}
-#define ARM64_SSBD_UNKNOWN -1
-#define ARM64_SSBD_FORCE_DISABLE 0
-#define ARM64_SSBD_KERNEL 1
-#define ARM64_SSBD_FORCE_ENABLE 2
-#define ARM64_SSBD_MITIGATED 3
+static inline bool system_supports_bti_kernel(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) &&
+ cpus_have_final_boot_cap(ARM64_BTI);
+}
-static inline int arm64_get_ssbd_state(void)
+static inline bool system_supports_tlb_range(void)
{
-#ifdef CONFIG_ARM64_SSBD
- extern int ssbd_state;
- return ssbd_state;
-#else
- return ARM64_SSBD_UNKNOWN;
-#endif
+ return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
}
-void arm64_set_ssbd_mitigation(bool state);
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
-extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
- case 0: return 32;
- case 1: return 36;
- case 2: return 40;
- case 3: return 42;
- case 4: return 44;
- case 5: return 48;
- case 6: return 52;
+ case ID_AA64MMFR0_EL1_PARANGE_32: return 32;
+ case ID_AA64MMFR0_EL1_PARANGE_36: return 36;
+ case ID_AA64MMFR0_EL1_PARANGE_40: return 40;
+ case ID_AA64MMFR0_EL1_PARANGE_42: return 42;
+ case ID_AA64MMFR0_EL1_PARANGE_44: return 44;
+ case ID_AA64MMFR0_EL1_PARANGE_48: return 48;
+ case ID_AA64MMFR0_EL1_PARANGE_52: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme
@@ -668,11 +857,67 @@ static inline bool cpu_has_hw_af(void)
if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
return false;
- mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ /*
+ * Use cached version to avoid emulated msr operation on KVM
+ * guests.
+ */
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_HADBS_SHIFT);
+ ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
}
+static inline bool cpu_has_pan(void)
+{
+ u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_PAN_SHIFT);
+}
+
+#ifdef CONFIG_ARM64_AMU_EXTN
+/* Check whether the cpu supports the Activity Monitors Unit (AMU) */
+extern bool cpu_has_amu_feat(int cpu);
+#else
+static inline bool cpu_has_amu_feat(int cpu)
+{
+ return false;
+}
+#endif
+
+/* Get a cpu that supports the Activity Monitors Unit (AMU) */
+extern int get_cpu_with_amu_feat(void);
+
+static inline unsigned int get_vmid_bits(u64 mmfr1)
+{
+ int vmid_bits;
+
+ vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_EL1_VMIDBits_SHIFT);
+ if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16)
+ return 16;
+
+ /*
+ * Return the default here even if any reserved
+ * value is fetched from the system register.
+ */
+ return 8;
+}
+
+s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+
+extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
+extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
+extern struct arm64_ftr_override id_aa64isar1_override;
+extern struct arm64_ftr_override id_aa64isar2_override;
+
+extern struct arm64_ftr_override arm64_sw_feature_override;
+
+u32 get_kvm_ipa_limit(void);
+void dump_cpu_features(void);
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index 3c5ddb429ea2..2047713e097d 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -4,18 +4,38 @@
#include <asm/proc-fns.h>
-#ifdef CONFIG_CPU_IDLE
-extern int arm_cpuidle_init(unsigned int cpu);
-extern int arm_cpuidle_suspend(int index);
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#include <asm/arch_gicv3.h>
+
+struct arm_cpuidle_irq_context {
+ unsigned long pmr;
+ unsigned long daif_bits;
+};
+
+#define arm_cpuidle_save_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ c->daif_bits = read_sysreg(daif); \
+ write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \
+ daif); \
+ c->pmr = gic_read_pmr(); \
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \
+ } \
+ } while (0)
+
+#define arm_cpuidle_restore_irq_context(__c) \
+ do { \
+ struct arm_cpuidle_irq_context *c = __c; \
+ if (system_uses_irq_prio_masking()) { \
+ gic_write_pmr(c->pmr); \
+ write_sysreg(c->daif_bits, daif); \
+ } \
+ } while (0)
#else
-static inline int arm_cpuidle_init(unsigned int cpu)
-{
- return -EOPNOTSUPP;
-}
+struct arm_cpuidle_irq_context { };
-static inline int arm_cpuidle_suspend(int index)
-{
- return -EOPNOTSUPP;
-}
+#define arm_cpuidle_save_irq_context(c) (void)c
+#define arm_cpuidle_restore_irq_context(c) (void)c
#endif
#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index aca07c2f6e6e..7c7493cb571f 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -36,12 +36,12 @@
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
-#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_CPU_MODEL(imp, partnum) \
- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \
+ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \
(0xf << MIDR_ARCHITECTURE_SHIFT) | \
((partnum) << MIDR_PARTNUM_SHIFT))
@@ -59,6 +59,8 @@
#define ARM_CPU_IMP_NVIDIA 0x4E
#define ARM_CPU_IMP_FUJITSU 0x46
#define ARM_CPU_IMP_HISI 0x48
+#define ARM_CPU_IMP_APPLE 0x61
+#define ARM_CPU_IMP_AMPERE 0xC0
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -71,13 +73,33 @@
#define ARM_CPU_PART_CORTEX_A55 0xD05
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
-
-#define APM_CPU_PART_POTENZA 0x000
+#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_NEOVERSE_V1 0xD40
+#define ARM_CPU_PART_CORTEX_A78 0xD41
+#define ARM_CPU_PART_CORTEX_A78AE 0xD42
+#define ARM_CPU_PART_CORTEX_X1 0xD44
+#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_A520 0xD80
+#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_A715 0xD4D
+#define ARM_CPU_PART_CORTEX_X2 0xD48
+#define ARM_CPU_PART_NEOVERSE_N2 0xD49
+#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+
+#define APM_CPU_PART_XGENE 0x000
+#define APM_CPU_VAR_POTENZA 0x00
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
+/* OcteonTx2 series */
+#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1
+#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2
+#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3
+#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4
+#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5
+#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6
#define BRCM_CPU_PART_BRAHMA_B53 0x100
#define BRCM_CPU_PART_VULCAN 0x516
@@ -85,6 +107,11 @@
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
#define QCOM_CPU_PART_KRYO 0x200
+#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
+#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
+#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
+#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -93,6 +120,21 @@
#define HISI_CPU_PART_TSV110 0xD01
+#define APPLE_CPU_PART_M1_ICESTORM 0x022
+#define APPLE_CPU_PART_M1_FIRESTORM 0x023
+#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
+#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
+#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
+#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
+
+#define AMPERE_CPU_PART_AMPERE1 0xAC3
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -102,19 +144,55 @@
#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
+#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
+#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
+#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
+#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
+#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
+#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
+#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX)
+#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX)
+#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX)
+#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN)
+#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM)
+#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
+#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
+#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
+#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
+#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
+#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
+#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
+#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
+#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
+#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
+#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/crash_core.h b/arch/arm64/include/asm/crash_core.h
new file mode 100644
index 000000000000..9f5c8d339f44
--- /dev/null
+++ b/arch/arm64/include/asm/crash_core.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ARM64_CRASH_CORE_H
+#define _ARM64_CRASH_CORE_H
+
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+#endif
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 72acd2db167f..55f57dfa8e2f 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -13,8 +13,8 @@
#include <asm/ptrace.h>
#define DAIF_PROCCTX 0
-#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
-#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT)
+#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT)
+#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
#define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
@@ -38,7 +38,7 @@ static inline void local_daif_mask(void)
trace_hardirqs_off();
}
-static inline unsigned long local_daif_save(void)
+static inline unsigned long local_daif_save_flags(void)
{
unsigned long flags;
@@ -47,9 +47,18 @@ static inline unsigned long local_daif_save(void)
if (system_uses_irq_prio_masking()) {
/* If IRQs are masked with PMR, reflect it in the flags */
if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
- flags |= PSR_I_BIT;
+ flags |= PSR_I_BIT | PSR_F_BIT;
}
+ return flags;
+}
+
+static inline unsigned long local_daif_save(void)
+{
+ unsigned long flags;
+
+ flags = local_daif_save_flags();
+
local_daif_mask();
return flags;
@@ -60,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags)
bool irq_disabled = flags & PSR_I_BIT;
WARN_ON(system_has_prio_mask_debugging() &&
- !(read_sysreg(daif) & PSR_I_BIT));
+ (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT));
if (!irq_disabled) {
trace_hardirqs_on();
@@ -77,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags)
* If interrupts are disabled but we can take
* asynchronous errors, we can take NMIs
*/
- flags &= ~PSR_I_BIT;
+ flags &= ~(PSR_I_BIT | PSR_F_BIT);
pmr = GIC_PRIO_IRQOFF;
} else {
pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
@@ -119,6 +128,12 @@ static inline void local_daif_inherit(struct pt_regs *regs)
{
unsigned long flags = regs->pstate & DAIF_MASK;
+ if (interrupts_enabled(regs))
+ trace_hardirqs_on();
+
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(regs->pmr_save);
+
/*
* We can't use local_daif_restore(regs->pstate) here as
* system_has_prio_mask_debugging() won't restore the I bit if it can
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7619f473155f..13d437bcbf58 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -34,18 +34,6 @@
*/
#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
-/*
- * BRK instruction encoding
- * The #imm16 value should be placed at bits[20:5] within BRK ins
- */
-#define AARCH64_BREAK_MON 0xd4200000
-
-/*
- * BRK instruction for provoking a fault on purpose
- * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
- */
-#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
-
#define AARCH64_BREAK_KGDB_DYN_DBG \
(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
@@ -53,6 +41,7 @@
/* kprobes BRK opcodes with ESR encoding */
#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
+#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))
/* uprobes BRK opcodes with ESR encoding */
#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
@@ -75,7 +64,7 @@ struct task_struct;
struct step_hook {
struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned int esr);
+ int (*fn)(struct pt_regs *regs, unsigned long esr);
};
void register_user_step_hook(struct step_hook *hook);
@@ -86,7 +75,7 @@ void unregister_kernel_step_hook(struct step_hook *hook);
struct break_hook {
struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned int esr);
+ int (*fn)(struct pt_regs *regs, unsigned long esr);
u16 imm;
u16 mask; /* These bits are ignored when comparing with imm */
};
@@ -109,10 +98,13 @@ void disable_debug_monitors(enum dbg_active_el el);
void user_rewind_single_step(struct task_struct *task);
void user_fastforward_single_step(struct task_struct *task);
+void user_regs_reset_single_step(struct user_pt_regs *regs,
+ struct task_struct *task);
void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
int kernel_active_single_step(void);
+void kernel_rewind_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int reinstall_suspended_bps(struct pt_regs *regs);
@@ -125,5 +117,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
int aarch32_break_handler(struct pt_regs *regs);
+void debug_traps_init(void);
+
#endif /* __ASSEMBLY */
#endif /* __ASM_DEBUG_MONITORS_H */
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 12b778d55342..996498751318 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -6,9 +6,6 @@
#define __ASM_DEVICE_H
struct dev_archdata {
-#ifdef CONFIG_IOMMU_API
- void *iommu; /* private IOMMU data */
-#endif
};
struct pdev_archdata {
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index b54d3a86c444..bcd5622aa096 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,34 +14,40 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
+
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
#else
#define efi_init()
+
+static inline
+bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
+{
+ return false;
+}
#endif
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
-int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
-
-#define arch_efi_call_virt_setup() \
-({ \
- efi_virtmap_load(); \
- __efi_fpsimd_begin(); \
-})
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md,
+ bool has_bti);
+#undef arch_efi_call_virt
#define arch_efi_call_virt(p, f, args...) \
-({ \
- efi_##f##_t *__f; \
- __f = p->f; \
- __efi_rt_asm_wrapper(__f, #f, args); \
-})
-
-#define arch_efi_call_virt_teardown() \
-({ \
- __efi_fpsimd_end(); \
- efi_virtmap_unload(); \
-})
+ __efi_rt_asm_wrapper((p)->f, #f, args)
+extern u64 *efi_rt_stack_top;
efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
+
+/*
+ * efi_rt_stack_top[-1] contains the value the stack pointer had before
+ * switching to the EFI runtime stack.
+ */
+#define current_in_efi() \
+ (!preemptible() && efi_rt_stack_top != NULL && \
+ on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
+
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
/*
@@ -58,67 +64,46 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
/* arch specific definitions used by the stub code */
/*
- * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from
- * start of kernel and may not cross a 2MiB boundary. We set alignment to
- * 2MiB so we know it won't cross a 2MiB boundary.
- */
-#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
-
-/*
* In some configurations (e.g. VMAP_STACK && 64K pages), stacks built into the
* kernel need greater alignment than we require the segments to be padded to.
*/
#define EFI_KIMG_ALIGN \
(SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
-/* on arm64, the FDT may be located anywhere in system RAM */
-static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base)
-{
- return ULONG_MAX;
-}
-
/*
* On arm64, we have to ensure that the initrd ends up in the linear region,
* which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is
* guaranteed to cover the kernel Image.
*
* Since the EFI stub is part of the kernel Image, we can relax the
- * usual requirements in Documentation/arm64/booting.rst, which still
+ * usual requirements in Documentation/arch/arm64/booting.rst, which still
* apply to other bootloaders, and are required for some kernel
* configurations.
*/
-static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
- unsigned long image_addr)
+static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
{
return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1));
}
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
-#define __efi_call_early(f, ...) f(__VA_ARGS__)
-#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__)
-#define efi_is_64bit() (true)
-
-#define efi_table_attr(table, attr, instance) \
- ((table##_t *)instance)->attr
-
-#define efi_call_proto(protocol, f, instance, ...) \
- ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
-
-#define alloc_screen_info(x...) &screen_info
-
-static inline void free_screen_info(efi_system_table_t *sys_table_arg,
- struct screen_info *si)
-{
-}
-
-/* redeclare as 'hidden' so the compiler will generate relative references */
-extern struct screen_info screen_info __attribute__((__visibility__("hidden")));
-
-static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+static inline unsigned long efi_get_kimg_min_align(void)
{
+ extern bool efi_nokaslr;
+
+ /*
+ * Although relocatable kernels can fix up the misalignment with
+ * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are
+ * subtly out of sync with those recorded in the vmlinux when kaslr is
+ * disabled but the image required relocation anyway. Therefore retain
+ * 2M alignment if KASLR was explicitly disabled, even if it was not
+ * going to be activated to begin with.
+ */
+ return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN;
}
#define EFI_ALLOC_ALIGN SZ_64K
+#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+
+extern unsigned long primary_entry_offset(void);
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
@@ -164,4 +149,13 @@ static inline void efi_set_pgd(struct mm_struct *mm)
void efi_virtmap_load(void);
void efi_virtmap_unload(void);
+static inline void efi_capsule_flush_cache_range(void *addr, int size)
+{
+ dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
+}
+
+efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
+
+void efi_icache_sync(unsigned long start, unsigned long end);
+
#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
new file mode 100644
index 000000000000..b7afaa026842
--- /dev/null
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM_KVM_INIT_H__
+#define __ARM_KVM_INIT_H__
+
+#ifndef __ASSEMBLY__
+#error Assembly-only header
+#endif
+
+#include <asm/kvm_arm.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+.macro __init_el2_sctlr
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ msr sctlr_el2, x0
+ isb
+.endm
+
+.macro __init_el2_hcrx
+ mrs x0, id_aa64mmfr1_el1
+ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+ cbz x0, .Lskip_hcrx_\@
+ mov_q x0, HCRX_HOST_FLAGS
+ msr_s SYS_HCRX_EL2, x0
+.Lskip_hcrx_\@:
+.endm
+
+/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */
+.macro __check_hvhe fail, tmp
+ mrs \tmp, hcr_el2
+ and \tmp, \tmp, #HCR_E2H
+ cbz \tmp, \fail
+.endm
+
+/*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured in the later stage of boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+.macro __init_el2_timers
+ mov x0, #3 // Enable EL1 physical timers
+ __check_hvhe .LnVHE_\@, x1
+ lsl x0, x0, #10
+.LnVHE_\@:
+ msr cnthctl_el2, x0
+ msr cntvoff_el2, xzr // Clear virtual offset
+.endm
+
+.macro __init_el2_debug
+ mrs x1, id_aa64dfr0_el1
+ sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x0, #1
+ b.lt .Lskip_pmu_\@ // Skip if no PMU present
+ mrs x0, pmcr_el0 // Disable debug access traps
+ ubfx x0, x0, #11, #5 // to EL2 and allow access to
+.Lskip_pmu_\@:
+ csel x2, xzr, x0, lt // all PMU counters from EL1
+
+ /* Statistical profiling */
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
+ cbz x0, .Lskip_spe_\@ // Skip if SPE not present
+
+ mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
+ and x0, x0, #(1 << PMBIDR_EL1_P_SHIFT)
+ cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical
+ mov x0, #(1 << PMSCR_EL2_PCT_SHIFT | \
+ 1 << PMSCR_EL2_PA_SHIFT)
+ msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter
+.Lskip_spe_el2_\@:
+ mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+ orr x2, x2, x0 // If we don't have VHE, then
+ // use EL1&0 translation.
+
+.Lskip_spe_\@:
+ /* Trace buffer */
+ ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4
+ cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
+
+ mrs_s x0, SYS_TRBIDR_EL1
+ and x0, x0, TRBIDR_EL1_P
+ cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2
+
+ mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
+ orr x2, x2, x0 // allow the EL1&0 translation
+ // to own it.
+
+.Lskip_trace_\@:
+ msr mdcr_el2, x2 // Configure debug traps
+.endm
+
+/* LORegions */
+.macro __init_el2_lor
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4
+ cbz x0, .Lskip_lor_\@
+ msr_s SYS_LORC_EL1, xzr
+.Lskip_lor_\@:
+.endm
+
+/* Stage-2 translation */
+.macro __init_el2_stage2
+ msr vttbr_el2, xzr
+.endm
+
+/* GICv3 system register access */
+.macro __init_el2_gicv3
+ mrs x0, id_aa64pfr0_el1
+ ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4
+ cbz x0, .Lskip_gicv3_\@
+
+ mrs_s x0, SYS_ICC_SRE_EL2
+ orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
+ orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
+ msr_s SYS_ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
+ tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
+ msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults
+.Lskip_gicv3_\@:
+.endm
+
+.macro __init_el2_hstr
+ msr hstr_el2, xzr // Disable CP15 traps to EL2
+.endm
+
+/* Virtual CPU ID registers */
+.macro __init_el2_nvhe_idregs
+ mrs x0, midr_el1
+ mrs x1, mpidr_el1
+ msr vpidr_el2, x0
+ msr vmpidr_el2, x1
+.endm
+
+/* Coprocessor traps */
+.macro __init_el2_cptr
+ __check_hvhe .LnVHE_\@, x1
+ mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+.LnVHE_\@:
+ mov x0, #0x33ff
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
+.endm
+
+/* Disable any fine grained traps */
+.macro __init_el2_fgt
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cbz x1, .Lskip_fgt_\@
+
+ mov x0, xzr
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
+ cmp x1, #3
+ b.lt .Lset_debug_fgt_\@
+ /* Disable PMSNEVFR_EL1 read and write traps */
+ orr x0, x0, #(1 << 62)
+
+.Lset_debug_fgt_\@:
+ msr_s SYS_HDFGRTR_EL2, x0
+ msr_s SYS_HDFGWTR_EL2, x0
+
+ mov x0, xzr
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
+ cbz x1, .Lset_pie_fgt_\@
+
+ /* Disable nVHE traps of TPIDR2 and SMPRI */
+ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+
+.Lset_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lset_fgt_\@
+
+ /* Disable trapping of PIR_EL1 / PIRE0_EL1 */
+ orr x0, x0, #HFGxTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
+
+.Lset_fgt_\@:
+ msr_s SYS_HFGRTR_EL2, x0
+ msr_s SYS_HFGWTR_EL2, x0
+ msr_s SYS_HFGITR_EL2, xzr
+
+ mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
+ ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
+ cbz x1, .Lskip_fgt_\@
+
+ msr_s SYS_HAFGRTR_EL2, xzr
+.Lskip_fgt_\@:
+.endm
+
+.macro __init_el2_nvhe_prepare_eret
+ mov x0, #INIT_PSTATE_EL1
+ msr spsr_el2, x0
+.endm
+
+/**
+ * Initialize EL2 registers to sane values. This should be called early on all
+ * cores that were booted in EL2. Note that everything gets initialised as
+ * if VHE was not available. The kernel context will be upgraded to VHE
+ * if possible later on in the boot process
+ *
+ * Regs: x0, x1 and x2 are clobbered.
+ */
+.macro init_el2_state
+ __init_el2_sctlr
+ __init_el2_hcrx
+ __init_el2_timers
+ __init_el2_debug
+ __init_el2_lor
+ __init_el2_stage2
+ __init_el2_gicv3
+ __init_el2_hstr
+ __init_el2_nvhe_idregs
+ __init_el2_cptr
+ __init_el2_fgt
+.endm
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+// This will clobber tmp1 and tmp2, and expect tmp1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cbz \tmp1, \fail
+
+ adr_l \tmp1, \idreg\()_override
+ ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET]
+ ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET]
+ ubfx \tmp2, \tmp2, #\fld, #\width
+ ubfx \tmp1, \tmp1, #\fld, #\width
+ cmp \tmp1, xzr
+ and \tmp2, \tmp2, \tmp1
+ csinv \tmp2, \tmp2, xzr, ne
+ cbnz \tmp2, \pass
+ b \fail
+.endm
+
+// This will clobber tmp1 and tmp2
+.macro check_override idreg, fld, pass, fail, tmp1, tmp2
+ mrs \tmp1, \idreg\()_el1
+ __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2
+.endm
+#else
+// This will clobber tmp
+.macro __check_override idreg, fld, width, pass, fail, tmp, ignore
+ ldr_l \tmp, \idreg\()_el1_sys_val
+ ubfx \tmp, \tmp, #\fld, #\width
+ cbnz \tmp, \pass
+ b \fail
+.endm
+
+.macro check_override idreg, fld, pass, fail, tmp, ignore
+ __check_override \idreg \fld 4 \pass \fail \tmp \ignore
+.endm
+#endif
+
+.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
+
+.Linit_sve_\@: /* SVE register access */
+ __check_hvhe .Lcptr_nvhe_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SVE traps
+ orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
+
+.Lcptr_nvhe_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SVE traps
+ bic x0, x0, #CPTR_EL2_TZ
+ msr cptr_el2, x0
+.Lskip_set_cptr_\@:
+ isb
+ mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
+ msr_s SYS_ZCR_EL2, x1 // length for EL1.
+
+.Lskip_sve_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
+
+.Linit_sme_\@: /* SME register access and priority mapping */
+ __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SME traps
+ orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SME traps
+ bic x0, x0, #CPTR_EL2_TSM
+ msr cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x0, #0 // SMCR controls
+
+ // Full FP in SM?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2
+
+.Linit_sme_fa64_\@:
+ orr x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+ // ZT0 available?
+ mrs_s x1, SYS_ID_AA64SMFR0_EL1
+ __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2
+.Linit_sme_zt0_\@:
+ orr x0, x0, SMCR_ELx_EZT0_MASK
+.Lskip_sme_zt0_\@:
+
+ orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x0 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme_\@
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+.Lskip_sme_\@:
+.endm
+
+#endif /* __ARM_KVM_INIT_H__ */
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index b618017205a3..97932fbf973d 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -96,7 +96,28 @@
*/
#define elf_check_arch(x) ((x)->e_machine == EM_AARCH64)
-#define elf_read_implies_exec(ex,stk) (stk != EXSTACK_DISABLE_X)
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ *
+ * The decision process for determining the results are:
+ *
+ *              CPU*: | arm32    | arm64 |
+ * ELF:              |            |            |
+ * ---------------------|------------|------------|
+ * missing PT_GNU_STACK | exec-all   | exec-none  |
+ * PT_GNU_STACK == RWX  | exec-stack | exec-stack |
+ * PT_GNU_STACK == RW   | exec-none | exec-none |
+ *
+ * exec-all : all PROT_READ user mappings are executable, except when
+ * backed by files on a noexec-filesystem.
+ * exec-none : only PROT_EXEC user mappings are executable.
+ * exec-stack: only the stack and PROT_EXEC user mappings are executable.
+ *
+ * *all arm64 CPUs support NX, so there is no "lacks NX" column.
+ *
+ */
+#define compat_elf_read_implies_exec(ex, stk) (stk == EXSTACK_DEFAULT)
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE PAGE_SIZE
@@ -114,7 +135,11 @@
#ifndef __ASSEMBLY__
+#include <uapi/linux/elf.h>
#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/types.h>
#include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */
typedef unsigned long elf_greg_t;
@@ -188,10 +213,8 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
-#define compat_elf_check_arch(x) (system_supports_32bit_el0() && \
- ((x)->e_machine == EM_ARM) && \
- ((x)->e_flags & EF_ARM_EABI_MASK))
-
+int compat_elf_check_arch(const struct elf32_hdr *);
+#define compat_elf_check_arch compat_elf_check_arch
#define compat_start_thread compat_start_thread
/*
* Unlike the native SET_PERSONALITY macro, the compat version maintains
@@ -224,6 +247,52 @@ extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
#endif /* CONFIG_COMPAT */
+struct arch_elf_state {
+ int flags;
+};
+
+#define ARM64_ELF_BTI (1 << 0)
+
+#define INIT_ARCH_ELF_STATE { \
+ .flags = 0, \
+}
+
+static inline int arch_parse_elf_property(u32 type, const void *data,
+ size_t datasz, bool compat,
+ struct arch_elf_state *arch)
+{
+ /* No known properties for AArch32 yet */
+ if (IS_ENABLED(CONFIG_COMPAT) && compat)
+ return 0;
+
+ if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+ const u32 *p = data;
+
+ if (datasz != sizeof(*p))
+ return -ENOEXEC;
+
+ if (system_supports_bti() &&
+ (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI))
+ arch->flags |= ARM64_ELF_BTI;
+ }
+
+ return 0;
+}
+
+static inline int arch_elf_pt_proc(void *ehdr, void *phdr,
+ struct file *f, bool is_interp,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
+static inline int arch_check_elf(void *ehdr, bool has_interp,
+ void *interp_ehdr,
+ struct arch_elf_state *state)
+{
+ return 0;
+}
+
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index cb29253ae86b..353fe08546cf 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -22,7 +22,7 @@
#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
/* Unallocated EC: 0x0A - 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_BTI (0x0D)
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
@@ -35,7 +35,10 @@
#define ESR_ELx_EC_SYS64 (0x18)
#define ESR_ELx_EC_SVE (0x19)
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
-/* Unallocated EC: 0x1b - 0x1E */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME (0x1D)
+/* Unallocated EC: 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
@@ -44,7 +47,7 @@
#define ESR_ELx_EC_DABT_LOW (0x24)
#define ESR_ELx_EC_DABT_CUR (0x25)
#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_MOPS (0x27)
#define ESR_ELx_EC_FP_EXC32 (0x28)
/* Unallocated EC: 0x29 - 0x2B */
#define ESR_ELx_EC_FP_EXC64 (0x2C)
@@ -60,18 +63,23 @@
#define ESR_ELx_EC_BKPT32 (0x38)
/* Unallocated EC: 0x39 */
#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
-/* Unallocted EC: 0x3B */
+/* Unallocated EC: 0x3B */
#define ESR_ELx_EC_BRK64 (0x3C)
/* Unallocated EC: 0x3D - 0x3F */
#define ESR_ELx_EC_MAX (0x3F)
#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_WIDTH (6)
#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS_MASK (GENMASK(24, 0))
+#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
+#define ESR_ELx_ISS2_SHIFT (32)
+#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32))
+#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
@@ -102,11 +110,22 @@
/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
#define ESR_ELx_FSC (0x3F)
#define ESR_ELx_FSC_TYPE (0x3C)
+#define ESR_ELx_FSC_LEVEL (0x03)
#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_MTE (0x11)
#define ESR_ELx_FSC_SERROR (0x11)
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW0 (0x14)
+#define ESR_ELx_FSC_SEA_TTW1 (0x15)
+#define ESR_ELx_FSC_SEA_TTW2 (0x16)
+#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
+#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
+#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
+#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
@@ -124,14 +143,31 @@
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
+/* ISS2 field definitions for Data Aborts */
+#define ESR_ELx_TnD_SHIFT (10)
+#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT)
+#define ESR_ELx_TagAccess_SHIFT (9)
+#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT)
+#define ESR_ELx_GCS_SHIFT (8)
+#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT)
+#define ESR_ELx_Overlay_SHIFT (6)
+#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT)
+#define ESR_ELx_DirtyBit_SHIFT (5)
+#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT)
+#define ESR_ELx_Xs_SHIFT (0)
+#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
+
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
-#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
+#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5)
+#define ESR_ELx_WFx_ISS_RV (UL(1) << 2)
+#define ESR_ELx_WFx_ISS_TI (UL(3) << 0)
+#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0)
#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
-#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
+#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
#define DISR_EL1_IDS (UL(1) << 24)
/*
@@ -141,7 +177,8 @@
#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
/* ESR value templates for specific events */
-#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI)
+#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \
+ (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT))
#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \
ESR_ELx_WFx_ISS_WFI)
@@ -223,6 +260,9 @@
#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
+#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \
+ ESR_ELx_SYS64_ISS_DIR_READ)
+
#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
@@ -249,6 +289,10 @@
(((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
ESR_ELx_SYS64_ISS_OP2_SHIFT))
+/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
+#define ESR_ELx_ERET_ISS_ERET 0x2
+#define ESR_ELx_ERET_ISS_ERETA 0x1
+
/*
* ISS field definitions for floating-point exception traps
* (FP_EXC_32/FP_EXC_64).
@@ -313,20 +357,57 @@
#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \
ESR_ELx_CP15_64_ISS_DIR_READ)
+#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \
+ ESR_ELx_CP15_64_ISS_DIR_READ)
+
#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
ESR_ELx_CP15_32_ISS_DIR_READ)
+/*
+ * ISS values for SME traps
+ */
+
+#define ESR_ELx_SME_ISS_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_ILL 1
+#define ESR_ELx_SME_ISS_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_ZT_DISABLED 4
+
+/* ISS field definitions for MOPS exceptions */
+#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)
+#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18)
+#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17)
+#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16)
+#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10)
+#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
+#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
-static inline bool esr_is_data_abort(u32 esr)
+static inline bool esr_is_data_abort(unsigned long esr)
{
- const u32 ec = ESR_ELx_EC(esr);
+ const unsigned long ec = ESR_ELx_EC(esr);
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
-const char *esr_get_class_string(u32 esr);
+static inline bool esr_fsc_is_translation_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
+}
+
+static inline bool esr_fsc_is_permission_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
+}
+
+static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
+}
+
+const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */
#endif /* __ASM_ESR_H */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 4d5f3b5f50cd..ad688e157c9b 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,20 +8,15 @@
#define __ASM_EXCEPTION_H
#include <asm/esr.h>
-#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <linux/interrupt.h>
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
-#else
-#define __exception_irq_entry __kprobes
-#endif
-static inline u32 disr_to_esr(u64 disr)
+static inline unsigned long disr_to_esr(u64 disr)
{
- unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+ unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
if ((disr & DISR_EL1_IDS) == 0)
esr |= (disr & DISR_EL1_ESR_MASK);
@@ -31,23 +26,55 @@ static inline u32 disr_to_esr(u64 disr)
return esr;
}
-asmlinkage void enter_from_user_mode(void);
-void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void do_undefinstr(struct pt_regs *regs);
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+asmlinkage void __noreturn handle_bad_stack(struct pt_regs *regs);
+
+asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
+
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+ void (*func)(struct pt_regs *));
+asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
+
+void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
+void do_el0_undef(struct pt_regs *regs, unsigned long esr);
+void do_el1_undef(struct pt_regs *regs, unsigned long esr);
+void do_el0_bti(struct pt_regs *regs);
+void do_el1_bti(struct pt_regs *regs, unsigned long esr);
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
struct pt_regs *regs);
-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
-void do_sve_acc(unsigned int esr, struct pt_regs *regs);
-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
-void do_sysinstr(unsigned int esr, struct pt_regs *regs);
-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
-void do_cp15instr(unsigned int esr, struct pt_regs *regs);
-void el0_svc_handler(struct pt_regs *regs);
-void el0_svc_compat_handler(struct pt_regs *regs);
-void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
- struct pt_regs *regs);
+void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
+void do_sve_acc(unsigned long esr, struct pt_regs *regs);
+void do_sme_acc(unsigned long esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs);
+void do_el0_sys(unsigned long esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
+void do_el0_cp15(unsigned long esr, struct pt_regs *regs);
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
+void do_el0_svc(struct pt_regs *regs);
+void do_el0_svc_compat(struct pt_regs *regs);
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el0_mops(struct pt_regs *regs, unsigned long esr);
+void do_serror(struct pt_regs *regs, unsigned long esr);
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h
index 1aae6f9962fc..9a1c22ce664b 100644
--- a/arch/arm64/include/asm/exec.h
+++ b/arch/arm64/include/asm/exec.h
@@ -10,6 +10,5 @@
#include <linux/sched.h>
extern unsigned long arch_align_stack(unsigned long sp);
-void uao_thread_switch(struct task_struct *next);
#endif /* __ASM_EXEC_H */
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 56a4f68b262e..72b0e71cc3de 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -18,9 +18,32 @@
struct exception_table_entry
{
int insn, fixup;
+ short type, data;
};
#define ARCH_HAS_RELATIVE_EXTABLE
-extern int fixup_exception(struct pt_regs *regs);
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
+} while (0)
+
+#ifdef CONFIG_BPF_JIT
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs);
+#else /* !CONFIG_BPF_JIT */
+static inline
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return false;
+}
+#endif /* !CONFIG_BPF_JIT */
+
+bool fixup_exception(struct pt_regs *regs);
#endif
diff --git a/arch/arm64/include/asm/fb.h b/arch/arm64/include/asm/fb.h
index bdc735ee1f67..1a495d8fb2ce 100644
--- a/arch/arm64/include/asm/fb.h
+++ b/arch/arm64/include/asm/fb.h
@@ -5,19 +5,6 @@
#ifndef __ASM_FB_H_
#define __ASM_FB_H_
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
+#include <asm-generic/fb.h>
#endif /* __ASM_FB_H_ */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index f987b8a8f325..58c294a96676 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
+#include <linux/math.h>
#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
@@ -28,26 +29,21 @@
* compile time, but to set the physical address only
* in the boot process.
*
- * These 'compile-time allocated' memory buffers are
- * page-sized. Use set_fixmap(idx,phys) to associate
- * physical memory with fixmap indices.
- *
+ * Each enum increment in these 'compile-time allocated'
+ * memory buffers is page-sized. Use set_fixmap(idx,phys)
+ * to associate physical memory with a fixmap index.
*/
enum fixed_addresses {
FIX_HOLE,
/*
- * Reserve a virtual window for the FDT that is 2 MB larger than the
- * maximum supported size, and put it at the top of the fixmap region.
- * The additional space ensures that any FDT that does not exceed
- * MAX_FDT_SIZE can be mapped regardless of whether it crosses any
- * 2 MB alignment boundaries.
- *
- * Keep this at the top so it remains 2 MB aligned.
+ * Reserve a virtual window for the FDT that is a page bigger than the
+ * maximum supported size. The additional space ensures that any FDT
+ * that does not exceed MAX_FDT_SIZE can be mapped regardless of
+ * whether it crosses any page boundary.
*/
-#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
FIX_FDT_END,
- FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
+ FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1,
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
@@ -63,9 +59,13 @@ enum fixed_addresses {
#endif /* CONFIG_ACPI_APEI_GHES */
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- FIX_ENTRY_TRAMP_DATA,
- FIX_ENTRY_TRAMP_TEXT,
-#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#ifdef CONFIG_RELOCATABLE
+ FIX_ENTRY_TRAMP_TEXT4, /* one extra slot for the data page */
+#endif
+ FIX_ENTRY_TRAMP_TEXT3,
+ FIX_ENTRY_TRAMP_TEXT2,
+ FIX_ENTRY_TRAMP_TEXT1,
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1))
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
@@ -92,12 +92,15 @@ enum fixed_addresses {
__end_of_fixed_addresses
};
-#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE)
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
void __init early_fixmap_init(void);
+void __init fixmap_copy(pgd_t *pgdir);
#define __early_set_fixmap __set_fixmap
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 59f10dd13f12..50e5f25d3024 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -32,6 +32,44 @@
#define VFP_STATE_SIZE ((32 * 8) + 4)
#endif
+static inline unsigned long cpacr_save_enable_kernel_sve(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline unsigned long cpacr_save_enable_kernel_sme(void)
+{
+ unsigned long old = read_sysreg(cpacr_el1);
+ unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
+
+ write_sysreg(old | set, cpacr_el1);
+ isb();
+ return old;
+}
+
+static inline void cpacr_restore(unsigned long cpacr)
+{
+ write_sysreg(cpacr, cpacr_el1);
+ isb();
+}
+
+/*
+ * When we defined the maximum SVE vector length we defined the ABI so
+ * that the maximum vector length included all the reserved for future
+ * expansion bits in ZCR rather than those just currently defined by
+ * the architecture. While SME follows a similar pattern the fact that
+ * it includes a square matrix means that any allocations that attempt
+ * to cover the maximum potential vector length (such as happen with
+ * the regset used for ptrace) end up being extremely large. Define
+ * the much lower actual limit for use in such situations.
+ */
+#define SME_VQ_MAX 16
+
struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
@@ -44,16 +82,36 @@ extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+extern void fpsimd_kvm_prepare(void);
+
+struct cpu_fp_state {
+ struct user_fpsimd_state *st;
+ void *sve_state;
+ void *sme_state;
+ u64 *svcr;
+ unsigned int sve_vl;
+ unsigned int sme_vl;
+ enum fp_type *fp_type;
+ enum fp_type to_save;
+};
-extern void fpsimd_bind_task_to_cpu(void);
-extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
- void *sve_state, unsigned int sve_vl);
+extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
-/* Maximum VL that SVE VL-agnostic software can transparently support */
-#define SVE_VL_ARCH_MAX 0x100
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
+}
+
+/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
+#define VL_ARCH_MAX 0x100
/* Offset of FFR in the SVE register dump */
static inline size_t sve_ffr_offset(int vl)
@@ -63,22 +121,41 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread)
{
- return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
+ unsigned int vl;
+
+ if (system_supports_sme() && thread_sm_enabled(thread))
+ vl = thread_get_sme_vl(thread);
+ else
+ vl = thread_get_sve_vl(thread);
+
+ return (char *)thread->sve_state + sve_ffr_offset(vl);
}
-extern void sve_save_state(void *state, u32 *pfpsr);
+static inline void *thread_zt_state(struct thread_struct *thread)
+{
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+}
+
+extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
- unsigned long vq_minus_1);
+ int restore_ffr);
+extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
+extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void sme_save_state(void *state, int zt);
+extern void sme_load_state(void const *state, int zt);
struct arm64_cpu_capabilities;
-extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
-
-extern u64 read_zcr_features(void);
+extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
+extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
-extern int __ro_after_init sve_max_vl;
-extern int __ro_after_init sve_max_virtualisable_vl;
-extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern u64 read_smcr_features(void);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -95,23 +172,38 @@ static inline unsigned int __bit_to_vq(unsigned int bit)
return SVE_VQ_MAX - bit;
}
-/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
-static inline bool sve_vq_available(unsigned int vq)
-{
- return test_bit(__vq_to_bit(vq), sve_vq_map);
-}
-#ifdef CONFIG_ARM64_SVE
+struct vl_info {
+ enum vec_type type;
+ const char *name; /* For display purposes */
+
+ /* Minimum supported vector length across all CPUs */
+ int min_vl;
+
+ /* Maximum supported vector length across all CPUs */
+ int max_vl;
+ int max_virtualisable_vl;
-extern size_t sve_state_size(struct task_struct const *task);
+ /*
+ * Set of available vector lengths,
+ * where length vq encoded as bit __vq_to_bit(vq):
+ */
+ DECLARE_BITMAP(vq_map, SVE_VQ_MAX);
-extern void sve_alloc(struct task_struct *task);
+ /* Set of vector lengths present on at least one cpu: */
+ DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
+};
+
+#ifdef CONFIG_ARM64_SVE
+
+extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void fpsimd_force_sync_to_sve(struct task_struct *task);
extern void sve_sync_to_fpsimd(struct task_struct *task);
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
-extern int sve_set_vector_length(struct task_struct *task,
+extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags);
extern int sve_set_current_vl(unsigned long arg);
@@ -127,22 +219,96 @@ static inline void sve_user_enable(void)
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
}
+#define sve_cond_update_zcr_vq(val, reg) \
+ do { \
+ u64 __zcr = read_sysreg_s((reg)); \
+ u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \
+ __new |= (val) & ZCR_ELx_LEN_MASK; \
+ if (__zcr != __new) \
+ write_sysreg_s(__new, (reg)); \
+ } while (0)
+
/*
* Probing and setup functions.
* Calls to these functions must be serialised with one another.
*/
-extern void __init sve_init_vq_map(void);
-extern void sve_update_vq_map(void);
-extern int sve_verify_vq_map(void);
+enum vec_type;
+
+extern void __init vec_init_vq_map(enum vec_type type);
+extern void vec_update_vq_map(enum vec_type type);
+extern int vec_verify_vq_map(enum vec_type type);
extern void __init sve_setup(void);
+extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
+
+static inline void write_vl(enum vec_type type, u64 val)
+{
+ u64 tmp;
+
+ switch (type) {
+#ifdef CONFIG_ARM64_SVE
+ case ARM64_VEC_SVE:
+ tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_ZCR_EL1);
+ break;
+#endif
+#ifdef CONFIG_ARM64_SME
+ case ARM64_VEC_SME:
+ tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static inline int vec_max_vl(enum vec_type type)
+{
+ return vl_info[type].max_vl;
+}
+
+static inline int vec_max_virtualisable_vl(enum vec_type type)
+{
+ return vl_info[type].max_virtualisable_vl;
+}
+
+static inline int sve_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SVE);
+}
+
+static inline int sve_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SVE);
+}
+
+/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
+static inline bool vq_available(enum vec_type type, unsigned int vq)
+{
+ return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
+}
+
+static inline bool sve_vq_available(unsigned int vq)
+{
+ return vq_available(ARM64_VEC_SVE, vq);
+}
+
+size_t sve_state_size(struct task_struct const *task);
+
#else /* ! CONFIG_ARM64_SVE */
-static inline void sve_alloc(struct task_struct *task) { }
+static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+static inline int sve_max_virtualisable_vl(void)
+{
+ return 0;
+}
+
static inline int sve_set_current_vl(unsigned long arg)
{
return -EINVAL;
@@ -153,16 +319,116 @@ static inline int sve_get_current_vl(void)
return -EINVAL;
}
+static inline int sve_max_vl(void)
+{
+ return -EINVAL;
+}
+
+static inline bool sve_vq_available(unsigned int vq) { return false; }
+
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }
-static inline void sve_init_vq_map(void) { }
-static inline void sve_update_vq_map(void) { }
-static inline int sve_verify_vq_map(void) { return 0; }
+#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
+
+static inline void vec_init_vq_map(enum vec_type t) { }
+static inline void vec_update_vq_map(enum vec_type t) { }
+static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+ sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+ sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task, bool flush);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state for task, given task's currently configured vector
+ * length.
+ */
+static inline size_t sme_state_size(struct task_struct const *task)
+{
+ unsigned int vl = task_get_sme_vl(task);
+ size_t size;
+
+ size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
+
+ return size;
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+
+static inline size_t sme_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 636e9d9c7929..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -6,6 +6,8 @@
* Author: Catalin Marinas <catalin.marinas@arm.com>
*/
+#include <asm/assembler.h>
+
.macro fpsimd_save state, tmpnr
stp q0, q1, [\state, #16 * 0]
stp q2, q3, [\state, #16 * 2]
@@ -91,7 +93,14 @@
.endif
.endm
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
/* SVE instruction encodings for non-SVE-capable assemblers */
+/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
@@ -164,49 +173,159 @@
| ((\np) << 5)
.endm
+/* PFALSE P\np.B */
+.macro _sve_pfalse np
+ _sve_check_preg \np
+ .inst 0x2518e400 \
+ | (\np)
+.endm
+
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
.macro __for from:req, to:req
.if (\from) == (\to)
- _for__body \from
+ _for__body %\from
.else
- __for \from, (\from) + ((\to) - (\from)) / 2
- __for (\from) + ((\to) - (\from)) / 2 + 1, \to
+ __for %\from, %((\from) + ((\to) - (\from)) / 2)
+ __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
.endif
.endm
.macro _for var:req, from:req, to:req, insn:vararg
.macro _for__body \var:req
+ .noaltmacro
\insn
+ .altmacro
.endm
+ .altmacro
__for \from, \to
+ .noaltmacro
.purgem _for__body
.endm
-.macro sve_save nxbase, xpfpsr, nxtmp
+/* Update ZCR_EL1.LEN with the new VQ */
+.macro sve_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_ZCR_EL1
+ bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Preserve the first 128-bits of Znz and zero the rest. */
+.macro _sve_flush_z nz
+ _sve_check_zreg \nz
+ mov v\nz\().16b, v\nz\().16b
+.endm
+
+.macro sve_flush_z
+ _for n, 0, 31, _sve_flush_z \n
+.endm
+.macro sve_flush_p
+ _for n, 0, 15, _sve_pfalse \n
+.endm
+.macro sve_flush_ffr
+ _sve_wrffr 0
+.endm
+
+.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
_for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
_for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
+ cbz \save_ffr, 921f
_sve_rdffr 0
+ b 922f
+921:
+ _sve_pfalse 0 // Zero out FFR
+922:
_sve_str_p 0, \nxbase
_sve_ldr_p 0, \nxbase, -16
-
mrs x\nxtmp, fpsr
str w\nxtmp, [\xpfpsr]
mrs x\nxtmp, fpcr
str w\nxtmp, [\xpfpsr, #4]
.endm
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
- mrs_s x\nxtmp, SYS_ZCR_EL1
- bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK
- orr \xtmp2, \xtmp2, \xvqminus1
- cmp \xtmp2, x\nxtmp
- b.eq 921f
- msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising
-921:
+.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
+ cbz \restore_ffr, 921f
_sve_ldr_p 0, \nxbase
_sve_wrffr 0
+921:
_for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
ldr w\nxtmp, [\xpfpsr]
@@ -214,3 +333,25 @@
ldr w\nxtmp, [\xpfpsr, #4]
msr fpcr, x\nxtmp
.endm
+
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 91fa4baa1a93..ab158196480c 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -12,7 +12,18 @@
#define HAVE_FUNCTION_GRAPH_FP_TEST
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
+ * "return address pointer" which can be used to uniquely identify a return
+ * address which has been overwritten.
+ *
+ * On arm64 we use the address of the caller's frame record, which remains the
+ * same for the lifetime of the instrumented function, unlike the return
+ * address in the LR.
+ */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
#define MCOUNT_ADDR ((unsigned long)_mcount)
@@ -22,8 +33,7 @@
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
#define FTRACE_PLT_IDX 0
-#define FTRACE_REGS_PLT_IDX 1
-#define NR_FTRACE_PLTS 2
+#define NR_FTRACE_PLTS 1
/*
* Currently, gcc tends to save the link register after the local variables
@@ -52,25 +62,102 @@ extern unsigned long ftrace_graph_call;
extern void return_to_handler(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
+unsigned long ftrace_call_adjust(unsigned long addr);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+struct dyn_ftrace;
+struct ftrace_ops;
+
+#define arch_ftrace_get_regs(regs) NULL
+
+/*
+ * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct
+ * stack alignment
+ */
+struct ftrace_regs {
+ /* x0 - x8 */
+ unsigned long regs[9];
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ unsigned long direct_tramp;
+#else
+ unsigned long __unused;
+#endif
+
+ unsigned long fp;
+ unsigned long lr;
+
+ unsigned long sp;
+ unsigned long pc;
+};
+
+static __always_inline unsigned long
+ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
{
- /*
- * Adjust addr to point at the BL in the callsite.
- * See ftrace_init_nop() for the callsite sequence.
- */
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
- return addr + AARCH64_INSN_SIZE;
- /*
- * addr is the address of the mcount call instruction.
- * recordmcount does the necessary offset calculation.
- */
- return addr;
+ return fregs->pc;
}
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-struct dyn_ftrace;
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long pc)
+{
+ fregs->pc = pc;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs)
+{
+ return fregs->sp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n)
+{
+ if (n < 8)
+ return fregs->regs[n];
+ return 0;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
+{
+ return fregs->regs[0];
+}
+
+static __always_inline void
+ftrace_regs_set_return_value(struct ftrace_regs *fregs,
+ unsigned long ret)
+{
+ fregs->regs[0] = ret;
+}
+
+static __always_inline void
+ftrace_override_function_with_return(struct ftrace_regs *fregs)
+{
+ fregs->pc = fregs->lr;
+}
+
+int ftrace_regs_query_register_offset(const char *name);
+
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
+
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
+ unsigned long addr)
+{
+ /*
+ * The ftrace trampoline will return to this address instead of the
+ * instrumented function.
+ */
+ fregs->direct_tramp = addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
#endif
#define ftrace_return_address(n) return_address(n)
@@ -105,4 +192,30 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
}
#endif /* ifndef __ASSEMBLY__ */
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+struct fgraph_ret_regs {
+ /* x0 - x7 */
+ unsigned long regs[8];
+
+ unsigned long fp;
+ unsigned long __unused;
+};
+
+static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+{
+ return ret_regs->regs[0];
+}
+
+static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+{
+ return ret_regs->fp;
+}
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
+ unsigned long frame_pointer);
+
+#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
+#endif
+
#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 6cc26a127819..bc06691d2062 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -16,7 +16,7 @@
do { \
unsigned int loops = FUTEX_MAX_LOOPS; \
\
- uaccess_enable(); \
+ uaccess_enable_privileged(); \
asm volatile( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
@@ -25,21 +25,16 @@ do { \
" cbz %w0, 3f\n" \
" sub %w4, %w4, %w0\n" \
" cbnz %w4, 1b\n" \
-" mov %w0, %w7\n" \
+" mov %w0, %w6\n" \
"3:\n" \
" dmb ish\n" \
-" .pushsection .fixup,\"ax\"\n" \
-" .align 2\n" \
-"4: mov %w0, %w6\n" \
-" b 3b\n" \
-" .popsection\n" \
- _ASM_EXTABLE(1b, 4b) \
- _ASM_EXTABLE(2b, 4b) \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \
"+r" (loops) \
- : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \
+ : "r" (oparg), "Ir" (-EAGAIN) \
: "memory"); \
- uaccess_disable(); \
+ uaccess_disable_privileged(); \
} while (0)
static inline int
@@ -48,7 +43,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
int oldval = 0, ret, tmp;
u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
- pagefault_disable();
+ if (!access_ok(_uaddr, sizeof(u32)))
+ return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
@@ -75,8 +71,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
ret = -ENOSYS;
}
- pagefault_enable();
-
if (!ret)
*oval = oldval;
@@ -96,7 +90,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
return -EFAULT;
uaddr = __uaccess_mask_ptr(_uaddr);
- uaccess_enable();
+ uaccess_enable_privileged();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
@@ -106,20 +100,16 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
" cbz %w3, 3f\n"
" sub %w4, %w4, %w3\n"
" cbnz %w4, 1b\n"
-" mov %w0, %w8\n"
+" mov %w0, %w7\n"
"3:\n"
" dmb ish\n"
"4:\n"
-" .pushsection .fixup,\"ax\"\n"
-"5: mov %w0, %w7\n"
-" b 4b\n"
-" .popsection\n"
- _ASM_EXTABLE(1b, 5b)
- _ASM_EXTABLE(2b, 5b)
+ _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
+ _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
- : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
+ : "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
: "memory");
- uaccess_disable();
+ uaccess_disable_privileged();
if (!ret)
*uval = val;
diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h
new file mode 100644
index 000000000000..05da4a7c5788
--- /dev/null
+++ b/arch/arm64/include/asm/gpr-num.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLY__
+
+ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ .equ .L__gpr_num_x\num, \num
+ .equ .L__gpr_num_w\num, \num
+ .endr
+ .equ .L__gpr_num_xzr, 31
+ .equ .L__gpr_num_wzr, 31
+
+#else /* __ASSEMBLY__ */
+
+#define __DEFINE_ASM_GPR_NUMS \
+" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
+" .equ .L__gpr_num_x\\num, \\num\n" \
+" .equ .L__gpr_num_w\\num, \\num\n" \
+" .endr\n" \
+" .equ .L__gpr_num_xzr, 31\n" \
+" .equ .L__gpr_num_wzr, 31\n"
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 87ad961f3c97..cbfa7b6f2e09 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -13,49 +13,77 @@
#include <asm/kvm_arm.h>
#include <asm/sysreg.h>
-#define NR_IPI 7
-
-typedef struct {
- unsigned int __softirq_pending;
- unsigned int ipi_irqs[NR_IPI];
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-
-#define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++
-#define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member)
-
-u64 smp_irq_stat_cpu(unsigned int cpu);
-#define arch_irq_stat_cpu smp_irq_stat_cpu
+#define ack_bad_irq ack_bad_irq
+#include <asm-generic/hardirq.h>
#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
struct nmi_ctx {
u64 hcr;
+ unsigned int cnt;
};
DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
-#define arch_nmi_enter() \
- do { \
- if (is_kernel_in_hyp_mode()) { \
- struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
- nmi_ctx->hcr = read_sysreg(hcr_el2); \
- if (!(nmi_ctx->hcr & HCR_TGE)) { \
- write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \
- isb(); \
- } \
- } \
- } while (0)
+#define arch_nmi_enter() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ if (___ctx->cnt) { \
+ ___ctx->cnt++; \
+ break; \
+ } \
+ \
+ ___hcr = read_sysreg(hcr_el2); \
+ if (!(___hcr & HCR_TGE)) { \
+ write_sysreg(___hcr | HCR_TGE, hcr_el2); \
+ isb(); \
+ } \
+ /* \
+ * Make sure the sysreg write is performed before ___ctx->cnt \
+ * is set to 1. NMIs that see cnt == 1 will rely on us. \
+ */ \
+ barrier(); \
+ ___ctx->cnt = 1; \
+ /* \
+ * Make sure ___ctx->cnt is set before we save ___hcr. We \
+ * don't want ___ctx->hcr to be overwritten. \
+ */ \
+ barrier(); \
+ ___ctx->hcr = ___hcr; \
+} while (0)
-#define arch_nmi_exit() \
- do { \
- if (is_kernel_in_hyp_mode()) { \
- struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \
- if (!(nmi_ctx->hcr & HCR_TGE)) \
- write_sysreg(nmi_ctx->hcr, hcr_el2); \
- } \
- } while (0)
+#define arch_nmi_exit() \
+do { \
+ struct nmi_ctx *___ctx; \
+ u64 ___hcr; \
+ \
+ if (!is_kernel_in_hyp_mode()) \
+ break; \
+ \
+ ___ctx = this_cpu_ptr(&nmi_contexts); \
+ ___hcr = ___ctx->hcr; \
+ /* \
+ * Make sure we read ___ctx->hcr before we release \
+ * ___ctx->cnt as it makes ___ctx->hcr updatable again. \
+ */ \
+ barrier(); \
+ ___ctx->cnt--; \
+ /* \
+ * Make sure ___ctx->cnt release is visible before we \
+ * restore the sysreg. Otherwise a new NMI occurring \
+ * right after write_sysreg() can be fooled and think \
+ * we secured things for it. \
+ */ \
+ barrier(); \
+ if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
+ write_sysreg(___hcr, hcr_el2); \
+} while (0)
static inline void ack_bad_irq(unsigned int irq)
{
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 2eb6c234d594..2ddc33d93b13 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -10,6 +10,7 @@
#ifndef __ASM_HUGETLB_H
#define __ASM_HUGETLB_H
+#include <asm/cacheflush.h>
#include <asm/page.h>
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
@@ -17,29 +18,17 @@
extern bool arch_hugetlb_migration_supported(struct hstate *h);
#endif
-#define __HAVE_ARCH_HUGE_PTEP_GET
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
- return READ_ONCE(*ptep);
-}
-
-static inline int is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr, unsigned long len)
-{
- return 0;
-}
-
static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_dcache_clean, &page->flags);
}
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
-extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
- struct page *page, int writable);
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
+ pte_t *ptep, pte_t pte, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
@@ -51,15 +40,40 @@ extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
-extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep);
+extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
#define __HAVE_ARCH_HUGE_PTE_CLEAR
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
-extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, unsigned long sz);
-#define set_huge_swap_pte_at set_huge_swap_pte_at
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(pte_t *ptep);
+
+void __init arm64_hugetlb_cma_reserve(void);
+
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#include <asm-generic/hugetlb.h>
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ unsigned long stride = huge_page_size(hstate_vma(vma));
+
+ if (stride == PMD_SIZE)
+ __flush_tlb_range(vma, start, end, stride, false, 2);
+ else if (stride == PUD_SIZE)
+ __flush_tlb_range(vma, start, end, stride, false, 1);
+ else
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+}
+
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index bc7aaed4b34e..84055329cd8b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -142,7 +142,7 @@ static inline int get_num_brps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_BRPS_SHIFT);
+ ID_AA64DFR0_EL1_BRPs_SHIFT);
}
/* Determine number of WRP registers available. */
@@ -151,7 +151,15 @@ static inline int get_num_wrps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_WRPS_SHIFT);
+ ID_AA64DFR0_EL1_WRPs_SHIFT);
}
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
+#else
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
+{
+}
+#endif
+
#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 3d2f2472a36c..cd71e09ea14d 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -8,26 +8,43 @@
#include <uapi/asm/hwcap.h>
#include <asm/cpufeature.h>
+#define COMPAT_HWCAP_SWP (1 << 0)
#define COMPAT_HWCAP_HALF (1 << 1)
#define COMPAT_HWCAP_THUMB (1 << 2)
+#define COMPAT_HWCAP_26BIT (1 << 3)
#define COMPAT_HWCAP_FAST_MULT (1 << 4)
+#define COMPAT_HWCAP_FPA (1 << 5)
#define COMPAT_HWCAP_VFP (1 << 6)
#define COMPAT_HWCAP_EDSP (1 << 7)
+#define COMPAT_HWCAP_JAVA (1 << 8)
+#define COMPAT_HWCAP_IWMMXT (1 << 9)
+#define COMPAT_HWCAP_CRUNCH (1 << 10) /* Obsolete */
+#define COMPAT_HWCAP_THUMBEE (1 << 11)
#define COMPAT_HWCAP_NEON (1 << 12)
#define COMPAT_HWCAP_VFPv3 (1 << 13)
+#define COMPAT_HWCAP_VFPV3D16 (1 << 14)
#define COMPAT_HWCAP_TLS (1 << 15)
#define COMPAT_HWCAP_VFPv4 (1 << 16)
#define COMPAT_HWCAP_IDIVA (1 << 17)
#define COMPAT_HWCAP_IDIVT (1 << 18)
#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_VFPD32 (1 << 19)
#define COMPAT_HWCAP_LPAE (1 << 20)
#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP_FPHP (1 << 22)
+#define COMPAT_HWCAP_ASIMDHP (1 << 23)
+#define COMPAT_HWCAP_ASIMDDP (1 << 24)
+#define COMPAT_HWCAP_ASIMDFHM (1 << 25)
+#define COMPAT_HWCAP_ASIMDBF16 (1 << 26)
+#define COMPAT_HWCAP_I8MM (1 << 27)
#define COMPAT_HWCAP2_AES (1 << 0)
#define COMPAT_HWCAP2_PMULL (1 << 1)
#define COMPAT_HWCAP2_SHA1 (1 << 2)
#define COMPAT_HWCAP2_SHA2 (1 << 3)
#define COMPAT_HWCAP2_CRC32 (1 << 4)
+#define COMPAT_HWCAP2_SB (1 << 5)
+#define COMPAT_HWCAP2_SSBS (1 << 6)
#ifndef __ASSEMBLY__
#include <linux/log2.h>
@@ -76,7 +93,7 @@
#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
-#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32)
+#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64)
#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
#define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2)
#define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES)
@@ -86,6 +103,45 @@
#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
+#define KERNEL_HWCAP_SVEI8MM __khwcap2_feature(SVEI8MM)
+#define KERNEL_HWCAP_SVEF32MM __khwcap2_feature(SVEF32MM)
+#define KERNEL_HWCAP_SVEF64MM __khwcap2_feature(SVEF64MM)
+#define KERNEL_HWCAP_SVEBF16 __khwcap2_feature(SVEBF16)
+#define KERNEL_HWCAP_I8MM __khwcap2_feature(I8MM)
+#define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16)
+#define KERNEL_HWCAP_DGH __khwcap2_feature(DGH)
+#define KERNEL_HWCAP_RNG __khwcap2_feature(RNG)
+#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI)
+#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE)
+#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV)
+#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
+#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
+#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
+#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
+#define KERNEL_HWCAP_EBF16 __khwcap2_feature(EBF16)
+#define KERNEL_HWCAP_SVE_EBF16 __khwcap2_feature(SVE_EBF16)
+#define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC)
+#define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM)
+#define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1)
+#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2)
+#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1)
+#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32)
+#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
+#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
+#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
+#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS)
+#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC)
+#define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16)
+#define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3)
+#define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h
new file mode 100644
index 000000000000..b4b3076a76fb
--- /dev/null
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ * Written by David Brazdil <dbrazdil@google.com>
+ */
+
+#ifndef __ARM64_HYP_IMAGE_H__
+#define __ARM64_HYP_IMAGE_H__
+
+#define __HYP_CONCAT(a, b) a ## b
+#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
+ * to separate it from the kernel proper.
+ */
+#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
+#else
+#define kvm_nvhe_sym(sym) sym
+#endif
+
+#ifdef LINKER_SCRIPT
+
+/*
+ * KVM nVHE ELF section names are prefixed with .hyp, to separate them
+ * from the kernel proper.
+ */
+#define HYP_SECTION_NAME(NAME) .hyp##NAME
+
+/* Symbol defined at the beginning of each hyp section. */
+#define HYP_SECTION_SYMBOL_NAME(NAME) \
+ HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
+
+/*
+ * Helper to generate linker script statements starting a hyp section.
+ *
+ * A symbol with a well-known name is defined at the first byte. This
+ * is used as a base for hyp relocations (see gen-hyprel.c). It must
+ * be defined inside the section so the linker of `vmlinux` cannot
+ * separate it from the section data.
+ */
+#define BEGIN_HYP_SECTION(NAME) \
+ HYP_SECTION_NAME(NAME) : { \
+ HYP_SECTION_SYMBOL_NAME(NAME) = .;
+
+/* Helper to generate linker script statements ending a hyp section. */
+#define END_HYP_SECTION \
+ }
+
+/* Defines an ELF hyp section from input section @NAME and its subsections. */
+#define HYP_SECTION(NAME) \
+ BEGIN_HYP_SECTION(NAME) \
+ *(NAME NAME##.*) \
+ END_HYP_SECTION
+
+/*
+ * Defines a linker script alias of a kernel-proper symbol referenced by
+ * KVM nVHE hyp code.
+ */
+#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym;
+
+/* Defines a linker script alias for KVM nVHE hyp symbols */
+#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec);
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* __ARM64_HYP_IMAGE_H__ */
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
new file mode 100644
index 000000000000..bc6c7ac934a1
--- /dev/null
+++ b/arch/arm64/include/asm/hyperv-tlfs.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * This file contains definitions from the Hyper-V Hypervisor Top-Level
+ * Functional Specification (TLFS):
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#ifndef _ASM_HYPERV_TLFS_H
+#define _ASM_HYPERV_TLFS_H
+
+#include <linux/types.h>
+
+/*
+ * All data structures defined in the TLFS that are shared between Hyper-V
+ * and a guest VM use Little Endian byte ordering. This matches the default
+ * byte ordering of Linux running on ARM64, so no special handling is required.
+ */
+
+/*
+ * These Hyper-V registers provide information equivalent to the CPUID
+ * instruction on x86/x64.
+ */
+#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */
+#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */
+#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */
+
+/*
+ * Group C Features. See the asm-generic version of hyperv-tlfs.h
+ * for a description of Feature Groups.
+ */
+
+/* Crash MSRs available */
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8)
+
+/* STIMER direct mode is available */
+#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13)
+
+/*
+ * Synthetic register definitions equivalent to MSRs on x86/x64
+ */
+#define HV_REGISTER_CRASH_P0 0x00000210
+#define HV_REGISTER_CRASH_P1 0x00000211
+#define HV_REGISTER_CRASH_P2 0x00000212
+#define HV_REGISTER_CRASH_P3 0x00000213
+#define HV_REGISTER_CRASH_P4 0x00000214
+#define HV_REGISTER_CRASH_CTL 0x00000215
+
+#define HV_REGISTER_GUEST_OSID 0x00090002
+#define HV_REGISTER_VP_INDEX 0x00090003
+#define HV_REGISTER_TIME_REF_COUNT 0x00090004
+#define HV_REGISTER_REFERENCE_TSC 0x00090017
+
+#define HV_REGISTER_SINT0 0x000A0000
+#define HV_REGISTER_SCONTROL 0x000A0010
+#define HV_REGISTER_SIEFP 0x000A0012
+#define HV_REGISTER_SIMP 0x000A0013
+#define HV_REGISTER_EOM 0x000A0014
+
+#define HV_REGISTER_STIMER0_CONFIG 0x000B0000
+#define HV_REGISTER_STIMER0_COUNT 0x000B0001
+
+union hv_msi_entry {
+ u64 as_uint64[2];
+ struct {
+ u64 address;
+ u32 data;
+ u32 reserved;
+ } __packed;
+};
+
+#include <asm-generic/hyperv-tlfs.h>
+
+#endif
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index f9cc1d021791..0ae427f352c8 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -4,4 +4,7 @@
#include <asm/xen/hypervisor.h>
+void kvm_init_hyp_services(void);
+bool kvm_arm_hyp_service_available(u32 func_id);
+
#endif
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
index c2b13213c720..c09cf942dc92 100644
--- a/arch/arm64/include/asm/image.h
+++ b/arch/arm64/include/asm/image.h
@@ -27,7 +27,7 @@
/*
* struct arm64_image_header - arm64 kernel image header
- * See Documentation/arm64/booting.rst for details
+ * See Documentation/arch/arm64/booting.rst for details
*
* @code0: Executable code, or
* @mz_header alternatively used for part of MZ header
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
new file mode 100644
index 000000000000..1a7d0d483698
--- /dev/null
+++ b/arch/arm64/include/asm/insn-def.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+#include <asm/brk-imm.h>
+
+/* A64 instructions are always 32 bits. */
+#define AARCH64_INSN_SIZE 4
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * BRK instruction for provoking a fault on purpose
+ * Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
+ */
+#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
+
+#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index bb313dde58a4..db1aeacd4cd9 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,42 +10,42 @@
#include <linux/build_bug.h>
#include <linux/types.h>
-/* A64 instructions are always 32 bits. */
-#define AARCH64_INSN_SIZE 4
+#include <asm/insn-def.h>
#ifndef __ASSEMBLY__
-/*
- * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
- * Section C3.1 "A64 instruction index by encoding":
- * AArch64 main encoding table
- * Bit position
- * 28 27 26 25 Encoding Group
- * 0 0 - - Unallocated
- * 1 0 0 - Data processing, immediate
- * 1 0 1 - Branch, exception generation and system instructions
- * - 1 - 0 Loads and stores
- * - 1 0 1 Data processing - register
- * 0 1 1 1 Data processing - SIMD and floating point
- * 1 1 1 1 Data processing - SIMD and floating point
- * "-" means "don't care"
- */
-enum aarch64_insn_encoding_class {
- AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
- AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
- AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
- AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
- AARCH64_INSN_CLS_LDST, /* Loads and stores */
- AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and
- * system instructions */
-};
-enum aarch64_insn_hint_op {
+enum aarch64_insn_hint_cr_op {
AARCH64_INSN_HINT_NOP = 0x0 << 5,
AARCH64_INSN_HINT_YIELD = 0x1 << 5,
AARCH64_INSN_HINT_WFE = 0x2 << 5,
AARCH64_INSN_HINT_WFI = 0x3 << 5,
AARCH64_INSN_HINT_SEV = 0x4 << 5,
AARCH64_INSN_HINT_SEVL = 0x5 << 5,
+
+ AARCH64_INSN_HINT_XPACLRI = 0x07 << 5,
+ AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5,
+ AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5,
+ AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5,
+ AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5,
+ AARCH64_INSN_HINT_PACIAZ = 0x18 << 5,
+ AARCH64_INSN_HINT_PACIASP = 0x19 << 5,
+ AARCH64_INSN_HINT_PACIBZ = 0x1A << 5,
+ AARCH64_INSN_HINT_PACIBSP = 0x1B << 5,
+ AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5,
+ AARCH64_INSN_HINT_AUTIASP = 0x1D << 5,
+ AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5,
+ AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5,
+
+ AARCH64_INSN_HINT_ESB = 0x10 << 5,
+ AARCH64_INSN_HINT_PSB = 0x11 << 5,
+ AARCH64_INSN_HINT_TSB = 0x12 << 5,
+ AARCH64_INSN_HINT_CSDB = 0x14 << 5,
+ AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5,
+
+ AARCH64_INSN_HINT_BTI = 0x20 << 5,
+ AARCH64_INSN_HINT_BTIC = 0x22 << 5,
+ AARCH64_INSN_HINT_BTIJ = 0x24 << 5,
+ AARCH64_INSN_HINT_BTIJC = 0x26 << 5,
};
enum aarch64_insn_imm_type {
@@ -176,12 +176,18 @@ enum aarch64_insn_size_type {
enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_LOAD_REG_OFFSET,
AARCH64_INSN_LDST_STORE_REG_OFFSET,
+ AARCH64_INSN_LDST_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_STORE_IMM_OFFSET,
AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
AARCH64_INSN_LDST_LOAD_EX,
+ AARCH64_INSN_LDST_LOAD_ACQ_EX,
AARCH64_INSN_LDST_STORE_EX,
+ AARCH64_INSN_LDST_STORE_REL_EX,
+ AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET,
};
enum aarch64_insn_adsb_type {
@@ -256,6 +262,36 @@ enum aarch64_insn_adr_type {
AARCH64_INSN_ADR_TYPE_ADR,
};
+enum aarch64_insn_mem_atomic_op {
+ AARCH64_INSN_MEM_ATOMIC_ADD,
+ AARCH64_INSN_MEM_ATOMIC_CLR,
+ AARCH64_INSN_MEM_ATOMIC_EOR,
+ AARCH64_INSN_MEM_ATOMIC_SET,
+ AARCH64_INSN_MEM_ATOMIC_SWP,
+};
+
+enum aarch64_insn_mem_order_type {
+ AARCH64_INSN_MEM_ORDER_NONE,
+ AARCH64_INSN_MEM_ORDER_ACQ,
+ AARCH64_INSN_MEM_ORDER_REL,
+ AARCH64_INSN_MEM_ORDER_ACQREL,
+};
+
+enum aarch64_insn_mb_type {
+ AARCH64_INSN_MB_SY,
+ AARCH64_INSN_MB_ST,
+ AARCH64_INSN_MB_LD,
+ AARCH64_INSN_MB_ISH,
+ AARCH64_INSN_MB_ISHST,
+ AARCH64_INSN_MB_ISHLD,
+ AARCH64_INSN_MB_NSH,
+ AARCH64_INSN_MB_NSHST,
+ AARCH64_INSN_MB_NSHLD,
+ AARCH64_INSN_MB_OSH,
+ AARCH64_INSN_MB_OSHST,
+ AARCH64_INSN_MB_OSHLD,
+};
+
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ \
@@ -267,18 +303,52 @@ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
return (val); \
}
+/*
+ * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
+ * Section C3.1 "A64 instruction index by encoding":
+ * AArch64 main encoding table
+ * Bit position
+ * 28 27 26 25 Encoding Group
+ * 0 0 - - Unallocated
+ * 1 0 0 - Data processing, immediate
+ * 1 0 1 - Branch, exception generation and system instructions
+ * - 1 - 0 Loads and stores
+ * - 1 0 1 Data processing - register
+ * 0 1 1 1 Data processing - SIMD and floating point
+ * 1 1 1 1 Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+__AARCH64_INSN_FUNCS(class_branch_sys, 0x1c000000, 0x14000000)
+
__AARCH64_INSN_FUNCS(adr, 0x9F000000, 0x10000000)
__AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
+__AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000)
+__AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00)
+__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
+__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
+__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
+__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
+__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
+__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
+__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
+__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800)
+__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
+__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
__AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000)
__AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000)
@@ -311,6 +381,7 @@ __AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
__AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000)
__AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000)
__AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(mov_reg, 0x7FE0FFE0, 0x2A0003E0)
__AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000)
__AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
@@ -335,28 +406,144 @@ __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000)
__AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F)
__AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000)
+__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800)
__AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000)
+__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800)
__AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000)
+__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF)
__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0)
+__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF)
__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000)
__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F)
__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000)
+__AARCH64_INSN_FUNCS(dmb, 0xFFFFF0FF, 0xD50330BF)
+__AARCH64_INSN_FUNCS(dsb_base, 0xFFFFF0FF, 0xD503309F)
+__AARCH64_INSN_FUNCS(dsb_nxs, 0xFFFFF3FF, 0xD503323F)
+__AARCH64_INSN_FUNCS(isb, 0xFFFFF0FF, 0xD50330DF)
+__AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF)
+__AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F)
+__AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F)
+__AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F)
+__AARCH64_INSN_FUNCS(bti, 0xFFFFFF3F, 0xD503241f)
#undef __AARCH64_INSN_FUNCS
-bool aarch64_insn_is_nop(u32 insn);
-bool aarch64_insn_is_branch_imm(u32 insn);
+static __always_inline bool aarch64_insn_is_steppable_hint(u32 insn)
+{
+ if (!aarch64_insn_is_hint(insn))
+ return false;
+
+ switch (insn & 0xFE0) {
+ case AARCH64_INSN_HINT_XPACLRI:
+ case AARCH64_INSN_HINT_PACIA_1716:
+ case AARCH64_INSN_HINT_PACIB_1716:
+ case AARCH64_INSN_HINT_PACIAZ:
+ case AARCH64_INSN_HINT_PACIASP:
+ case AARCH64_INSN_HINT_PACIBZ:
+ case AARCH64_INSN_HINT_PACIBSP:
+ case AARCH64_INSN_HINT_BTI:
+ case AARCH64_INSN_HINT_BTIC:
+ case AARCH64_INSN_HINT_BTIJ:
+ case AARCH64_INSN_HINT_BTIJC:
+ case AARCH64_INSN_HINT_NOP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static __always_inline bool aarch64_insn_is_branch(u32 insn)
+{
+ /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_ret_auth(insn) ||
+ aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_br_auth(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_blr_auth(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
-static inline bool aarch64_insn_is_adr_adrp(u32 insn)
+static __always_inline bool aarch64_insn_is_branch_imm(u32 insn)
{
- return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
+static __always_inline bool aarch64_insn_is_adr_adrp(u32 insn)
+{
+ return aarch64_insn_is_adr(insn) ||
+ aarch64_insn_is_adrp(insn);
+}
+
+static __always_inline bool aarch64_insn_is_dsb(u32 insn)
+{
+ return aarch64_insn_is_dsb_base(insn) ||
+ aarch64_insn_is_dsb_nxs(insn);
+}
+
+static __always_inline bool aarch64_insn_is_barrier(u32 insn)
+{
+ return aarch64_insn_is_dmb(insn) ||
+ aarch64_insn_is_dsb(insn) ||
+ aarch64_insn_is_isb(insn) ||
+ aarch64_insn_is_sb(insn) ||
+ aarch64_insn_is_clrex(insn) ||
+ aarch64_insn_is_ssbb(insn) ||
+ aarch64_insn_is_pssbb(insn);
+}
+
+static __always_inline bool aarch64_insn_is_store_single(u32 insn)
+{
+ return aarch64_insn_is_store_imm(insn) ||
+ aarch64_insn_is_store_pre(insn) ||
+ aarch64_insn_is_store_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_store_pair(u32 insn)
+{
+ return aarch64_insn_is_stp(insn) ||
+ aarch64_insn_is_stp_pre(insn) ||
+ aarch64_insn_is_stp_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_load_single(u32 insn)
+{
+ return aarch64_insn_is_load_imm(insn) ||
+ aarch64_insn_is_load_pre(insn) ||
+ aarch64_insn_is_load_post(insn);
+}
+
+static __always_inline bool aarch64_insn_is_load_pair(u32 insn)
+{
+ return aarch64_insn_is_ldp(insn) ||
+ aarch64_insn_is_ldp_pre(insn) ||
+ aarch64_insn_is_ldp_post(insn);
+}
+
+static __always_inline bool aarch64_insn_uses_literal(u32 insn)
+{
+ /* ldr/ldrsw (literal), prfm */
+
+ return aarch64_insn_is_ldr_lit(insn) ||
+ aarch64_insn_is_ldrsw_lit(insn) ||
+ aarch64_insn_is_adr_adrp(insn) ||
+ aarch64_insn_is_prfm_lit(insn);
}
-int aarch64_insn_read(void *addr, u32 *insnp);
-int aarch64_insn_write(void *addr, u32 insn);
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
-bool aarch64_insn_uses_literal(u32 insn);
-bool aarch64_insn_is_branch(u32 insn);
u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
@@ -370,8 +557,18 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
enum aarch64_insn_condition cond);
-u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op);
-u32 aarch64_insn_gen_nop(void);
+
+static __always_inline u32
+aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
+{
+ return aarch64_insn_get_hint_value() | op;
+}
+
+static __always_inline u32 aarch64_insn_gen_nop(void)
+{
+ return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+}
+
u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
enum aarch64_insn_branch_type type);
u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
@@ -379,6 +576,14 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
enum aarch64_insn_register offset,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ unsigned int imm,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ bool is64bit);
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
@@ -390,13 +595,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
- enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size);
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
- enum aarch64_insn_register value,
- enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
@@ -453,16 +651,45 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
enum aarch64_insn_register Rn,
enum aarch64_insn_register Rd,
u8 lsb);
-u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
- enum aarch64_insn_prfm_type type,
- enum aarch64_insn_prfm_target target,
- enum aarch64_insn_prfm_policy policy);
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order);
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order);
+#else
+static inline
+u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_atomic_op op,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+
+static inline
+u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_mem_order_type order)
+{
+ return AARCH64_BREAK_FAULT;
+}
+#endif
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
-int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
-
s32 aarch64_insn_adrp_get_offset(u32 insn);
u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
@@ -479,6 +706,7 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn);
typedef bool (pstate_check_t)(unsigned long);
extern pstate_check_t * const aarch32_opcode_cond_checks[16];
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4e531f57147d..3b694511b98f 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -9,11 +9,11 @@
#define __ASM_IO_H
#include <linux/types.h>
+#include <linux/pgtable.h>
#include <asm/byteorder.h>
#include <asm/barrier.h>
#include <asm/memory.h>
-#include <asm/pgtable.h>
#include <asm/early_ioremap.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
@@ -22,31 +22,31 @@
* Generic IO read/write. These perform native-endian accesses.
*/
#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
@@ -57,7 +57,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
}
#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
@@ -69,7 +69,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
}
#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
+static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
asm volatile(ALTERNATIVE("ldr %w0, [%1]",
@@ -80,7 +80,7 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
}
#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
asm volatile(ALTERNATIVE("ldr %0, [%1]",
@@ -91,7 +91,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
}
/* IO barriers */
-#define __iormb(v) \
+#define __io_ar(v) \
({ \
unsigned long tmp; \
\
@@ -108,38 +108,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
: "memory"); \
})
-#define __io_par(v) __iormb(v)
-#define __iowmb() dma_wmb()
+#define __io_bw() dma_wmb()
+#define __io_br(v)
+#define __io_aw(v)
-/*
- * Relaxed I/O memory access primitives. These follow the Device memory
- * ordering rules but do not guarantee any ordering relative to Normal memory
- * accesses.
- */
-#define readb_relaxed(c) ({ u8 __r = __raw_readb(c); __r; })
-#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
-#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-
-#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c)))
-#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-
-/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.
- */
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
-
-#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
-#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
-#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
-#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
+/* arm64-specific, don't use in portable drivers */
+#define __iormb(v) __io_ar(v)
+#define __iowmb() __io_bw()
+#define __iomb() dma_mb()
/*
* I/O port access primitives.
@@ -162,22 +138,15 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
/*
* I/O memory mapping functions.
*/
-extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot);
-extern void iounmap(volatile void __iomem *addr);
-extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
-#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
-#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
+#define ioremap_prot ioremap_prot
-/*
- * PCI configuration space mapping function.
- *
- * The PCI specification disallows posted write configuration transactions.
- * Add an arch specific pci_remap_cfgspace() definition that is implemented
- * through nGnRnE device memory attribute as recommended by the ARM v8
- * Architecture reference manual Issue A.k B2.8.2 "Device memory".
- */
-#define pci_remap_cfgspace(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
+#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
+
+#define ioremap_wc(addr, size) \
+ ioremap_prot((addr), (size), PROT_NORMAL_NC)
+#define ioremap_np(addr, size) \
+ ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
/*
* io{read,write}{16,32,64}be() macros
@@ -192,6 +161,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
#include <asm-generic/io.h>
+#define ioremap_cache ioremap_cache
+static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
+{
+ if (pfn_is_map_memory(__phys_to_pfn(addr)))
+ return (void __iomem *)__phys_to_virt(addr);
+
+ return ioremap_prot(addr, size, PROT_NORMAL);
+}
+
/*
* More restrictive address range checking than the default implementation
* (PHYS_OFFSET and PHYS_MASK taken into account).
@@ -200,6 +178,8 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-extern int devmem_is_allowed(unsigned long pfn);
+extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
+ unsigned long flags);
+#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
#endif /* __ASM_IO_H */
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b2b0c6405eb0..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,10 +4,19 @@
#ifndef __ASSEMBLER__
+#include <linux/cpumask.h>
+
#include <asm-generic/irq.h>
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
struct pt_regs;
+int set_handle_irq(void (*handle_irq)(struct pt_regs *));
+#define set_handle_irq set_handle_irq
+int set_handle_fiq(void (*handle_fiq)(struct pt_regs *));
+
static inline int nr_legacy_irqs(void)
{
return 0;
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index 8a1ef1907760..a1020285ea75 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -2,11 +2,9 @@
#ifndef __ASM_IRQ_WORK_H
#define __ASM_IRQ_WORK_H
-#include <asm/smp.h>
-
static inline bool arch_irq_work_has_interrupt(void)
{
- return !!__smp_cross_call;
+ return true;
}
#endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index aa4b6521ef14..0a7186a93882 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -12,54 +12,80 @@
/*
* Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
- * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif'
* order:
* Masking debug exceptions causes all other exceptions to be masked too/
- * Masking SError masks irq, but not debug exceptions. Masking irqs has no
- * side effects for other flags. Keeping to this order makes it easier for
- * entry.S to know which exceptions should be unmasked.
- *
- * FIQ is never expected, but we mask it when we disable debug exceptions, and
- * unmask it at all other times.
+ * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are
+ * always masked and unmasked together, and have no side effects for other
+ * flags. Keeping to this order makes it easier for entry.S to know which
+ * exceptions should be unmasked.
*/
-/*
- * CPU interrupt mask handling.
- */
-static inline void arch_local_irq_enable(void)
+static __always_inline void __daif_local_irq_enable(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ barrier();
+ asm volatile("msr daifclr, #3");
+ barrier();
+}
+static __always_inline void __pmr_local_irq_enable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifclr, #2 // arch_local_irq_enable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQON)
- : "memory");
-
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQON, SYS_ICC_PMR_EL1);
pmr_sync();
+ barrier();
}
-static inline void arch_local_irq_disable(void)
+static inline void arch_local_irq_enable(void)
{
- if (system_has_prio_mask_debugging()) {
- u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_enable();
+ } else {
+ __daif_local_irq_enable();
+ }
+}
+static __always_inline void __daif_local_irq_disable(void)
+{
+ barrier();
+ asm volatile("msr daifset, #3");
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_disable(void)
+{
+ if (IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING)) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
}
- asm volatile(ALTERNATIVE(
- "msr daifset, #2 // arch_local_irq_disable",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" ((unsigned long) GIC_PRIO_IRQOFF)
- : "memory");
+ barrier();
+ write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);
+ barrier();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_disable();
+ } else {
+ __daif_local_irq_disable();
+ }
+}
+
+static __always_inline unsigned long __daif_local_save_flags(void)
+{
+ return read_sysreg(daif);
+}
+
+static __always_inline unsigned long __pmr_local_save_flags(void)
+{
+ return read_sysreg_s(SYS_ICC_PMR_EL1);
}
/*
@@ -67,64 +93,108 @@ static inline void arch_local_irq_disable(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long flags;
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_save_flags();
+ } else {
+ return __daif_local_save_flags();
+ }
+}
- asm volatile(ALTERNATIVE(
- "mrs %0, daif",
- __mrs_s("%0", SYS_ICC_PMR_EL1),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (flags)
- :
- : "memory");
+static __always_inline bool __daif_irqs_disabled_flags(unsigned long flags)
+{
+ return flags & PSR_I_BIT;
+}
- return flags;
+static __always_inline bool __pmr_irqs_disabled_flags(unsigned long flags)
+{
+ return flags != GIC_PRIO_IRQON;
}
-static inline int arch_irqs_disabled_flags(unsigned long flags)
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- int res;
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled_flags(flags);
+ } else {
+ return __daif_irqs_disabled_flags(flags);
+ }
+}
- asm volatile(ALTERNATIVE(
- "and %w0, %w1, #" __stringify(PSR_I_BIT),
- "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (res)
- : "r" ((int) flags)
- : "memory");
+static __always_inline bool __daif_irqs_disabled(void)
+{
+ return __daif_irqs_disabled_flags(__daif_local_save_flags());
+}
- return res;
+static __always_inline bool __pmr_irqs_disabled(void)
+{
+ return __pmr_irqs_disabled_flags(__pmr_local_save_flags());
}
-static inline unsigned long arch_local_irq_save(void)
+static inline bool arch_irqs_disabled(void)
{
- unsigned long flags;
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_irqs_disabled();
+ } else {
+ return __daif_irqs_disabled();
+ }
+}
+
+static __always_inline unsigned long __daif_local_irq_save(void)
+{
+ unsigned long flags = __daif_local_save_flags();
+
+ __daif_local_irq_disable();
+
+ return flags;
+}
- flags = arch_local_save_flags();
+static __always_inline unsigned long __pmr_local_irq_save(void)
+{
+ unsigned long flags = __pmr_local_save_flags();
/*
* There are too many states with IRQs disabled, just keep the current
* state if interrupts are already disabled/masked.
*/
- if (!arch_irqs_disabled_flags(flags))
- arch_local_irq_disable();
+ if (!__pmr_irqs_disabled_flags(flags))
+ __pmr_local_irq_disable();
return flags;
}
+static inline unsigned long arch_local_irq_save(void)
+{
+ if (system_uses_irq_prio_masking()) {
+ return __pmr_local_irq_save();
+ } else {
+ return __daif_local_irq_save();
+ }
+}
+
+static __always_inline void __daif_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg(flags, daif);
+ barrier();
+}
+
+static __always_inline void __pmr_local_irq_restore(unsigned long flags)
+{
+ barrier();
+ write_sysreg_s(flags, SYS_ICC_PMR_EL1);
+ pmr_sync();
+ barrier();
+}
+
/*
* restore saved IRQ state
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
- asm volatile(ALTERNATIVE(
- "msr daif, %0",
- __msr_s(SYS_ICC_PMR_EL1, "%0"),
- ARM64_HAS_IRQ_PRIO_MASKING)
- :
- : "r" (flags)
- : "memory");
-
- pmr_sync();
+ if (system_uses_irq_prio_masking()) {
+ __pmr_local_irq_restore(flags);
+ } else {
+ __daif_local_irq_restore(flags);
+ }
}
#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index cea441b6aa5d..48ddc0f45d22 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -15,8 +15,8 @@
#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
-static __always_inline bool arch_static_branch(struct static_key *key,
- bool branch)
+static __always_inline bool arch_static_branch(struct static_key * const key,
+ const bool branch)
{
asm_volatile_goto(
"1: nop \n\t"
@@ -32,8 +32,8 @@ l_yes:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key,
- bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key,
+ const bool branch)
{
asm_volatile_goto(
"1: b %l[l_yes] \n\t"
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index b0dc4abc3589..7eefc525a9df 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -6,36 +6,18 @@
#include <linux/linkage.h>
#include <asm/memory.h>
+#include <asm/mte-kasan.h>
#include <asm/pgtable-types.h>
#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
#define arch_kasan_reset_tag(addr) __tag_reset(addr)
#define arch_kasan_get_tag(addr) __tag_get(addr)
-#ifdef CONFIG_KASAN
-
-/*
- * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
- * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
- * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
- *
- * KASAN_SHADOW_OFFSET:
- * This value is used to map an address to the corresponding shadow
- * address by the following formula:
- * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
- *
- * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
- * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
- * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
- * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
- * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
- */
-#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
-#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+asmlinkage void kasan_early_init(void);
void kasan_init(void);
void kasan_copy_shadow(pgd_t *pgdir);
-asmlinkage void kasan_early_init(void);
#else
static inline void kasan_init(void) { }
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a6e5da755359..83ddb14b95a5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -8,7 +8,8 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
-#include <asm/pgtable.h>
+#include <asm/boot.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/sparsemem.h>
/*
@@ -17,11 +18,6 @@
* with 4K (section size = 2M) but not with 16K (section size = 32M) or
* 64K (section size = 512M).
*/
-#ifdef CONFIG_ARM64_4K_PAGES
-#define ARM64_SWAPPER_USES_SECTION_MAPS 1
-#else
-#define ARM64_SWAPPER_USES_SECTION_MAPS 0
-#endif
/*
* The idmap and swapper page tables need some space reserved in the kernel
@@ -33,72 +29,59 @@
* VA range, so pages required to map highest possible PA are reserved in all
* cases.
*/
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#ifdef CONFIG_ARM64_4K_PAGES
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
-#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
-#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
/*
- * If KASLR is enabled, then an offset K is added to the kernel address
- * space. The bottom 21 bits of this offset are zero to guarantee 2MB
- * alignment for PA and VA.
- *
- * For each pagetable level of the swapper, we know that the shift will
- * be larger than 21 (for the 4KB granule case we use section maps thus
- * the smallest shift is actually 30) thus there is the possibility that
- * KASLR can increase the number of pagetable entries by 1, so we make
- * room for this extra entry.
- *
- * Note KASLR cannot increase the number of required entries for a level
- * by more than one because it increments both the virtual start and end
- * addresses equally (the extra entry comes from the case where the end
- * address is just pushed over a boundary and the start address isn't).
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
*/
+#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE)
-#ifdef CONFIG_RANDOMIZE_BASE
-#define EARLY_KASLR (1)
-#else
-#define EARLY_KASLR (0)
-#endif
+#define SPAN_NR_ENTRIES(vstart, vend, shift) \
+ ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
-#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \
- - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
+#define EARLY_ENTRIES(vstart, vend, shift, add) \
+ (SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
-#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
+#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
#if SWAPPER_PGTABLE_LEVELS > 3
-#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT))
+#define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
#else
-#define EARLY_PUDS(vstart, vend) (0)
+#define EARLY_PUDS(vstart, vend, add) (0)
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
-#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT))
+#define EARLY_PMDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT, add))
#else
-#define EARLY_PMDS(vstart, vend) (0)
+#define EARLY_PMDS(vstart, vend, add) (0)
#endif
-#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \
- + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
- + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
- + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
-#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
+ + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
+ + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
+ + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
-#ifdef CONFIG_ARM64_SW_TTBR0_PAN
-#define RESERVED_TTBR0_SIZE (PAGE_SIZE)
+/* the initial ID map may need two extra pages if it needs to be extended */
+#if VA_BITS < 48
+#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
#else
-#define RESERVED_TTBR0_SIZE (0)
+#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
#endif
+#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
/* Initial memory map size */
-#if ARM64_SWAPPER_USES_SECTION_MAPS
-#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
-#define SWAPPER_BLOCK_SIZE SECTION_SIZE
+#ifdef CONFIG_ARM64_4K_PAGES
+#define SWAPPER_BLOCK_SHIFT PMD_SHIFT
+#define SWAPPER_BLOCK_SIZE PMD_SIZE
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
#else
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
@@ -106,46 +89,18 @@
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
#endif
-/* The size of the initial kernel direct mapping */
-#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
-
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
+#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
-#if ARM64_SWAPPER_USES_SECTION_MAPS
-#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
-#else
-#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
-#endif
-
-/*
- * To make optimal use of block mappings when laying out the linear
- * mapping, round down the base of physical memory to a size that can
- * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
- * (64k granule), or a multiple that can be mapped using contiguous bits
- * in the page tables: 32 * PMD_SIZE (16k granule)
- */
-#if defined(CONFIG_ARM64_4K_PAGES)
-#define ARM64_MEMSTART_SHIFT PUD_SHIFT
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5)
-#else
-#define ARM64_MEMSTART_SHIFT PMD_SHIFT
-#endif
-
-/*
- * sparsemem vmemmap imposes an additional requirement on the alignment of
- * memstart_addr, due to the fact that the base of the vmemmap region
- * has a direct correspondence, and needs to appear sufficiently aligned
- * in the virtual address space.
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
-#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS)
+#ifdef CONFIG_ARM64_4K_PAGES
+#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
+#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
#else
-#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT)
+#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
+#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 12a561a54128..9ac9572a3bbe 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -84,25 +84,46 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
extern bool crash_is_nosave(unsigned long pfn);
extern void crash_prepare_suspend(void);
extern void crash_post_resume(void);
+
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
#else
static inline bool crash_is_nosave(unsigned long pfn) {return false; }
static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
-#ifdef CONFIG_KEXEC_FILE
+struct kimage;
+
+#if defined(CONFIG_KEXEC_CORE)
+void cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2);
+
+int machine_kexec_post_load(struct kimage *image);
+#define machine_kexec_post_load machine_kexec_post_load
+#endif
+
#define ARCH_HAS_KIMAGE_ARCH
struct kimage_arch {
void *dtb;
- unsigned long dtb_mem;
+ phys_addr_t dtb_mem;
+ phys_addr_t kern_reloc;
+ phys_addr_t el2_vectors;
+ phys_addr_t ttbr0;
+ phys_addr_t ttbr1;
+ phys_addr_t zero_page;
+ unsigned long phys_offset;
+ unsigned long t0sz;
};
+#ifdef CONFIG_KEXEC_FILE
extern const struct kexec_file_ops kexec_image_ops;
-struct kimage;
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
-extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
extern int load_other_segments(struct kimage *image,
unsigned long kernel_load_addr, unsigned long kernel_size,
char *initrd, unsigned long initrd_len,
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
new file mode 100644
index 000000000000..a81937fae9f6
--- /dev/null
+++ b/arch/arm64/include/asm/kfence.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm64 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef __ASM_KFENCE_H
+#define __ASM_KFENCE_H
+
+#include <asm/set_memory.h>
+
+static inline bool arch_kfence_init_pool(void) { return true; }
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ set_memory_valid(addr, 1, !protect);
+
+ return true;
+}
+
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static inline bool arm64_kfence_can_set_direct_map(void)
+{
+ return !kfence_early_init;
+}
+#else /* CONFIG_KFENCE */
+static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
+#endif /* CONFIG_KFENCE */
+
+#endif /* __ASM_KFENCE_H */
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 97e511d645a2..be7a3680dadf 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -16,7 +16,7 @@
#include <linux/percpu.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE 1
+#define MAX_INSN_SIZE 2
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
@@ -28,25 +28,16 @@ struct prev_kprobe {
unsigned int status;
};
-/* Single step context for kprobe */
-struct kprobe_step_ctx {
- unsigned long ss_pending;
- unsigned long match_addr;
-};
-
/* per-cpu kprobe control block */
struct kprobe_ctlblk {
unsigned int kprobe_status;
unsigned long saved_irqflag;
struct prev_kprobe prev_kprobe;
- struct kprobe_step_ctx ss_ctx;
};
void arch_remove_kprobe(struct kprobe *);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
-int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
#endif /* CONFIG_KPROBES */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6e5d839f42b5..3c6f8ba1e479 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -9,10 +9,28 @@
#include <asm/esr.h>
#include <asm/memory.h>
+#include <asm/sysreg.h>
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
+
+#define HCR_TID5 (UL(1) << 58)
+#define HCR_DCT (UL(1) << 57)
+#define HCR_ATA_SHIFT 56
+#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS (UL(1) << 55)
+#define HCR_TTLBIS (UL(1) << 54)
+#define HCR_ENSCXT (UL(1) << 53)
+#define HCR_TOCU (UL(1) << 52)
+#define HCR_AMVOFFEN (UL(1) << 51)
+#define HCR_TICAB (UL(1) << 50)
+#define HCR_TID4 (UL(1) << 49)
+#define HCR_FIEN (UL(1) << 47)
#define HCR_FWB (UL(1) << 46)
+#define HCR_NV2 (UL(1) << 45)
+#define HCR_AT (UL(1) << 44)
+#define HCR_NV1 (UL(1) << 43)
+#define HCR_NV (UL(1) << 42)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
#define HCR_TEA (UL(1) << 37)
@@ -30,9 +48,9 @@
#define HCR_TVM (UL(1) << 26)
#define HCR_TTLB (UL(1) << 25)
#define HCR_TPU (UL(1) << 24)
-#define HCR_TPC (UL(1) << 23)
+#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */
#define HCR_TSW (UL(1) << 22)
-#define HCR_TAC (UL(1) << 21)
+#define HCR_TACR (UL(1) << 21)
#define HCR_TIDCP (UL(1) << 20)
#define HCR_TSC (UL(1) << 19)
#define HCR_TID3 (UL(1) << 18)
@@ -54,34 +72,44 @@
#define HCR_PTW (UL(1) << 2)
#define HCR_SWIO (UL(1) << 1)
#define HCR_VM (UL(1) << 0)
+#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39))
/*
* The bits we set in HCR:
* TLOR: Trap LORegion register accesses
* RW: 64bit by default, can be overridden for 32bit VMs
- * TAC: Trap ACTLR
+ * TACR: Trap ACTLR
* TSC: Trap SMC
* TSW: Trap cache operations by set/way
* TWE: Trap WFE
* TWI: Trap WFI
* TIDCP: Trap L2CTLR/L2ECTLR
* BSU_IS: Upgrade barriers to the inner shareable domain
- * FB: Force broadcast of all maintainance operations
+ * FB: Force broadcast of all maintenance operations
* AMO: Override CPSR.A and enable signaling with VA
* IMO: Override CPSR.I and enable signaling with VI
* FMO: Override CPSR.F and enable signaling with VF
* SWIO: Turn set/way invalidates into set/way clean+invalidate
+ * PTW: Take a stage2 fault if a stage1 walk steps in device memory
+ * TID3: Trap EL1 reads of group 3 ID registers
+ * TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
- HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
+#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCRX_GUEST_FLAGS \
+ (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
+ (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
+#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
+
/* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
+#define TCR_EL2_DS (1UL << 32)
+#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
@@ -95,6 +123,7 @@
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_DS TCR_EL2_DS
#define VTCR_EL2_RES1 (1U << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
@@ -124,7 +153,7 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
- * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
@@ -266,40 +295,98 @@
#define CPTR_EL2_TFP_SHIFT 10
/* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TCPAC (1U << 31)
+#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
+#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
-#define CPTR_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 */
-#define CPTR_EL2_DEFAULT CPTR_EL2_RES1
+#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
+#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(29, 21) | \
+ GENMASK(19, 14) | \
+ BIT(11))
/* Hyp Debug Configuration Register bits */
-#define MDCR_EL2_TPMS (1 << 14)
+#define MDCR_EL2_E2TB_MASK (UL(0x3))
+#define MDCR_EL2_E2TB_SHIFT (UL(24))
+#define MDCR_EL2_HPMFZS (UL(1) << 36)
+#define MDCR_EL2_HPMFZO (UL(1) << 29)
+#define MDCR_EL2_MTPME (UL(1) << 28)
+#define MDCR_EL2_TDCC (UL(1) << 27)
+#define MDCR_EL2_HLP (UL(1) << 26)
+#define MDCR_EL2_HCCD (UL(1) << 23)
+#define MDCR_EL2_TTRF (UL(1) << 19)
+#define MDCR_EL2_HPMD (UL(1) << 17)
+#define MDCR_EL2_TPMS (UL(1) << 14)
#define MDCR_EL2_E2PB_MASK (UL(0x3))
#define MDCR_EL2_E2PB_SHIFT (UL(12))
-#define MDCR_EL2_TDRA (1 << 11)
-#define MDCR_EL2_TDOSA (1 << 10)
-#define MDCR_EL2_TDA (1 << 9)
-#define MDCR_EL2_TDE (1 << 8)
-#define MDCR_EL2_HPME (1 << 7)
-#define MDCR_EL2_TPM (1 << 6)
-#define MDCR_EL2_TPMCR (1 << 5)
-#define MDCR_EL2_HPMN_MASK (0x1F)
-
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT ESR_ELx_FSC_FAULT
-#define FSC_ACCESS ESR_ELx_FSC_ACCESS
-#define FSC_PERM ESR_ELx_FSC_PERM
-#define FSC_SEA ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
+#define MDCR_EL2_TDRA (UL(1) << 11)
+#define MDCR_EL2_TDOSA (UL(1) << 10)
+#define MDCR_EL2_TDA (UL(1) << 9)
+#define MDCR_EL2_TDE (UL(1) << 8)
+#define MDCR_EL2_HPME (UL(1) << 7)
+#define MDCR_EL2_TPM (UL(1) << 6)
+#define MDCR_EL2_TPMCR (UL(1) << 5)
+#define MDCR_EL2_HPMN_MASK (UL(0x1F))
+#define MDCR_EL2_RES0 (GENMASK(63, 37) | \
+ GENMASK(35, 30) | \
+ GENMASK(25, 24) | \
+ GENMASK(22, 20) | \
+ BIT(18) | \
+ GENMASK(16, 15))
+
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0
+#define __HFGRTR_EL2_MASK GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK)
+
+/*
+ * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any
+ * future additions, define __HFGWTR* macros relative to __HFGRTR* ones.
+ */
+#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+ GENMASK(26, 25) | BIT(21) | BIT(18) | \
+ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK)
+
+#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0
+#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0))
+#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK)
+
+#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0
+#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \
+ GENMASK(41, 40) | GENMASK(37, 22) | \
+ GENMASK(19, 9) | GENMASK(7, 0))
+#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK)
+
+#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0
+#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \
+ GENMASK(46, 44) | GENMASK(42, 41) | \
+ GENMASK(37, 35) | GENMASK(33, 31) | \
+ GENMASK(29, 23) | GENMASK(21, 10) | \
+ GENMASK(8, 7) | GENMASK(5, 0))
+#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK)
+
+#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0
+#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0))
+#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0 HCRX_EL2_RES0
+#define __HCRX_EL2_MASK (BIT(6))
+#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
@@ -307,9 +394,13 @@
* We have
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
+ *
+ * Always assume 52 bit PA since at this point, we don't know how many PA bits
+ * the page table has been set up for. This should be safe since unused address
+ * bits in PAR are res0.
*/
#define PAR_TO_HPFAR(par) \
- (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
+ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
#define ECN(x) { ESR_ELx_EC_##x, #x }
@@ -322,10 +413,25 @@
ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
- ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
-#define CPACR_EL1_FPEN (3 << 20)
#define CPACR_EL1_TTA (1 << 28)
-#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
+
+#define kvm_mode_names \
+ { PSR_MODE_EL0t, "EL0t" }, \
+ { PSR_MODE_EL1t, "EL1t" }, \
+ { PSR_MODE_EL1h, "EL1h" }, \
+ { PSR_MODE_EL2t, "EL2t" }, \
+ { PSR_MODE_EL2h, "EL2h" }, \
+ { PSR_MODE_EL3t, "EL3t" }, \
+ { PSR_MODE_EL3h, "EL3h" }, \
+ { PSR_AA32_MODE_USR, "32-bit USR" }, \
+ { PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \
+ { PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \
+ { PSR_AA32_MODE_SVC, "32-bit SVC" }, \
+ { PSR_AA32_MODE_ABT, "32-bit ABT" }, \
+ { PSR_AA32_MODE_HYP, "32-bit HYP" }, \
+ { PSR_AA32_MODE_UND, "32-bit UND" }, \
+ { PSR_AA32_MODE_SYS, "32-bit SYS" }
#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 44a243754c1b..24b5e6b23417 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -7,11 +7,10 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
+#include <asm/hyp_image.h>
+#include <asm/insn.h>
#include <asm/virt.h>
-#define VCPU_WORKAROUND_2_FLAG_SHIFT 0
-#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
-
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
#define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
@@ -36,81 +35,339 @@
*/
#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE)
+#define KVM_HOST_SMCCC_ID(id) \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ (id))
+
+#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name)
+
+#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0
+
#ifndef __ASSEMBLY__
#include <linux/mm.h>
-/* Translate a kernel address of @sym into its equivalent linear mapping */
-#define kvm_ksym_ref(sym) \
+enum __kvm_host_smccc_func {
+ /* Hypercalls available only prior to pKVM finalisation */
+ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
+ __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
+ __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
+ __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
+ __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
+
+ /* Hypercalls available after pKVM finalisation */
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+ __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
+ __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+ __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+};
+
+#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[]
+
+/*
+ * Define a pair of symbols sharing the same name but one defined in
+ * VHE and the other in nVHE hyp implementations.
+ */
+#define DECLARE_KVM_HYP_SYM(sym) \
+ DECLARE_KVM_VHE_SYM(sym); \
+ DECLARE_KVM_NVHE_SYM(sym)
+
+#define DECLARE_KVM_VHE_PER_CPU(type, sym) \
+ DECLARE_PER_CPU(type, sym)
+#define DECLARE_KVM_NVHE_PER_CPU(type, sym) \
+ DECLARE_PER_CPU(type, kvm_nvhe_sym(sym))
+
+#define DECLARE_KVM_HYP_PER_CPU(type, sym) \
+ DECLARE_KVM_VHE_PER_CPU(type, sym); \
+ DECLARE_KVM_NVHE_PER_CPU(type, sym)
+
+/*
+ * Compute pointer to a symbol defined in nVHE percpu region.
+ * Returns NULL if percpu memory has not been allocated yet.
+ */
+#define this_cpu_ptr_nvhe_sym(sym) per_cpu_ptr_nvhe_sym(sym, smp_processor_id())
+#define per_cpu_ptr_nvhe_sym(sym, cpu) \
+ ({ \
+ unsigned long base, off; \
+ base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \
+ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \
+ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \
+ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \
+ })
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+
+#define CHOOSE_NVHE_SYM(sym) sym
+#define CHOOSE_HYP_SYM(sym) CHOOSE_NVHE_SYM(sym)
+
+/* The nVHE hypervisor shouldn't even try to access VHE symbols */
+extern void *__nvhe_undefined_symbol;
+#define CHOOSE_VHE_SYM(sym) __nvhe_undefined_symbol
+#define this_cpu_ptr_hyp_sym(sym) (&__nvhe_undefined_symbol)
+#define per_cpu_ptr_hyp_sym(sym, cpu) (&__nvhe_undefined_symbol)
+
+#elif defined(__KVM_VHE_HYPERVISOR__)
+
+#define CHOOSE_VHE_SYM(sym) sym
+#define CHOOSE_HYP_SYM(sym) CHOOSE_VHE_SYM(sym)
+
+/* The VHE hypervisor shouldn't even try to access nVHE symbols */
+extern void *__vhe_undefined_symbol;
+#define CHOOSE_NVHE_SYM(sym) __vhe_undefined_symbol
+#define this_cpu_ptr_hyp_sym(sym) (&__vhe_undefined_symbol)
+#define per_cpu_ptr_hyp_sym(sym, cpu) (&__vhe_undefined_symbol)
+
+#else
+
+/*
+ * BIG FAT WARNINGS:
+ *
+ * - Don't be tempted to change the following is_kernel_in_hyp_mode()
+ * to has_vhe(). has_vhe() is implemented as a *final* capability,
+ * while this is used early at boot time, when the capabilities are
+ * not final yet....
+ *
+ * - Don't let the nVHE hypervisor have access to this, as it will
+ * pick the *wrong* symbol (yes, it runs at EL2...).
+ */
+#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() \
+ ? CHOOSE_VHE_SYM(sym) \
+ : CHOOSE_NVHE_SYM(sym))
+
+#define this_cpu_ptr_hyp_sym(sym) (is_kernel_in_hyp_mode() \
+ ? this_cpu_ptr(&sym) \
+ : this_cpu_ptr_nvhe_sym(sym))
+
+#define per_cpu_ptr_hyp_sym(sym, cpu) (is_kernel_in_hyp_mode() \
+ ? per_cpu_ptr(&sym, cpu) \
+ : per_cpu_ptr_nvhe_sym(sym, cpu))
+
+#define CHOOSE_VHE_SYM(sym) sym
+#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym)
+
+#endif
+
+struct kvm_nvhe_init_params {
+ unsigned long mair_el2;
+ unsigned long tcr_el2;
+ unsigned long tpidr_el2;
+ unsigned long stack_hyp_va;
+ unsigned long stack_pa;
+ phys_addr_t pgd_pa;
+ unsigned long hcr_el2;
+ unsigned long vttbr;
+ unsigned long vtcr;
+};
+
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base: hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp: hyp FP where the backtrace begins.
+ * @pc: hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+ unsigned long stack_base;
+ unsigned long overflow_stack_base;
+ unsigned long fp;
+ unsigned long pc;
+};
+
+/* Translate a kernel address @ptr into its equivalent linear mapping */
+#define kvm_ksym_ref(ptr) \
({ \
- void *val = &sym; \
+ void *val = (ptr); \
if (!is_kernel_in_hyp_mode()) \
- val = lm_alias(&sym); \
+ val = lm_alias((ptr)); \
val; \
})
+#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym))
struct kvm;
struct kvm_vcpu;
+struct kvm_s2_mmu;
-extern char __kvm_hyp_init[];
-extern char __kvm_hyp_init_end[];
+DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
+DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
+#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init)
+#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector)
-extern char __kvm_hyp_vector[];
+extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
+DECLARE_KVM_NVHE_SYM(__per_cpu_start);
+DECLARE_KVM_NVHE_SYM(__per_cpu_end);
+
+DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
+#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
extern void __kvm_flush_vm_context(void);
-extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
+ phys_addr_t ipa,
+ int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages);
+extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
+extern void __kvm_timer_set_cntvoff(u64 cntvoff);
-extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
-extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
-extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u64 __vgic_v3_get_gic_config(void);
extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void);
-extern u32 __kvm_get_mdcr_el2(void);
+extern u64 __kvm_get_mdcr_el2(void);
-/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
-#define __hyp_this_cpu_ptr(sym) \
- ({ \
- void *__ptr = hyp_symbol_addr(sym); \
- __ptr += read_sysreg(tpidr_el2); \
- (typeof(&sym))__ptr; \
- })
+#define __KVM_EXTABLE(from, to) \
+ " .pushsection __kvm_ex_table, \"a\"\n" \
+ " .align 3\n" \
+ " .long (" #from " - .), (" #to " - .)\n" \
+ " .popsection\n"
-#define __hyp_this_cpu_read(sym) \
- ({ \
- *__hyp_this_cpu_ptr(sym); \
- })
-#else /* __ASSEMBLY__ */
+#define __kvm_at(at_op, addr) \
+( { \
+ int __kvm_at_err = 0; \
+ u64 spsr, elr; \
+ asm volatile( \
+ " mrs %1, spsr_el2\n" \
+ " mrs %2, elr_el2\n" \
+ "1: at "at_op", %3\n" \
+ " isb\n" \
+ " b 9f\n" \
+ "2: msr spsr_el2, %1\n" \
+ " msr elr_el2, %2\n" \
+ " mov %w0, %4\n" \
+ "9:\n" \
+ __KVM_EXTABLE(1b, 2b) \
+ : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \
+ : "r" (addr), "i" (-EFAULT)); \
+ __kvm_at_err; \
+} )
-.macro hyp_adr_this_cpu reg, sym, tmp
- adr_l \reg, \sym
- mrs \tmp, tpidr_el2
- add \reg, \reg, \tmp
-.endm
+void __noreturn hyp_panic(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+asmlinkage void __noreturn hyp_panic(void);
+asmlinkage void __noreturn hyp_panic_bad_stack(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+struct kvm_cpu_context;
+void handle_trap(struct kvm_cpu_context *host_ctxt);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
+void __noreturn __pkvm_init_finalise(void);
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_final_ctr_el0(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
+ u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
-.macro hyp_ldr_this_cpu reg, sym, tmp
- adr_l \reg, \sym
- mrs \tmp, tpidr_el2
- ldr \reg, [\reg, \tmp]
-.endm
+#else /* __ASSEMBLY__ */
.macro get_host_ctxt reg, tmp
- hyp_adr_this_cpu \reg, kvm_host_data, \tmp
+ adr_this_cpu \reg, kvm_host_data, \tmp
add \reg, \reg, #HOST_DATA_CONTEXT
.endm
.macro get_vcpu_ptr vcpu, ctxt
get_host_ctxt \ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
- kern_hyp_va \vcpu
+.endm
+
+.macro get_loaded_vcpu vcpu, ctxt
+ adr_this_cpu \ctxt, kvm_hyp_ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+.endm
+
+.macro set_loaded_vcpu vcpu, ctxt, tmp
+ adr_this_cpu \ctxt, kvm_hyp_ctxt, \tmp
+ str \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+.endm
+
+/*
+ * KVM extable for unexpected exceptions.
+ * Create a struct kvm_exception_table_entry output to a section that can be
+ * mapped by EL2. The table is not sorted.
+ *
+ * The caller must ensure:
+ * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented
+ * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup.
+ */
+.macro _kvm_extable, from, to
+ .pushsection __kvm_ex_table, "a"
+ .align 3
+ .long (\from - .), (\to - .)
+ .popsection
+.endm
+
+#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x)
+#define CPU_LR_OFFSET CPU_XREG_OFFSET(30)
+#define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8)
+
+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
+.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
+ stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro restore_callee_saved_regs ctxt
+ // We require \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
+ ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+ ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+ ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+ ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+ ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+ ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro save_sp_el0 ctxt, tmp
+ mrs \tmp, sp_el0
+ str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+.endm
+
+.macro restore_sp_el0 ctxt, tmp
+ ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET]
+ msr sp_el0, \tmp
.endm
#endif
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
deleted file mode 100644
index 0185ee8b8b5e..000000000000
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/include/asm/kvm_coproc.h
- * Copyright (C) 2012 Rusty Russell IBM Corporation
- */
-
-#ifndef __ARM64_KVM_COPROC_H__
-#define __ARM64_KVM_COPROC_H__
-
-#include <linux/kvm_host.h>
-
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-
-struct kvm_sys_reg_table {
- const struct sys_reg_desc *table;
- size_t num;
-};
-
-struct kvm_sys_reg_target_table {
- struct kvm_sys_reg_table table64;
- struct kvm_sys_reg_table table32;
-};
-
-void kvm_register_target_sys_reg_table(unsigned int target,
- struct kvm_sys_reg_target_table *table);
-
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
-
-#define kvm_coproc_table_init kvm_sys_reg_table_init
-void kvm_sys_reg_table_init(void);
-
-struct kvm_one_reg;
-int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5efe5ca8fecf..b804fe832184 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -17,44 +17,69 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmio.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
-void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
+#define CURRENT_EL_SP_EL0_VECTOR 0x0
+#define CURRENT_EL_SP_ELx_VECTOR 0x200
+#define LOWER_EL_AArch64_VECTOR 0x400
+#define LOWER_EL_AArch32_VECTOR 0x600
+
+enum exception_type {
+ except_type_sync = 0,
+ except_type_irq = 0x80,
+ except_type_fiq = 0x100,
+ except_type_serror = 0x180,
+};
+
+#define kvm_exception_type_names \
+ { except_type_sync, "SYNC" }, \
+ { except_type_irq, "IRQ" }, \
+ { except_type_fiq, "FIQ" }, \
+ { except_type_serror, "SERROR" }
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_undef32(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
-static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
}
+#else
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+}
+#endif
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (is_kernel_in_hyp_mode())
+ if (has_vhe() || has_hvhe())
vcpu->arch.hcr_el2 |= HCR_E2H;
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
/* route synchronous external abort exceptions to EL2 */
vcpu->arch.hcr_el2 |= HCR_TEA;
/* trap error record accesses */
vcpu->arch.hcr_el2 |= HCR_TERR;
}
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
vcpu->arch.hcr_el2 |= HCR_FWB;
} else {
/*
@@ -66,20 +91,17 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TVM;
}
- if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
- vcpu->arch.hcr_el2 &= ~HCR_RW;
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
- /*
- * TID3: trap feature register accesses that we virtualise.
- * For now this is conditional, since no AArch32 feature regs
- * are currently virtualised.
- */
- if (!vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID3;
+ if (vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
- if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
- vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID2;
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -90,7 +112,8 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 &= ~HCR_TWE;
- if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+ if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
+ vcpu->kvm->arch.vgic.nassgireq)
vcpu->arch.hcr_el2 &= ~HCR_TWI;
else
vcpu->arch.hcr_el2 |= HCR_TWI;
@@ -112,12 +135,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
}
-static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu)
-{
- if (vcpu_has_ptrauth(vcpu))
- vcpu_ptrauth_disable(vcpu);
-}
-
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
@@ -128,43 +145,22 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
vcpu->arch.vsesr_el2 = vsesr;
}
-static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
-}
-
-static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
-{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
-}
-
-static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(SYS_ELR);
- else
- return *__vcpu_elr_el1(vcpu);
-}
-
-static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
-{
- if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, SYS_ELR);
- else
- *__vcpu_elr_el1(vcpu) = v;
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pc;
}
-static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate;
}
-static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
{
return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
}
-static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return kvm_condition_valid32(vcpu);
@@ -182,41 +178,105 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
* coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
* AArch32 with banked registers.
*/
-static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
u8 reg_num)
{
- return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+ return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num];
}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
unsigned long val)
{
if (reg_num != 31)
- vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
+ vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
-static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_read_spsr32(vcpu);
+ switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ return true;
+ default:
+ return false;
+ }
+}
- if (vcpu->arch.sysregs_loaded_on_cpu)
- return read_sysreg_el1(SYS_SPSR);
- else
- return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
+{
+ return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
}
-static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
{
- if (vcpu_mode_is_32bit(vcpu)) {
- vcpu_write_spsr32(vcpu, v);
- return;
- }
+ return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
+}
- if (vcpu->arch.sysregs_loaded_on_cpu)
- write_sysreg_el1(v, SYS_SPSR);
- else
- vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+ return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
+}
+
+static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
+{
+ return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+ return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
+}
+
+static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
+{
+ /*
+ * We are in a hypervisor context if the vcpu mode is EL2 or
+ * E2H and TGE bits are set. The latter means we are in the user space
+ * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
+ *
+ * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+ * rest of the KVM code, and will result in a misbehaving guest.
+ */
+ return vcpu_is_el2_ctxt(ctxt) ||
+ (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
+ __vcpu_el2_tge_is_set(ctxt);
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
+}
+
+/*
+ * The layout of SPSR for an AArch32 state is different when observed from an
+ * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
+ * view given an AArch64 view.
+ *
+ * In ARM DDI 0487E.a see:
+ *
+ * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426
+ * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256
+ * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280
+ *
+ * Which show the following differences:
+ *
+ * | Bit | AA64 | AA32 | Notes |
+ * +-----+------+------+-----------------------------|
+ * | 24 | DIT | J | J is RES0 in ARMv8 |
+ * | 21 | SS | DIT | SS doesn't exist in AArch32 |
+ *
+ * ... and all other bits are (currently) common.
+ */
+static inline unsigned long host_spsr_to_spsr32(unsigned long spsr)
+{
+ const unsigned long overlap = BIT(24) | BIT(21);
+ unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT);
+
+ spsr &= ~overlap;
+
+ spsr |= dit << 21;
+
+ return spsr;
}
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
@@ -233,14 +293,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
-static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
if (esr & ESR_ELx_CV)
return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -248,12 +308,12 @@ static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
return -1;
}
-static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.far_el2;
}
-static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
}
@@ -265,59 +325,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK;
}
-static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV);
}
static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+ return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
}
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE);
}
-static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
{
- return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF);
}
-static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
+ return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
- kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW);
+}
+
+/* Always check for S1PTW *before* using this. */
+static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR;
}
static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM);
}
-static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
- return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
+ return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
/* This one is not specific to Data Abort */
-static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
{
- return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL);
+ return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL);
}
-static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
{
- return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
+ return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
}
static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
@@ -325,43 +390,80 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
}
-static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC;
+ return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu);
}
-static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
+ return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
+{
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
+}
+
+static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW0:
+ case ESR_ELx_FSC_SEA_TTW1:
+ case ESR_ELx_FSC_SEA_TTW2:
+ case ESR_ELx_FSC_SEA_TTW3:
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW0:
+ case ESR_ELx_FSC_SECC_TTW1:
+ case ESR_ELx_FSC_SECC_TTW2:
+ case ESR_ELx_FSC_SECC_TTW3:
return true;
default:
return false;
}
}
-static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
}
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
+ }
+
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;
@@ -370,21 +472,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
-}
-
-static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
-}
-
-static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
- bool flag)
-{
- if (flag)
- vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
- else
- vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -393,7 +481,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
} else {
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- sctlr |= (1 << 25);
+ sctlr |= SCTLR_ELx_EE;
vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
}
}
@@ -403,7 +491,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
- return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ if (vcpu_mode_priv(vcpu))
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE);
+ else
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E);
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
@@ -468,30 +559,61 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
return data; /* Leave LE untouched */
}
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
- kvm_skip_instr32(vcpu, is_wide_instr);
- else
- *vcpu_pc(vcpu) += 4;
+ WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+ vcpu_set_flag(vcpu, INCREMENT_PC);
+}
+
+#define kvm_pend_exception(v, e) \
+ do { \
+ WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
+ vcpu_set_flag((v), PENDING_EXCEPTION); \
+ vcpu_set_flag((v), e); \
+ } while (0)
- /* advance the singlestep state machine */
- *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+static __always_inline void kvm_write_cptr_el2(u64 val)
+{
+ if (has_vhe() || has_hvhe())
+ write_sysreg(val, cpacr_el1);
+ else
+ write_sysreg(val, cptr_el2);
}
-/*
- * Skip an instruction which has been emulated at hyp while most guest sysregs
- * are live.
- */
-static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
{
- *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ u64 val;
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ if (has_vhe()) {
+ val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
+ CPACR_EL1_ZEN_EL1EN);
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN_EL1EN;
+ } else if (has_hvhe()) {
+ val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
- write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR);
- write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ if (!vcpu_has_sve(vcpu) ||
+ (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+ val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
+ } else {
+ val = CPTR_NVHE_EL2_RES1;
+
+ if (vcpu_has_sve(vcpu) &&
+ (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
+ val |= CPTR_EL2_TZ;
+ if (cpus_have_final_cap(ARM64_SME))
+ val &= ~CPTR_EL2_TSM;
+ }
+
+ return val;
}
+static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
+{
+ u64 val = kvm_get_reset_cptr_el2(vcpu);
+
+ kvm_write_cptr_el2(val);
+}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c61260cf63c5..21c57b812569 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -11,11 +11,14 @@
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
+#include <linux/arm-smccc.h>
#include <linux/bitmap.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+#include <linux/maple_tree.h>
#include <linux/percpu.h>
+#include <linux/psci.h>
#include <asm/arch_gicv3.h>
#include <asm/barrier.h>
#include <asm/cpufeature.h>
@@ -24,12 +27,10 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
-#include <asm/kvm_mmio.h>
-#include <asm/thread_info.h>
+#include <asm/vncr_mapping.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-#define KVM_USER_MEM_SLOTS 512
#define KVM_HALT_POLL_NS_DEFAULT 500000
#include <kvm/arm_vgic.h>
@@ -39,109 +40,321 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
#define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
+
+#define KVM_HAVE_MMU_RWLOCK
+
+/*
+ * Mode of operation configurable with kvm-arm.mode early param.
+ * See Documentation/admin-guide/kernel-parameters.txt for more information.
+ */
+enum kvm_mode {
+ KVM_MODE_DEFAULT,
+ KVM_MODE_PROTECTED,
+ KVM_MODE_NV,
+ KVM_MODE_NONE,
+};
+#ifdef CONFIG_KVM
+enum kvm_mode kvm_get_mode(void);
+#else
+static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
+#endif
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
-extern unsigned int kvm_sve_max_vl;
-int kvm_arm_init_sve(void);
+extern unsigned int __ro_after_init kvm_sve_max_vl;
+int __init kvm_arm_init_sve(void);
+
+u32 __attribute_const__ kvm_target_cpu(void);
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
+
+struct kvm_hyp_memcache {
+ phys_addr_t head;
+ unsigned long nr_pages;
+};
+
+static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+ phys_addr_t *p,
+ phys_addr_t (*to_pa)(void *virt))
+{
+ *p = mc->head;
+ mc->head = to_pa(p);
+ mc->nr_pages++;
+}
+
+static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void *(*to_va)(phys_addr_t phys))
+{
+ phys_addr_t *p = to_va(mc->head);
-int __attribute_const__ kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
-int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
-void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
+ if (!mc->nr_pages)
+ return NULL;
+
+ mc->head = *p;
+ mc->nr_pages--;
+
+ return p;
+}
+
+static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
+ unsigned long min_pages,
+ void *(*alloc_fn)(void *arg),
+ phys_addr_t (*to_pa)(void *virt),
+ void *arg)
+{
+ while (mc->nr_pages < min_pages) {
+ phys_addr_t *p = alloc_fn(arg);
+
+ if (!p)
+ return -ENOMEM;
+ push_hyp_memcache(mc, p, to_pa);
+ }
+
+ return 0;
+}
+
+static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
+ void (*free_fn)(void *virt, void *arg),
+ void *(*to_va)(phys_addr_t phys),
+ void *arg)
+{
+ while (mc->nr_pages)
+ free_fn(pop_hyp_memcache(mc, to_va), arg);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc);
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
struct kvm_vmid {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
- u32 vmid;
+ atomic64_t id;
};
-struct kvm_arch {
+struct kvm_s2_mmu {
struct kvm_vmid vmid;
- /* stage2 entry level table */
- pgd_t *pgd;
- phys_addr_t pgd_phys;
+ /*
+ * stage2 entry level table
+ *
+ * Two kvm_s2_mmu structures in the same VM can point to the same
+ * pgd here. This happens when running a guest using a
+ * translation regime that isn't affected by its own stage-2
+ * translation, such as a non-VHE hypervisor running at vEL2, or
+ * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the
+ * canonical stage-2 page tables.
+ */
+ phys_addr_t pgd_phys;
+ struct kvm_pgtable *pgt;
- /* VTCR_EL2 value for this VM */
- u64 vtcr;
+ /*
+ * VTCR value used on the host. For a non-NV guest (or a NV
+ * guest that runs in a context where its own S2 doesn't
+ * apply), its T0SZ value reflects that of the IPA size.
+ *
+ * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+ * the guest.
+ */
+ u64 vtcr;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
- /* The maximum number of vCPUs depends on the used GIC model */
- int max_vcpus;
+#define KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT 0
+ /*
+ * Memory cache used to split
+ * KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE worth of huge pages. It
+ * is used to allocate stage2 page tables while splitting huge
+ * pages. The choice of KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
+ * influences both the capacity of the split page cache, and
+ * how often KVM reschedules. Be wary of raising CHUNK_SIZE
+ * too high.
+ *
+ * Protected by kvm->slots_lock.
+ */
+ struct kvm_mmu_memory_cache split_page_cache;
+ uint64_t split_page_chunk_size;
+
+ struct kvm_arch *arch;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/**
+ * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests
+ *
+ * @std_bmap: Bitmap of standard secure service calls
+ * @std_hyp_bmap: Bitmap of standard hypervisor service calls
+ * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls
+ */
+struct kvm_smccc_features {
+ unsigned long std_bmap;
+ unsigned long std_hyp_bmap;
+ unsigned long vendor_hyp_bmap;
+};
+
+typedef unsigned int pkvm_handle_t;
+
+struct kvm_protected_vm {
+ pkvm_handle_t handle;
+ struct kvm_hyp_memcache teardown_mc;
+};
+
+struct kvm_mpidr_data {
+ u64 mpidr_mask;
+ DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx);
+};
+
+static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
+{
+ unsigned long mask = data->mpidr_mask;
+ u64 aff = mpidr & MPIDR_HWID_BITMASK;
+ int nbits, bit, bit_idx = 0;
+ u16 index = 0;
+
+ /*
+ * If this looks like RISC-V's BEXT or x86's PEXT
+ * instructions, it isn't by accident.
+ */
+ nbits = fls(mask);
+ for_each_set_bit(bit, &mask, nbits) {
+ index |= (aff & BIT(bit)) >> (bit - bit_idx);
+ bit_idx++;
+ }
+
+ return index;
+}
+
+struct kvm_arch {
+ struct kvm_s2_mmu mmu;
/* Interrupt controller */
struct vgic_dist vgic;
+ /* Timers */
+ struct arch_timer_vm_data timer_data;
+
/* Mandated version of PSCI */
u32 psci_version;
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
/*
* If we encounter a data abort without valid instruction syndrome
* information, report this to user space. User space can (and
* should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
* supported.
*/
- bool return_nisv_io_abort_to_user;
-};
+#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0
+ /* Memory Tagging Extension enabled for the guest */
+#define KVM_ARCH_FLAG_MTE_ENABLED 1
+ /* At least one vCPU has ran in the VM */
+#define KVM_ARCH_FLAG_HAS_RAN_ONCE 2
+ /* The vCPU feature set for the VM is configured */
+#define KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED 3
+ /* PSCI SYSTEM_SUSPEND enabled for the guest */
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 4
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
+ /* Initial ID reg values loaded */
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
+ unsigned long flags;
+
+ /* VM-wide vCPU feature set */
+ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+
+ /* MPIDR to vcpu index mapping, optional */
+ struct kvm_mpidr_data *mpidr_data;
-#define KVM_NR_MEM_OBJS 40
+ /*
+ * VM-wide PMU filter, implemented as a bitmap and big enough for
+ * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
+ */
+ unsigned long *pmu_filter;
+ struct arm_pmu *arm_pmu;
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_mmu_memory_cache {
- int nobjs;
- void *objects[KVM_NR_MEM_OBJS];
+ cpumask_var_t supported_cpus;
+
+ /* PMCR_EL0.N value for the guest */
+ u8 pmcr_n;
+
+ /* Hypercall features firmware registers' descriptor */
+ struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
+
+ /*
+ * Emulated CPU ID registers per VM
+ * (Op0, Op1, CRn, CRm, Op2) of the ID registers to be saved in it
+ * is (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ *
+ * These emulated idregs are VM-wide, but accessed from the context of a vCPU.
+ * Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
+ */
+#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask)
+#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
+#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
+ u64 id_regs[KVM_ARM_ID_REG_NUM];
+
+ /*
+ * For an untrusted host VM, 'pkvm.handle' is used to lookup
+ * the associated pKVM instance in the hypervisor.
+ */
+ struct kvm_protected_vm pkvm;
};
struct kvm_vcpu_fault_info {
- u32 esr_el2; /* Hyp Syndrom Register */
+ u64 esr_el2; /* Hyp Syndrom Register */
u64 far_el2; /* Hyp Fault Address Register */
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
u64 disr_el1; /* Deferred [SError] Status Register */
};
/*
- * 0 is reserved as an invalid value.
- * Order should be kept in sync with the save/restore code.
+ * VNCR() just places the VNCR_capable registers in the enum after
+ * __VNCR_START__, and the value (after correction) to be an 8-byte offset
+ * from the VNCR base. As we don't require the enum to be otherwise ordered,
+ * we need the terrible hack below to ensure that we correctly size the
+ * sys_regs array, no matter what.
+ *
+ * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
+ * treasure trove of bit hacks:
+ * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*/
+#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
+#define VNCR(r) \
+ __before_##r, \
+ r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
+ __after_##r = __MAX__(__before_##r - 1, r)
+
enum vcpu_sysreg {
- __INVALID_SYSREG__,
+ __INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
+ CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
- SCTLR_EL1, /* System Control Register */
- ACTLR_EL1, /* Auxiliary Control Register */
- CPACR_EL1, /* Coprocessor Access Control */
- ZCR_EL1, /* SVE Control */
- TTBR0_EL1, /* Translation Table Base Register 0 */
- TTBR1_EL1, /* Translation Table Base Register 1 */
- TCR_EL1, /* Translation Control Register */
- ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
- AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
- FAR_EL1, /* Fault Address Register */
- MAIR_EL1, /* Memory Attribute Indirection Register */
- VBAR_EL1, /* Vector Base Address Register */
- CONTEXTIDR_EL1, /* Context ID Register */
TPIDR_EL0, /* Thread ID, User R/W */
TPIDRRO_EL0, /* Thread ID, User R/O */
TPIDR_EL1, /* Thread ID, Privileged */
- AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
CNTKCTL_EL1, /* Timer Control Register (EL1) */
PAR_EL1, /* Physical Address Register */
- MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
/* Performance Monitors Registers */
@@ -156,7 +369,6 @@ enum vcpu_sysreg {
PMCNTENSET_EL0, /* Count Enable Set Register */
PMINTENSET_EL1, /* Interrupt Enable Set Register */
PMOVSSET_EL0, /* Overflow Flag Status Set Register */
- PMSWINC_EL0, /* Software Increment Register */
PMUSERENR_EL0, /* User Enable Register */
/* Pointer Authentication Registers in a strict increasing order. */
@@ -171,77 +383,141 @@ enum vcpu_sysreg {
APGAKEYLO_EL1,
APGAKEYHI_EL1,
- /* 32bit specific registers. Keep them at the end of the range */
+ /* Memory Tagging Extension registers */
+ RGSR_EL1, /* Random Allocation Tag Seed Register */
+ GCR_EL1, /* Tag Control Register */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
+ /* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
FPEXC32_EL2, /* Floating-Point Exception Control Register */
DBGVCR32_EL2, /* Debug Vector Catch Register */
+ /* EL2 registers */
+ SCTLR_EL2, /* System Control Register (EL2) */
+ ACTLR_EL2, /* Auxiliary Control Register (EL2) */
+ MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
+ HACR_EL2, /* Hypervisor Auxiliary Control Register */
+ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
+ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
+ TCR_EL2, /* Translation Control Register (EL2) */
+ SPSR_EL2, /* EL2 saved program status register */
+ ELR_EL2, /* EL2 exception link register */
+ AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
+ AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */
+ ESR_EL2, /* Exception Syndrome Register (EL2) */
+ FAR_EL2, /* Fault Address Register (EL2) */
+ HPFAR_EL2, /* Hypervisor IPA Fault Address Register */
+ MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */
+ AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */
+ VBAR_EL2, /* Vector Base Address Register (EL2) */
+ RVBAR_EL2, /* Reset Vector Base Address Register */
+ CONTEXTIDR_EL2, /* Context ID Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
+ SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
+
+ __VNCR_START__, /* Any VNCR-capable reg goes after this point */
+
+ VNCR(SCTLR_EL1),/* System Control Register */
+ VNCR(ACTLR_EL1),/* Auxiliary Control Register */
+ VNCR(CPACR_EL1),/* Coprocessor Access Control */
+ VNCR(ZCR_EL1), /* SVE Control */
+ VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
+ VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
+ VNCR(TCR_EL1), /* Translation Control Register */
+ VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(ESR_EL1), /* Exception Syndrome Register */
+ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
+ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
+ VNCR(FAR_EL1), /* Fault Address Register */
+ VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
+ VNCR(VBAR_EL1), /* Vector Base Address Register */
+ VNCR(CONTEXTIDR_EL1), /* Context ID Register */
+ VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
+ VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
+ VNCR(ELR_EL1),
+ VNCR(SP_EL1),
+ VNCR(SPSR_EL1),
+ VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
+ VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
+ VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
+ VNCR(HCR_EL2), /* Hypervisor Configuration Register */
+ VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
+ VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
+ VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
+ VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
+ VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
+
+ /* Permission Indirection Extension registers */
+ VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
+ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+
+ VNCR(HFGRTR_EL2),
+ VNCR(HFGWTR_EL2),
+ VNCR(HFGITR_EL2),
+ VNCR(HDFGRTR_EL2),
+ VNCR(HDFGWTR_EL2),
+ VNCR(HAFGRTR_EL2),
+
+ VNCR(CNTVOFF_EL2),
+ VNCR(CNTV_CVAL_EL0),
+ VNCR(CNTV_CTL_EL0),
+ VNCR(CNTP_CVAL_EL0),
+ VNCR(CNTP_CTL_EL0),
+
NR_SYS_REGS /* Nothing after this line! */
};
-/* 32bit mapping */
-#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
-#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
-#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
-#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
-#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
-#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
-#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
-#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
-#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
-#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
-#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
-#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
-#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
-#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
-#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
-#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
-#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
-#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
-#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
-#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
-#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
-#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
-#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
-#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-
-#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
-#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
-#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
-#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
-#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
-#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
-#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
-
-#define NR_COPRO_REGS (NR_SYS_REGS * 2)
-
struct kvm_cpu_context {
- struct kvm_regs gp_regs;
- union {
- u64 sys_regs[NR_SYS_REGS];
- u32 copro[NR_COPRO_REGS];
- };
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ u64 spsr_abt;
+ u64 spsr_und;
+ u64 spsr_irq;
+ u64 spsr_fiq;
+
+ struct user_fpsimd_state fp_regs;
+
+ u64 sys_regs[NR_SYS_REGS];
struct kvm_vcpu *__hyp_running_vcpu;
-};
-struct kvm_pmu_events {
- u32 events_host;
- u32 events_guest;
+ /* This pointer has to be 4kB aligned. */
+ u64 *vncr_array;
};
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
- struct kvm_pmu_events pmu_events;
};
-typedef struct kvm_host_data kvm_host_data_t;
+struct kvm_host_psci_config {
+ /* PSCI version used by host. */
+ u32 version;
+ u32 smccc_version;
+
+ /* Function IDs used by host if version is v0.1. */
+ struct psci_0_1_function_ids function_ids_0_1;
+
+ bool psci_0_1_cpu_suspend_implemented;
+ bool psci_0_1_cpu_on_implemented;
+ bool psci_0_1_cpu_off_implemented;
+ bool psci_0_1_migrate_implemented;
+};
+
+extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config);
+#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config)
+
+extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
+#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset)
+
+extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS];
+#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map)
struct vcpu_reset_state {
unsigned long pc;
@@ -252,21 +528,60 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
+
+ /*
+ * Guest floating point state
+ *
+ * The architecture has two main floating point extensions,
+ * the original FPSIMD and SVE. These have overlapping
+ * register views, with the FPSIMD V registers occupying the
+ * low 128 bits of the SVE Z registers. When the core
+ * floating point code saves the register state of a task it
+ * records which view it saved in fp_type.
+ */
void *sve_state;
+ enum fp_type fp_type;
unsigned int sve_max_vl;
+ u64 svcr;
- /* HYP configuration */
+ /* Stage 2 paging state used by the hardware on next switch */
+ struct kvm_s2_mmu *hw_mmu;
+
+ /* Values of trap registers for the guest. */
u64 hcr_el2;
- u32 mdcr_el2;
+ u64 mdcr_el2;
+ u64 cptr_el2;
+
+ /* Values of trap registers for the host before guest entry. */
+ u64 mdcr_el2_host;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* State of various workarounds, see kvm_asm.h for bit assignment */
- u64 workaround_flags;
+ /* Ownership of the FP regs */
+ enum {
+ FP_STATE_FREE,
+ FP_STATE_HOST_OWNED,
+ FP_STATE_GUEST_OWNED,
+ } fp_state;
- /* Miscellaneous vcpu state flags */
- u64 flags;
+ /* Configuration flags, set once and for all before the vcpu can run */
+ u8 cflags;
+
+ /* Input flags to the hypervisor code, potentially cleared after use */
+ u8 iflags;
+
+ /* State flags for kernel bookkeeping, unused by the hypervisor code */
+ u8 sflags;
+
+ /*
+ * Don't run the guest (internal implementation need).
+ *
+ * Contrary to the flags above, this is set/cleared outside of
+ * a vcpu context, and thus cannot be mixed with the flags
+ * themselves (or the flag accesses need to be made atomic).
+ */
+ bool pause;
/*
* We maintain more than a single set of debug registers to support
@@ -285,17 +600,16 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
- /* Pointer to host CPU context */
- struct kvm_cpu_context *host_cpu_context;
-
- struct thread_info *host_thread_info; /* hyp VA */
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
+ struct task_struct *parent_task;
struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
/* Statistical profiling extension */
u64 pmscr_el1;
+ /* Self-hosted trace */
+ u64 trfcr_el1;
} host_debug_state;
/* VGIC state */
@@ -304,11 +618,6 @@ struct kvm_vcpu_arch {
struct kvm_pmu pmu;
/*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
- /*
* Guest registers we preserve during guest debugging.
*
* These shadow registers are updated by the kvm_handle_sys_reg
@@ -317,48 +626,183 @@ struct kvm_vcpu_arch {
*/
struct {
u32 mdscr_el1;
+ bool pstate_ss;
} guest_debug_preserved;
- /* vcpu power-off state */
- bool power_off;
-
- /* Don't run the guest (internal implementation need) */
- bool pause;
-
- /* IO related fields */
- struct kvm_decode mmio_decode;
+ /* vcpu power state */
+ struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* Target CPU and feature flags */
- int target;
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
-
- /* Detect first run of a vcpu */
- bool has_run_once;
-
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
/* Additional reset state */
struct vcpu_reset_state reset_state;
- /* True when deferrable sysregs are loaded on the physical CPU,
- * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
- bool sysregs_loaded_on_cpu;
-
/* Guest PV state */
struct {
- u64 steal;
u64 last_steal;
gpa_t base;
} steal;
+
+ /* Per-vcpu CCSIDR override or NULL */
+ u32 *ccsidr;
};
+/*
+ * Each 'flag' is composed of a comma-separated triplet:
+ *
+ * - the flag-set it belongs to in the vcpu->arch structure
+ * - the value for that flag
+ * - the mask for that flag
+ *
+ * __vcpu_single_flag() builds such a triplet for a single-bit flag.
+ * unpack_vcpu_flag() extract the flag value from the triplet for
+ * direct use outside of the flag accessors.
+ */
+#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
+
+#define __unpack_flag(_set, _f, _m) _f
+#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
+
+#define __build_check_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *_fset; \
+ \
+ /* Check that the flags fit in the mask */ \
+ BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
+ /* Check that the flags fit in the type */ \
+ BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
+ } while (0)
+
+#define __vcpu_get_flag(v, flagset, f, m) \
+ ({ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ READ_ONCE(v->arch.flagset) & (m); \
+ })
+
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable() preempt_disable()
+#define __vcpu_flags_preempt_enable() preempt_enable()
+#endif
+
+#define __vcpu_set_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ if (HWEIGHT(m) > 1) \
+ *fset &= ~(m); \
+ *fset |= (f); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define __vcpu_clear_flag(v, flagset, f, m) \
+ do { \
+ typeof(v->arch.flagset) *fset; \
+ \
+ __build_check_flag(v, flagset, f, m); \
+ \
+ fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
+ *fset &= ~(m); \
+ __vcpu_flags_preempt_enable(); \
+ } while (0)
+
+#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
+#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
+#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+
+/* SVE exposed to guest */
+#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
+/* SVE config completed */
+#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
+/* PTRAUTH exposed to guest */
+#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
+
+/* Exception pending */
+#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
+/*
+ * PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
+ * be set together with an exception...
+ */
+#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
+/* Target EL/MODE (not a single flag, but let's abuse the macro) */
+#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
+
+/* Helpers to encode exceptions with minimum fuss */
+#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
+#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
+#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
+
+/*
+ * When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
+ * values:
+ *
+ * For AArch32 EL1:
+ */
+#define EXCEPT_AA32_UND __vcpu_except_flags(0)
+#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
+#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
+/* For AArch64: */
+#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
+#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
+#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
+#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
+/* For AArch64 with NV: */
+#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
+#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
+#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
+#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
+/* Guest debug is live */
+#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
+/* Save SPE context if active */
+#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
+/* Save TRBE context if active */
+#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
+/* vcpu running in HYP context */
+#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7))
+
+/* SVE enabled for host EL0 */
+#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
+/* SME enabled for EL0 */
+#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+/* Physical CPU not in supported_cpus */
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+/* WFIT instruction trapped */
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
+/* vcpu system registers loaded on physical CPU */
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
+/* Software step state is Active-pending */
+#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
+
+
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
-#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
- sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
+ sve_ffr_offset((vcpu)->arch.sve_max_vl))
+
+#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
#define vcpu_sve_state_size(vcpu) ({ \
size_t __size_ret; \
@@ -367,91 +811,204 @@ struct kvm_vcpu_arch {
if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
__size_ret = 0; \
} else { \
- __vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \
+ __vcpu_vq = vcpu_sve_max_vq(vcpu); \
__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
} \
\
__size_ret; \
})
-/* vcpu_arch flags field values: */
-#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
-#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
-#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
-#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
-#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
-#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
-#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
-#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
+#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW | \
+ KVM_GUESTDBG_SINGLESTEP)
+
+#define vcpu_has_sve(vcpu) (system_supports_sve() && \
+ vcpu_get_flag(vcpu, GUEST_HAS_SVE))
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+#define vcpu_has_ptrauth(vcpu) \
+ ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
+ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
+ vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
+#else
+#define vcpu_has_ptrauth(vcpu) false
+#endif
+
+#define vcpu_on_unsupported_cpu(vcpu) \
+ vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+#define vcpu_set_on_unsupported_cpu(vcpu) \
+ vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
-#define vcpu_has_ptrauth(vcpu) ((system_supports_address_auth() || \
- system_supports_generic_auth()) && \
- ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))
+#define vcpu_clear_on_unsupported_cpu(vcpu) \
+ vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
-#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
+#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
/*
- * Only use __vcpu_sys_reg if you know you want the memory backed version of a
- * register, and not the one most recently accessed by a running VCPU. For
- * example, for userspace access or for system registers that are never context
- * switched, but only emulated.
+ * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the
+ * memory backed version of a register, and not the one most recently
+ * accessed by a running VCPU. For example, for userspace access or
+ * for system registers that are never context switched, but only
+ * emulated.
+ *
+ * Don't bother with VNCR-based accesses in the nVHE code, it has no
+ * business dealing with NV.
*/
-#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+{
+#if !defined (__KVM_NVHE_HYPERVISOR__)
+ if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ r >= __VNCR_START__ && ctxt->vncr_array))
+ return &ctxt->vncr_array[r - __VNCR_START__];
+#endif
+ return (u64 *)&ctxt->sys_regs[r];
+}
+
+#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
+
+#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
-/*
- * CP14 and CP15 live in the same array, as they are backed by the
- * same system registers.
- */
-#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
-#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
+static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
+{
+ /*
+ * *** VHE ONLY ***
+ *
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ if (!has_vhe())
+ return false;
+
+ switch (reg) {
+ case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
+ case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
+ case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
+ case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
+ case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
+ case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
+ case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
+ case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
+ case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
+ case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break;
+ case PAR_EL1: *val = read_sysreg_par(); break;
+ case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
+ case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
+ default: return false;
+ }
+
+ return true;
+}
+
+static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
+{
+ /*
+ * *** VHE ONLY ***
+ *
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the MPIDR should only be set
+ * once, before running the VCPU, and never changed later.
+ */
+ if (!has_vhe())
+ return false;
+
+ switch (reg) {
+ case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
+ case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
+ case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
+ case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
+ case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
+ case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
+ case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
+ case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
+ case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
+ case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
+ case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
+ default: return false;
+ }
+
+ return true;
+}
struct kvm_vm_stat {
- ulong remote_tlb_flush;
+ struct kvm_vm_stat_generic generic;
};
struct kvm_vcpu_stat {
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
+ struct kvm_vcpu_stat_generic generic;
u64 hvc_exit_stat;
u64 wfe_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;
u64 mmio_exit_kernel;
+ u64 signal_exits;
u64 exits;
};
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-
-struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-u64 __kvm_call_hyp(void *hypfn, ...);
+#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+#define kvm_call_hyp_nvhe(f, ...) \
+ ({ \
+ struct arm_smccc_res res; \
+ \
+ arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \
+ ##__VA_ARGS__, &res); \
+ WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \
+ \
+ res.a1; \
+ })
/*
* The couple of isb() below are there to guarantee the same behaviour
@@ -464,7 +1021,7 @@ u64 __kvm_call_hyp(void *hypfn, ...);
f(__VA_ARGS__); \
isb(); \
} else { \
- __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \
+ kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
} while(0)
@@ -476,28 +1033,58 @@ u64 __kvm_call_hyp(void *hypfn, ...);
ret = f(__VA_ARGS__); \
isb(); \
} else { \
- ret = __kvm_call_hyp(kvm_ksym_ref(f), \
- ##__VA_ARGS__); \
+ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
\
ret; \
})
+#else /* __KVM_NVHE_HYPERVISOR__ */
+#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
+#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+
+int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
+void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-void force_vm_exit(const cpumask_t *mask);
-void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
+int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
-int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index);
-void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
- int exception_index);
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
-int kvm_perf_init(void);
-int kvm_perf_teardown(void);
+/* MMIO helpers */
+void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
+unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
+int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
+
+/*
+ * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
+ * arrived in guest context. For arm64, any event that arrives while a vCPU is
+ * loaded is considered to be "in guest".
+ */
+static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
+{
+ return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
+}
long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+bool kvm_arm_pvtime_supported(void);
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
@@ -505,89 +1092,51 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+extern unsigned int __ro_after_init kvm_arm_vmid_bits;
+int __init kvm_arm_vmid_alloc_init(void);
+void __init kvm_arm_vmid_alloc_free(void);
+bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_clear_active(void);
+
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
- vcpu_arch->steal.base = GPA_INVALID;
+ vcpu_arch->steal.base = INVALID_GPA;
}
static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
{
- return (vcpu_arch->steal.base != GPA_INVALID);
+ return (vcpu_arch->steal.base != INVALID_GPA);
}
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
+DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
- cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
-}
-
-void __kvm_enable_ssbs(void);
-
-static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
- unsigned long hyp_stack_ptr,
- unsigned long vector_ptr)
-{
- /*
- * Calculate the raw per-cpu offset without a translation from the
- * kernel's mapping to the linear mapping, and store it in tpidr_el2
- * so that we can use adr_l to access per-cpu variables in EL2.
- */
- u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
- (u64)kvm_ksym_ref(kvm_host_data));
-
- /*
- * Call initialization code, and switch to the full blown HYP code.
- * If the cpucaps haven't been finalized yet, something has gone very
- * wrong, and hyp will crash and burn when it uses any
- * cpus_have_const_cap() wrapper.
- */
- BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
- __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
-
- /*
- * Disabling SSBD on a non-VHE system requires us to enable SSBS
- * at EL2.
- */
- if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
- arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
- kvm_call_hyp(__kvm_enable_ssbs);
- }
+ ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
}
-static inline bool kvm_arch_requires_vhe(void)
+static inline bool kvm_system_needs_idmapped_vectors(void)
{
- /*
- * The Arm architecture specifies that implementation of SVE
- * requires VHE also to be implemented. The KVM code for arm64
- * relies on this when SVE is present:
- */
- if (system_supports_sve())
- return true;
-
- /* Some implementations have defects that confine them to VHE */
- if (cpus_have_cap(ARM64_WORKAROUND_1165522))
- return true;
-
- return false;
+ return cpus_have_final_cap(ARM64_SPECTRE_V3A);
}
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
-static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
void kvm_arm_init_debug(void);
+void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+
+#define kvm_vcpu_os_lock_enabled(vcpu) \
+ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
@@ -595,122 +1144,93 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-static inline void __cpu_init_stage2(void) {}
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
+int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
+ struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu);
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
{
return (!has_vhe() && attr->exclude_host);
}
-#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
-static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
-{
- return kvm_arch_vcpu_run_map_fp(vcpu);
-}
+/* Flags for host debug state */
+void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
-
-void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
-void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
#endif
-static inline void kvm_arm_vhe_guest_enter(void)
-{
- local_daif_mask();
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-}
+int __init kvm_set_ipa_limit(void);
-static inline void kvm_arm_vhe_guest_exit(void)
-{
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
+#define __KVM_HAVE_ARCH_VM_ALLOC
+struct kvm *kvm_arch_alloc_vm(void);
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
-#define KVM_BP_HARDEN_UNKNOWN -1
-#define KVM_BP_HARDEN_WA_NEEDED 0
-#define KVM_BP_HARDEN_NOT_REQUIRED 1
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
-static inline int kvm_arm_harden_branch_predictor(void)
+static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
- switch (get_spectre_v2_workaround_state()) {
- case ARM64_BP_HARDEN_WA_NEEDED:
- return KVM_BP_HARDEN_WA_NEEDED;
- case ARM64_BP_HARDEN_NOT_REQUIRED:
- return KVM_BP_HARDEN_NOT_REQUIRED;
- case ARM64_BP_HARDEN_UNKNOWN:
- default:
- return KVM_BP_HARDEN_UNKNOWN;
- }
+ return false;
}
-#define KVM_SSBD_UNKNOWN -1
-#define KVM_SSBD_FORCE_DISABLE 0
-#define KVM_SSBD_KERNEL 1
-#define KVM_SSBD_FORCE_ENABLE 2
-#define KVM_SSBD_MITIGATED 3
+int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
+bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
-static inline int kvm_arm_have_ssbd(void)
-{
- switch (arm64_get_ssbd_state()) {
- case ARM64_SSBD_FORCE_DISABLE:
- return KVM_SSBD_FORCE_DISABLE;
- case ARM64_SSBD_KERNEL:
- return KVM_SSBD_KERNEL;
- case ARM64_SSBD_FORCE_ENABLE:
- return KVM_SSBD_FORCE_ENABLE;
- case ARM64_SSBD_MITIGATED:
- return KVM_SSBD_MITIGATED;
- case ARM64_SSBD_UNKNOWN:
- default:
- return KVM_SSBD_UNKNOWN;
- }
-}
+#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
-void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
-void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+#define kvm_has_mte(kvm) \
+ (system_supports_mte() && \
+ test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
-void kvm_set_ipa_limit(void);
+#define kvm_supports_32bit_el0() \
+ (system_supports_32bit_el0() && \
+ !static_branch_unlikely(&arm64_mismatched_32bit_el0))
-#define __KVM_HAVE_ARCH_VM_ALLOC
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
+#define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
-int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+{
+ return test_bit(feature, ka->vcpu_features);
+}
-int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
-bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
+#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
+int kvm_trng_call(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM
+extern phys_addr_t hyp_mem_base;
+extern phys_addr_t hyp_mem_size;
+void __init kvm_hyp_reserve(void);
+#else
+static inline void kvm_hyp_reserve(void) { }
+#endif
-#define kvm_arm_vcpu_sve_finalized(vcpu) \
- ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 97f21cc66657..145ce73fc16c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -10,17 +10,41 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/alternative.h>
-#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
-#define __hyp_text __section(.hyp.text) notrace
+DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
+DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+/*
+ * Unified accessors for registers that have a different encoding
+ * between VHE and non-VHE. They must be specified without their "ELx"
+ * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
+ */
+
+#if defined(__KVM_VHE_HYPERVISOR__)
+
+#define read_sysreg_el0(r) read_sysreg_s(r##_EL02)
+#define write_sysreg_el0(v,r) write_sysreg_s(v, r##_EL02)
+#define read_sysreg_el1(r) read_sysreg_s(r##_EL12)
+#define write_sysreg_el1(v,r) write_sysreg_s(v, r##_EL12)
+#define read_sysreg_el2(r) read_sysreg_s(r##_EL1)
+#define write_sysreg_el2(v,r) write_sysreg_s(v, r##_EL1)
+
+#else // !__KVM_VHE_HYPERVISOR__
+
+#if defined(__KVM_NVHE_HYPERVISOR__)
+#define VHE_ALT_KEY ARM64_KVM_HVHE
+#else
+#define VHE_ALT_KEY ARM64_HAS_VIRT_HOST_EXTN
+#endif
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
- asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
+ asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
__mrs_s("%0", r##vh), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: "=r" (reg)); \
reg; \
})
@@ -30,16 +54,10 @@
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
__msr_s(r##vh, "%x0"), \
- ARM64_HAS_VIRT_HOST_EXTN) \
+ VHE_ALT_KEY) \
: : "rZ" (__val)); \
} while (0)
-/*
- * Unified accessors for registers that have a different encoding
- * between VHE and non-VHE. They must be specified without their "ELx"
- * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
- */
-
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
@@ -47,56 +65,82 @@
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+#endif // __KVM_VHE_HYPERVISOR__
+
+/*
+ * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
+ * static inline can allow the compiler to out-of-line this. KVM always wants
+ * the macro version as its always inlined.
+ */
+#define __kvm_swab32(x) ___constant_swab32(x)
+
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
+#ifdef __KVM_NVHE_HYPERVISOR__
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
+#endif
+#ifdef __KVM_NVHE_HYPERVISOR__
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+#else
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu);
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu);
void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
-void __sysreg32_save_state(struct kvm_vcpu *vcpu);
-void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+#endif
void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
void __debug_switch_to_host(struct kvm_vcpu *vcpu);
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
-void deactivate_traps_vhe_put(void);
-
-u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
-void __noreturn __hyp_do_panic(unsigned long, ...);
-
-/*
- * Must be called from hyp code running at EL2 with an updated VTTBR
- * and interrupts disabled.
- */
-static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
-{
- write_sysreg(kvm->arch.vtcr, vtcr_el2);
- write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
-
- /*
- * ARM erratum 1165522 requires the actual execution of the above
- * before we can switch to the EL1/EL0 translation regime used by
- * the guest.
- */
- asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
-}
+void __sve_restore_state(void *sve_pffr, u32 *fpsr);
+
+u64 __guest_enter(struct kvm_vcpu *vcpu);
+
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ u64 elr, u64 par);
+#endif
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size,
+ phys_addr_t pgd, void *sp, void *cont_fn);
+int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
+ unsigned long *per_cpu_base, u32 hyp_va_bits);
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+#endif
+
+extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
+
+extern unsigned long kvm_nvhe_sym(__icache_flags);
+extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
#endif /* __ARM64_KVM_HYP_H__ */
-
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
deleted file mode 100644
index 02b5c48fd467..000000000000
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#ifndef __ARM64_KVM_MMIO_H__
-#define __ARM64_KVM_MMIO_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_arm.h>
-
-/*
- * This is annoying. The mmio code requires this, even if we don't
- * need any decoding. To be fixed.
- */
-struct kvm_decode {
- unsigned long rt;
- bool sign_extend;
-};
-
-void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
-unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
-
-int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
- phys_addr_t fault_ipa);
-
-#endif /* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 53d846f1bfe7..e3e793d0ec30 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -9,6 +9,7 @@
#include <asm/page.h>
#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/cpufeature.h>
/*
@@ -43,16 +44,6 @@
* HYP_VA_MIN = 1 << (VA_BITS - 1)
* HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1
*
- * This of course assumes that the trampoline page exists within the
- * VA_BITS range. If it doesn't, then it means we're in the odd case
- * where the kernel idmap (as well as HYP) uses more levels than the
- * kernel runtime page tables (as seen when the kernel is configured
- * for 4k pages, 39bits VA, and yet memory lives just above that
- * limit, forcing the idmap to use 4 levels of page tables while the
- * kernel itself only uses 3). In this particular case, it doesn't
- * matter which side of VA_BITS we use, as we're guaranteed not to
- * conflict with anything.
- *
* When using VHE, there are no separate hyp mappings and all KVM
* functionality is already mapped as part of the main kernel
* mappings, and none of this applies in that case.
@@ -72,351 +63,205 @@
* specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
-alternative_cb kvm_update_va_mask
+#ifndef __KVM_VHE_HYPERVISOR__
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
and \reg, \reg, #1 /* mask with va_mask */
ror \reg, \reg, #1 /* rotate to the first tag bit */
add \reg, \reg, #0 /* insert the low 12 bits of the tag */
add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
ror \reg, \reg, #63 /* rotate back */
alternative_cb_end
+#endif
+.endm
+
+/*
+ * Convert a hypervisor VA to a PA
+ * reg: hypervisor address to be converted in place
+ * tmp: temporary register
+ */
+.macro hyp_pa reg, tmp
+ ldr_l \tmp, hyp_physvirt_offset
+ add \reg, \reg, \tmp
+.endm
+
+/*
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_get_kimage_voffset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_get_kimage_voffset uses the
+ * specific registers encoded in the instructions).
+ */
+.macro hyp_kimg_va reg, tmp
+ /* Convert hyp VA -> PA. */
+ hyp_pa \reg, \tmp
+
+ /* Load kimage_voffset. */
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset
+ movz \tmp, #0
+ movk \tmp, #0, lsl #16
+ movk \tmp, #0, lsl #32
+ movk \tmp, #0, lsl #48
+alternative_cb_end
+
+ /* Convert PA -> kimg VA. */
+ add \reg, \reg, \tmp
.endm
#else
+#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
+void kvm_apply_hyp_relocations(void);
-static inline unsigned long __kern_hyp_va(unsigned long v)
+#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
+
+static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
+#ifndef __KVM_VHE_HYPERVISOR__
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
"add %0, %0, #0\n"
"add %0, %0, #0, lsl 12\n"
"ror %0, %0, #63\n",
+ ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
+#endif
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s) \
- ({ \
- typeof(s) *addr; \
- asm("adrp %0, %1\n" \
- "add %0, %0, :lo12:%1\n" \
- : "=r" (addr) : "S" (&s)); \
- addr; \
- })
-
-/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
*/
#define KVM_PHYS_SHIFT (40)
-#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
-#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
-
-static inline bool kvm_page_empty(void *ptr)
-{
- struct page *ptr_page = virt_to_page(ptr);
- return page_count(ptr_page) == 1;
-}
+#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL))
+#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-int create_hyp_mappings(void *from, void *to, pgprot_t prot);
+int kvm_share_hyp(void *from, void *to);
+void kvm_unshare_hyp(void *from, void *to);
+int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
+int __create_hyp_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot);
+int hyp_alloc_private_va_range(size_t size, unsigned long *haddr);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
-void free_hyp_pgds(void);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
+void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
-int kvm_alloc_stage2_pgd(struct kvm *kvm);
-void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
+void kvm_uninit_stage2_mmu(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
-int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
-
-void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
-int kvm_mmu_init(void);
-void kvm_clear_hyp_idmap(void);
-
-#define kvm_mk_pmd(ptep) \
- __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
-#define kvm_mk_pud(pmdp) \
- __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
-#define kvm_mk_pgd(pudp) \
- __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
+int __init kvm_mmu_init(u32 *hyp_va_bits);
-#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
-
-#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
-#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
-#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
-
-#define kvm_pud_pfn(pud) pud_pfn(pud)
-
-#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd)
-#define kvm_pud_mkhuge(pud) pud_mkhuge(pud)
-
-static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
+static inline void *__kvm_vector_slot2addr(void *base,
+ enum arm64_hyp_spectre_vector slot)
{
- pte_val(pte) |= PTE_S2_RDWR;
- return pte;
-}
+ int idx = slot - (slot != HYP_VECTOR_DIRECT);
-static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
-{
- pmd_val(pmd) |= PMD_S2_RDWR;
- return pmd;
-}
-
-static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
-{
- pud_val(pud) |= PUD_S2_RDWR;
- return pud;
-}
-
-static inline pte_t kvm_s2pte_mkexec(pte_t pte)
-{
- pte_val(pte) &= ~PTE_S2_XN;
- return pte;
-}
-
-static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
-{
- pmd_val(pmd) &= ~PMD_S2_XN;
- return pmd;
+ return base + (idx * SZ_2K);
}
-static inline pud_t kvm_s2pud_mkexec(pud_t pud)
-{
- pud_val(pud) &= ~PUD_S2_XN;
- return pud;
-}
-
-static inline void kvm_set_s2pte_readonly(pte_t *ptep)
-{
- pteval_t old_pteval, pteval;
-
- pteval = READ_ONCE(pte_val(*ptep));
- do {
- old_pteval = pteval;
- pteval &= ~PTE_S2_RDWR;
- pteval |= PTE_S2_RDONLY;
- pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
- } while (pteval != old_pteval);
-}
-
-static inline bool kvm_s2pte_readonly(pte_t *ptep)
-{
- return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
-}
-
-static inline bool kvm_s2pte_exec(pte_t *ptep)
-{
- return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
-}
-
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
-{
- kvm_set_s2pte_readonly((pte_t *)pmdp);
-}
-
-static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
-{
- return kvm_s2pte_readonly((pte_t *)pmdp);
-}
-
-static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
-{
- return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
-}
-
-static inline void kvm_set_s2pud_readonly(pud_t *pudp)
-{
- kvm_set_s2pte_readonly((pte_t *)pudp);
-}
-
-static inline bool kvm_s2pud_readonly(pud_t *pudp)
-{
- return kvm_s2pte_readonly((pte_t *)pudp);
-}
-
-static inline bool kvm_s2pud_exec(pud_t *pudp)
-{
- return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
-}
-
-static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
-{
- return pud_mkyoung(pud);
-}
-
-static inline bool kvm_s2pud_young(pud_t pud)
-{
- return pud_young(pud);
-}
-
-#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
-
-#ifdef __PAGETABLE_PMD_FOLDED
-#define hyp_pmd_table_empty(pmdp) (0)
-#else
-#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#endif
-
-#ifdef __PAGETABLE_PUD_FOLDED
-#define hyp_pud_table_empty(pudp) (0)
-#else
-#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
-#endif
-
struct kvm;
-#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l) \
+ dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+ u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
+ int reg;
+
+ if (vcpu_is_el2(vcpu))
+ reg = SCTLR_EL2;
+ else
+ reg = SCTLR_EL1;
+
+ return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
}
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
{
- void *va = page_address(pfn_to_page(pfn));
-
/*
* With FWB, we ensure that the guest always accesses memory using
* cacheable attributes, and we don't have to clean to PoC when
* faulting in pages. Furthermore, FWB implies IDC, so cleaning to
* PoU is not required either in this case.
*/
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
return;
kvm_flush_dcache_to_poc(va, size);
}
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
- unsigned long size)
+static inline size_t __invalidate_icache_max_range(void)
{
- if (icache_is_aliasing()) {
- /* any kind of VIPT cache */
- __flush_icache_all();
- } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
- /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
- void *va = page_address(pfn_to_page(pfn));
-
- invalidate_icache_range((unsigned long)va,
- (unsigned long)va + size);
- }
-}
+ u8 iminline;
+ u64 ctr;
-static inline void __kvm_flush_dcache_pte(pte_t pte)
-{
- if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
- struct page *page = pte_page(pte);
- kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
- }
-}
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ ARM64_ALWAYS_SYSTEM,
+ kvm_compute_final_ctr_el0)
+ : "=r" (ctr));
-static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
-{
- if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
- struct page *page = pmd_page(pmd);
- kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
- }
+ iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2;
+ return MAX_DVM_OPS << iminline;
}
-static inline void __kvm_flush_dcache_pud(pud_t pud)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
- if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
- struct page *page = pud_page(pud);
- kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
- }
+ /*
+ * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
+ * invalidation range exceeds our arbitrary limit on invadations by
+ * cache line.
+ */
+ if (icache_is_aliasing() || size > __invalidate_icache_max_range())
+ icache_inval_all_pou();
+ else
+ icache_inval_pou((unsigned long)va, (unsigned long)va + size);
}
-#define kvm_virt_to_phys(x) __pa_symbol(x)
-
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
-static inline bool __kvm_cpu_uses_extended_idmap(void)
-{
- return __cpu_uses_extended_idmap_level();
-}
-
-static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
-{
- return idmap_ptrs_per_pgd;
-}
-
-/*
- * Can't use pgd_populate here, because the extended idmap adds an extra level
- * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended
- * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4.
- */
-static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
- pgd_t *hyp_pgd,
- pgd_t *merged_hyp_pgd,
- unsigned long hyp_idmap_start)
-{
- int idmap_idx;
- u64 pgd_addr;
-
- /*
- * Use the first entry to access the HYP mappings. It is
- * guaranteed to be free, otherwise we wouldn't use an
- * extended idmap.
- */
- VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
- pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd));
- merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);
-
- /*
- * Create another extended level entry that points to the boot HYP map,
- * which contains an ID mapping of the HYP init code. We essentially
- * merge the boot and runtime HYP maps by doing so, but they don't
- * overlap anyway, so this is fine.
- */
- idmap_idx = hyp_idmap_start >> VA_BITS;
- VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
- pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd));
- merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);
-}
-
static inline unsigned int kvm_get_vmid_bits(void)
{
int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+ return get_vmid_bits(reg);
}
/*
@@ -446,163 +291,46 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
return ret;
}
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
-/*
- * EL2 vectors can be mapped and rerouted in a number of ways,
- * depending on the kernel configuration and CPU present:
- *
- * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
- * hardening sequence is placed in one of the vector slots, which is
- * executed before jumping to the real vectors.
- *
- * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
- * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
- * hardening sequence is mapped next to the idmap page, and executed
- * before jumping to the real vectors.
- *
- * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
- * empty slot is selected, mapped next to the idmap page, and
- * executed before jumping to the real vectors.
- *
- * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
- * VHE, as we don't have hypervisor-specific mappings. If the system
- * is VHE and yet selects this capability, it will be ignored.
- */
-#include <asm/mmu.h>
-
-extern void *__kvm_bp_vect_base;
-extern int __kvm_harden_el2_vector_slot;
-
-static inline void *kvm_get_hyp_vector(void)
-{
- struct bp_hardening_data *data = arm64_get_bp_hardening_data();
- void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
- int slot = -1;
-
- if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
- vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start));
- slot = data->hyp_vectors_slot;
- }
-
- if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
- vect = __kvm_bp_vect_base;
- if (slot == -1)
- slot = __kvm_harden_el2_vector_slot;
- }
-
- if (slot != -1)
- vect += slot * SZ_2K;
-
- return vect;
-}
-
-/* This is only called on a !VHE system */
-static inline int kvm_map_vectors(void)
-{
- /*
- * HBP = ARM64_HARDEN_BRANCH_PREDICTOR
- * HEL2 = ARM64_HARDEN_EL2_VECTORS
- *
- * !HBP + !HEL2 -> use direct vectors
- * HBP + !HEL2 -> use hardened vectors in place
- * !HBP + HEL2 -> allocate one vector slot and use exec mapping
- * HBP + HEL2 -> use hardened vertors and use exec mapping
- */
- if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
- __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start);
- __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
- }
-
- if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
- phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start);
- unsigned long size = (__bp_harden_hyp_vecs_end -
- __bp_harden_hyp_vecs_start);
-
- /*
- * Always allocate a spare vector slot, as we don't
- * know yet which CPUs have a BP hardening slot that
- * we can reuse.
- */
- __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
- BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
- return create_hyp_exec_mappings(vect_pa, size,
- &__kvm_bp_vect_base);
- }
-
- return 0;
-}
-#else
-static inline void *kvm_get_hyp_vector(void)
-{
- return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
-}
-
-static inline int kvm_map_vectors(void)
-{
- return 0;
-}
-#endif
-
-#ifdef CONFIG_ARM64_SSBD
-DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
-
-static inline int hyp_map_aux_data(void)
-{
- int cpu, err;
-
- for_each_possible_cpu(cpu) {
- u64 *ptr;
-
- ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
- err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
- if (err)
- return err;
- }
- return 0;
-}
-#else
-static inline int hyp_map_aux_data(void)
-{
- return 0;
-}
-#endif
-
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
/*
- * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
- * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
- * 52bit IPS.
+ * When this is (directly or indirectly) used on the TLB invalidation
+ * path, we rely on a previously issued DSB so that page table updates
+ * and VMID reads are correctly ordered.
*/
-static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
+static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
- int x = ARM64_VTTBR_X(ipa_shift, levels);
+ struct kvm_vmid *vmid = &mmu->vmid;
+ u64 vmid_field, baddr;
+ u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
- return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
+ baddr = mmu->pgd_phys;
+ vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
+ vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits);
+ return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
-static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
+ struct kvm_arch *arch)
{
- unsigned int x = arm64_vttbr_x(ipa_shift, levels);
+ write_sysreg(mmu->vtcr, vtcr_el2);
+ write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
- return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
-}
-
-static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
-{
- return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
+ /*
+ * ARM errata 1165522 and 1530923 require the actual execution of the
+ * above before we can switch to the EL1/EL0 translation regime used by
+ * the guest.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
+static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
- struct kvm_vmid *vmid = &kvm->arch.vmid;
- u64 vmid_field, baddr;
- u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
-
- baddr = kvm->arch.pgd_phys;
- vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
- return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
+ return container_of(mmu->arch, struct kvm, arch);
}
-
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+ isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
new file mode 100644
index 000000000000..4882905357f4
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_NESTED_H
+#define __ARM64_KVM_NESTED_H
+
+#include <linux/bitfield.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
+{
+ return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
+ cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
+}
+
+/* Translation helpers from non-VHE EL2 to EL1 */
+static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
+{
+ return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
+}
+
+static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
+{
+ return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+ tcr_el2_ps_to_tcr_el1_ips(tcr) |
+ (tcr & TCR_EL2_TG0_MASK) |
+ (tcr & TCR_EL2_ORGN0_MASK) |
+ (tcr & TCR_EL2_IRGN0_MASK) |
+ (tcr & TCR_EL2_T0SZ_MASK);
+}
+
+static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
+{
+ u64 cpacr_el1 = 0;
+
+ if (cptr_el2 & CPTR_EL2_TTA)
+ cpacr_el1 |= CPACR_ELx_TTA;
+ if (!(cptr_el2 & CPTR_EL2_TFP))
+ cpacr_el1 |= CPACR_ELx_FPEN;
+ if (!(cptr_el2 & CPTR_EL2_TZ))
+ cpacr_el1 |= CPACR_ELx_ZEN;
+
+ return cpacr_el1;
+}
+
+static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
+{
+ /* Only preserve the minimal set of bits we support */
+ val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
+ SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
+ val |= SCTLR_EL1_RES1;
+
+ return val;
+}
+
+static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
+{
+ /* Clear the ASID field */
+ return ttbr0 & ~GENMASK_ULL(63, 48);
+}
+
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
+int kvm_init_nv_sysregs(struct kvm *kvm);
+
+#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
new file mode 100644
index 000000000000..cfdf40f734b1
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -0,0 +1,787 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_PGTABLE_H__
+#define __ARM64_KVM_PGTABLE_H__
+
+#include <linux/bits.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#define KVM_PGTABLE_FIRST_LEVEL -1
+#define KVM_PGTABLE_LAST_LEVEL 3
+
+/*
+ * The largest supported block sizes for KVM (no 52-bit PA support):
+ * - 4K (level 1): 1GB
+ * - 16K (level 2): 32MB
+ * - 64K (level 2): 512MB
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
+#else
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
+#endif
+
+#define kvm_lpa2_is_enabled() system_supports_lpa2()
+
+static inline u64 kvm_get_parange_max(void)
+{
+ if (kvm_lpa2_is_enabled() ||
+ (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
+ return ID_AA64MMFR0_EL1_PARANGE_52;
+ else
+ return ID_AA64MMFR0_EL1_PARANGE_48;
+}
+
+static inline u64 kvm_get_parange(u64 mmfr0)
+{
+ u64 parange_max = kvm_get_parange_max();
+ u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+ if (parange > parange_max)
+ parange = parange_max;
+
+ return parange;
+}
+
+typedef u64 kvm_pte_t;
+
+#define KVM_PTE_VALID BIT(0)
+
+#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+
+#define KVM_PHYS_INVALID (-1ULL)
+
+static inline bool kvm_pte_valid(kvm_pte_t pte)
+{
+ return pte & KVM_PTE_VALID;
+}
+
+static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
+{
+ u64 pa;
+
+ if (kvm_lpa2_is_enabled()) {
+ pa = pte & KVM_PTE_ADDR_MASK_LPA2;
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16)
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ }
+
+ return pa;
+}
+
+static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
+{
+ kvm_pte_t pte;
+
+ if (kvm_lpa2_is_enabled()) {
+ pte = pa & KVM_PTE_ADDR_MASK_LPA2;
+ pa &= GENMASK(51, 50);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
+ } else {
+ pte = pa & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
+ }
+
+ return pte;
+}
+
+static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
+{
+ return __phys_to_pfn(kvm_pte_to_phys(pte));
+}
+
+static inline u64 kvm_granule_shift(s8 level)
+{
+ /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
+ return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
+}
+
+static inline u64 kvm_granule_size(s8 level)
+{
+ return BIT(kvm_granule_shift(level));
+}
+
+static inline bool kvm_level_supports_block_mapping(s8 level)
+{
+ return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
+}
+
+static inline u32 kvm_supported_block_sizes(void)
+{
+ s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ u32 r = 0;
+
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
+ r |= BIT(kvm_granule_shift(level));
+
+ return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+ bool is_power_of_two = IS_ALIGNED(size, size);
+
+ return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
+/**
+ * struct kvm_pgtable_mm_ops - Memory management callbacks.
+ * @zalloc_page: Allocate a single zeroed memory page.
+ * The @arg parameter can be used by the walker
+ * to pass a memcache. The initial refcount of
+ * the page is 1.
+ * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
+ * The @size parameter is in bytes, and is rounded
+ * up to the next page boundary. The resulting
+ * allocation is physically contiguous.
+ * @free_pages_exact: Free an exact number of memory pages previously
+ * allocated by zalloc_pages_exact.
+ * @free_unlinked_table: Free an unlinked paging structure by unlinking and
+ * dropping references.
+ * @get_page: Increment the refcount on a page.
+ * @put_page: Decrement the refcount on a page. When the
+ * refcount reaches 0 the page is automatically
+ * freed.
+ * @page_count: Return the refcount of a page.
+ * @phys_to_virt: Convert a physical address into a virtual
+ * address mapped in the current context.
+ * @virt_to_phys: Convert a virtual address mapped in the current
+ * context into a physical address.
+ * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
+ * for the specified memory address range.
+ * @icache_inval_pou: Invalidate the instruction cache to the PoU
+ * for the specified memory address range.
+ */
+struct kvm_pgtable_mm_ops {
+ void* (*zalloc_page)(void *arg);
+ void* (*zalloc_pages_exact)(size_t size);
+ void (*free_pages_exact)(void *addr, size_t size);
+ void (*free_unlinked_table)(void *addr, s8 level);
+ void (*get_page)(void *addr);
+ void (*put_page)(void *addr);
+ int (*page_count)(void *addr);
+ void* (*phys_to_virt)(phys_addr_t phys);
+ phys_addr_t (*virt_to_phys)(void *addr);
+ void (*dcache_clean_inval_poc)(void *addr, size_t size);
+ void (*icache_inval_pou)(void *addr, size_t size);
+};
+
+/**
+ * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
+ * @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
+ * ARM64_HAS_STAGE2_FWB.
+ * @KVM_PGTABLE_S2_IDMAP: Only use identity mappings.
+ */
+enum kvm_pgtable_stage2_flags {
+ KVM_PGTABLE_S2_NOFWB = BIT(0),
+ KVM_PGTABLE_S2_IDMAP = BIT(1),
+};
+
+/**
+ * enum kvm_pgtable_prot - Page-table permissions and attributes.
+ * @KVM_PGTABLE_PROT_X: Execute permission.
+ * @KVM_PGTABLE_PROT_W: Write permission.
+ * @KVM_PGTABLE_PROT_R: Read permission.
+ * @KVM_PGTABLE_PROT_DEVICE: Device attributes.
+ * @KVM_PGTABLE_PROT_SW0: Software bit 0.
+ * @KVM_PGTABLE_PROT_SW1: Software bit 1.
+ * @KVM_PGTABLE_PROT_SW2: Software bit 2.
+ * @KVM_PGTABLE_PROT_SW3: Software bit 3.
+ */
+enum kvm_pgtable_prot {
+ KVM_PGTABLE_PROT_X = BIT(0),
+ KVM_PGTABLE_PROT_W = BIT(1),
+ KVM_PGTABLE_PROT_R = BIT(2),
+
+ KVM_PGTABLE_PROT_DEVICE = BIT(3),
+
+ KVM_PGTABLE_PROT_SW0 = BIT(55),
+ KVM_PGTABLE_PROT_SW1 = BIT(56),
+ KVM_PGTABLE_PROT_SW2 = BIT(57),
+ KVM_PGTABLE_PROT_SW3 = BIT(58),
+};
+
+#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
+
+#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX
+#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW
+
+#define PAGE_HYP KVM_PGTABLE_PROT_RW
+#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
+#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
+#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
+
+typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
+ enum kvm_pgtable_prot prot);
+
+/**
+ * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
+ * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
+ * entries.
+ * @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their
+ * children.
+ * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their
+ * children.
+ * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
+ * with other software walkers.
+ * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
+ * invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
+ * without Break-before-make's
+ * TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
+ * without Cache maintenance
+ * operations required.
+ */
+enum kvm_pgtable_walk_flags {
+ KVM_PGTABLE_WALK_LEAF = BIT(0),
+ KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
+ KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
+ KVM_PGTABLE_WALK_SHARED = BIT(3),
+ KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
+ KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
+ KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
+};
+
+struct kvm_pgtable_visit_ctx {
+ kvm_pte_t *ptep;
+ kvm_pte_t old;
+ void *arg;
+ struct kvm_pgtable_mm_ops *mm_ops;
+ u64 start;
+ u64 addr;
+ u64 end;
+ s8 level;
+ enum kvm_pgtable_walk_flags flags;
+};
+
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit);
+
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+ return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
+
+/**
+ * struct kvm_pgtable_walker - Hook into a page-table walk.
+ * @cb: Callback function to invoke during the walk.
+ * @arg: Argument passed to the callback function.
+ * @flags: Bitwise-OR of flags to identify the entry types on which to
+ * invoke the callback function.
+ */
+struct kvm_pgtable_walker {
+ const kvm_pgtable_visitor_fn_t cb;
+ void * const arg;
+ const enum kvm_pgtable_walk_flags flags;
+};
+
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ /*
+ * Due to the lack of RCU (or a similar protection scheme), only
+ * non-shared table walkers are allowed in the hypervisor.
+ */
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ return -EPERM;
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+ kvm_pteref_t pteref)
+{
+ return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_lock();
+
+ return 0;
+}
+
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
+{
+ if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+ rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+ return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits: Maximum input address size, in bits.
+ * @start_level: Level at which the page-table walk starts.
+ * @pgd: Pointer to the first top-level entry of the page-table.
+ * @mm_ops: Memory management callbacks.
+ * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags: Stage-2 page-table flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ */
+struct kvm_pgtable {
+ u32 ia_bits;
+ s8 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ struct kvm_s2_mmu *mmu;
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+};
+
+/**
+ * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
+ * @pgt: Uninitialised page-table structure to initialise.
+ * @va_bits: Maximum virtual address bits.
+ * @mm_ops: Memory management callbacks.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
+ struct kvm_pgtable_mm_ops *mm_ops);
+
+/**
+ * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address at which to place the mapping.
+ * @size: Size of the mapping.
+ * @phys: Physical address of the memory to map.
+ * @prot: Permissions and attributes for the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable. Attempts to install a new mapping
+ * for a virtual address that is already mapped will be rejected with an
+ * error and a WARN().
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot);
+
+/**
+ * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init().
+ * @addr: Virtual address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. The unmapping operation will stop early if it encounters either an
+ * invalid page-table entry or a valid block mapping which maps beyond the range
+ * being unmapped.
+ *
+ * Return: Number of bytes unmapped, which may be 0.
+ */
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_get_vtcr() - Helper to construct VTCR_EL2
+ * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
+ * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
+ * @phys_shfit: Value to set in VTCR_EL2.T0SZ.
+ *
+ * The VTCR value is common across all the physical CPUs on the system.
+ * We use system wide sanitised values to fill in different fields,
+ * except for Hardware Management of Access Flags. HA Flag is set
+ * unconditionally on all CPUs, as it is safe to run with or without
+ * the feature and the bit is RES0 on CPUs that don't support it.
+ *
+ * Return: VTCR_EL2 value
+ */
+u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
+
+/**
+ * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
+ * @vtcr: Content of the VTCR register.
+ *
+ * Return: the size (in bytes) of the stage-2 PGD
+ */
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
+
+/**
+ * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
+ * @pgt: Uninitialised page-table structure to initialise.
+ * @mmu: S2 MMU context for this S2 translation
+ * @mm_ops: Memory management callbacks.
+ * @flags: Stage-2 configuration flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ * be used instead of block mappings.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops,
+ enum kvm_pgtable_stage2_flags flags,
+ kvm_pgtable_force_pte_cb_t force_pte_cb);
+
+#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
+ __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
+
+/**
+ * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+
+/**
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
+ * @mm_ops: Memory management callbacks.
+ * @pgtable: Unlinked stage-2 paging structure to be freed.
+ * @level: Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys: Physical address of the memory to map.
+ * @level: Starting level of the stage-2 paging structure to be created.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @force_pte: Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachableby the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+ u64 phys, s8 level,
+ enum kvm_pgtable_prot prot,
+ void *mc, bool force_pte);
+
+/**
+ * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address at which to place the mapping.
+ * @size: Size of the mapping.
+ * @phys: Physical address of the memory to map.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
+ *
+ * The offset of @addr within a page is ignored, @size is rounded-up to
+ * the next page boundary and @phys is rounded-down to the previous page
+ * boundary.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Note that the update of a valid leaf PTE in this function will be aborted,
+ * if it's trying to recreate the exact same mapping or only change the access
+ * permissions. Instead, the vCPU will exit one more time from guest if still
+ * needed and then go through the path of relaxing permissions.
+ *
+ * Note that this function will both coalesce existing table entries and split
+ * existing block mappings, relying on page-faults to fault back areas outside
+ * of the new mapping lazily.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ u64 phys, enum kvm_pgtable_prot prot,
+ void *mc, enum kvm_pgtable_walk_flags flags);
+
+/**
+ * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
+ * track ownership.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Base intermediate physical address to annotate.
+ * @size: Size of the annotated range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @owner_id: Unique identifier for the owner of the page.
+ *
+ * By default, all page-tables are owned by identifier 0. This function can be
+ * used to mark portions of the IPA space as owned by other entities. When a
+ * stage 2 is used with identity-mappings, these annotations allow to use the
+ * page-table data structure as a simple rmap.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ void *mc, u8 owner_id);
+
+/**
+ * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to remove the mapping.
+ * @size: Size of the mapping.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * TLB invalidation is performed for each page-table entry cleared during the
+ * unmapping operation and the reference count for the page-table page
+ * containing the cleared entry is decremented, with unreferenced pages being
+ * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
+ * FWB is not supported by the CPU.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
+ * without TLB invalidation.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to write-protect,
+ * @size: Size of the range.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * Note that it is the caller's responsibility to invalidate the TLB after
+ * calling this function to ensure that the updated permissions are visible
+ * to the CPUs.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * If there is a valid, leaf page-table entry used to translate @addr, then
+ * set the access flag in that entry.
+ *
+ * Return: The old page-table entry prior to setting the flag, 0 on failure.
+ */
+kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
+
+/**
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ * flag in a page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ * @size: Size of the address range to visit.
+ * @mkold: True if the access flag should be cleared.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
+ *
+ * Note that it is the caller's responsibility to invalidate the TLB after
+ * calling this function to ensure that the updated permissions are visible
+ * to the CPUs.
+ *
+ * Return: True if any of the visited PTEs had the access flag set.
+ */
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+ u64 size, bool mkold);
+
+/**
+ * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
+ * page-table entry.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address to identify the page-table entry.
+ * @prot: Additional permissions to grant for the mapping.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * If there is a valid, leaf page-table entry used to translate @addr, then
+ * relax the permissions in that entry according to the read, write and
+ * execute permissions specified by @prot. No permissions are removed, and
+ * TLB invalidation is performed after updating the entry. Software bits cannot
+ * be set or cleared using kvm_pgtable_stage2_relax_perms().
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_prot prot);
+
+/**
+ * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
+ * of Coherency for guest stage-2 address
+ * range.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address from which to flush.
+ * @size: Size of the range.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ * to PAGE_SIZE guest pages.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr: Intermediate physical address from which to split.
+ * @size: Size of the range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+
+/**
+ * kvm_pgtable_walk() - Walk a page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_*_init().
+ * @addr: Input address for the start of the walk.
+ * @size: Size of the range to walk.
+ * @walker: Walker callback description.
+ *
+ * The offset of @addr within a page is ignored and @size is rounded-up to
+ * the next page boundary.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address range specified, visiting entries according to the walker flags.
+ * Invalid entries are treated as leaf entries. The visited page table entry is
+ * reloaded after invoking the walker callback, allowing the walker to descend
+ * into a newly installed table.
+ *
+ * Returning a negative error code from the walker callback function will
+ * terminate the walk immediately with the same error code.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_pgtable_walker *walker);
+
+/**
+ * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
+ * with its level.
+ * @pgt: Page-table structure initialised by kvm_pgtable_*_init()
+ * or a similar initialiser.
+ * @addr: Input address for the start of the walk.
+ * @ptep: Pointer to storage for the retrieved PTE.
+ * @level: Pointer to storage for the level of the retrieved PTE.
+ *
+ * The offset of @addr within a page is ignored.
+ *
+ * The walker will walk the page-table entries corresponding to the input
+ * address specified, retrieving the leaf corresponding to this address.
+ * Invalid entries are treated as leaf entries.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
+ kvm_pte_t *ptep, s8 *level);
+
+/**
+ * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
+ * stage-2 Page-Table Entry.
+ * @pte: Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ * kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
+ * Page-Table Entry.
+ * @pte: Page-table entry
+ *
+ * Return: protection attributes of the page-table entry in the enum
+ * kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu: Stage-2 KVM MMU struct
+ * @addr: The base Intermediate physical address from which to invalidate
+ * @size: Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size);
+#endif /* __ARM64_KVM_PGTABLE_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
new file mode 100644
index 000000000000..ad9cfb5c1ff4
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: Quentin Perret <qperret@google.com>
+ */
+#ifndef __ARM64_KVM_PKVM_H__
+#define __ARM64_KVM_PKVM_H__
+
+#include <linux/arm_ffa.h>
+#include <linux/memblock.h>
+#include <linux/scatterlist.h>
+#include <asm/kvm_pgtable.h>
+
+/* Maximum number of VMs that can co-exist under pKVM. */
+#define KVM_MAX_PVMS 255
+
+#define HYP_MEMBLOCK_REGIONS 128
+
+int pkvm_init_host_vm(struct kvm *kvm);
+int pkvm_create_hyp_vm(struct kvm *kvm);
+void pkvm_destroy_hyp_vm(struct kvm *kvm);
+
+extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
+extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
+
+static inline unsigned long
+hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
+{
+ unsigned long nr_pages = reg->size >> PAGE_SHIFT;
+ unsigned long start, end;
+
+ start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
+ end = start + nr_pages * vmemmap_entry_size;
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = ALIGN(end, PAGE_SIZE);
+
+ return end - start;
+}
+
+static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
+{
+ unsigned long res = 0, i;
+
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
+ vmemmap_entry_size);
+ }
+
+ return res >> PAGE_SHIFT;
+}
+
+static inline unsigned long hyp_vm_table_pages(void)
+{
+ return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
+}
+
+static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
+{
+ unsigned long total = 0;
+ int i;
+
+ /* Provision the worst case scenario */
+ for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
+ nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
+ total += nr_pages;
+ }
+
+ return total;
+}
+
+static inline unsigned long __hyp_pgtable_total_pages(void)
+{
+ unsigned long res = 0, i;
+
+ /* Cover all of memory with page-granularity */
+ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+ struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
+ res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
+ }
+
+ return res;
+}
+
+static inline unsigned long hyp_s1_pgtable_pages(void)
+{
+ unsigned long res;
+
+ res = __hyp_pgtable_total_pages();
+
+ /* Allow 1 GiB for private mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+static inline unsigned long host_s2_pgtable_pages(void)
+{
+ unsigned long res;
+
+ /*
+ * Include an extra 16 pages to safely upper-bound the worst case of
+ * concatenated pgds.
+ */
+ res = __hyp_pgtable_total_pages() + 16;
+
+ /* Allow 1 GiB for MMIO mappings */
+ res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+ return res;
+}
+
+#define KVM_FFA_MBOX_NR_PAGES 1
+
+static inline unsigned long hyp_ffa_proxy_pages(void)
+{
+ size_t desc_max;
+
+ /*
+ * The hypervisor FFA proxy needs enough memory to buffer a fragmented
+ * descriptor returned from EL3 in response to a RETRIEVE_REQ call.
+ */
+ desc_max = sizeof(struct ffa_mem_region) +
+ sizeof(struct ffa_mem_region_attributes) +
+ sizeof(struct ffa_composite_mem_region) +
+ SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range);
+
+ /* Plus a page each for the hypervisor's RX and TX mailboxes. */
+ return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
+}
+
+#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
index 6301813dcace..0cd0965255d2 100644
--- a/arch/arm64/include/asm/kvm_ptrauth.h
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -60,51 +60,43 @@
.endm
/*
- * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
- * check for the presence of one of the cpufeature flag
- * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and
+ * Both ptrauth_switch_to_guest and ptrauth_switch_to_hyp macros will
+ * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as
+ * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and
* then proceed ahead with the save/restore of Pointer Authentication
- * key registers.
+ * key registers if enabled for the guest.
*/
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
-alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
- b 1000f
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
alternative_else_nop_endif
-alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
- b 1001f
-alternative_else_nop_endif
-1000:
- ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ mrs \reg1, hcr_el2
and \reg1, \reg1, #(HCR_API | HCR_APK)
- cbz \reg1, 1001f
+ cbz \reg1, .L__skip_switch\@
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_restore_state \reg1, \reg2, \reg3
-1001:
+.L__skip_switch\@:
.endm
-.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
-alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
- b 2000f
-alternative_else_nop_endif
-alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
- b 2001f
+.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
+alternative_if_not ARM64_HAS_ADDRESS_AUTH
+ b .L__skip_switch\@
alternative_else_nop_endif
-2000:
- ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ mrs \reg1, hcr_el2
and \reg1, \reg1, #(HCR_API | HCR_APK)
- cbz \reg1, 2001f
+ cbz \reg1, .L__skip_switch\@
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_save_state \reg1, \reg2, \reg3
add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
ptrauth_restore_state \reg1, \reg2, \reg3
isb
-2001:
+.L__skip_switch\@:
.endm
#else /* !CONFIG_ARM64_PTR_AUTH */
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
.endm
-.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
+.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3
.endm
#endif /* CONFIG_ARM64_PTR_AUTH */
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
index 8ac6ee77437c..87e10d9a635b 100644
--- a/arch/arm64/include/asm/kvm_ras.h
+++ b/arch/arm64/include/asm/kvm_ras.h
@@ -14,7 +14,7 @@
* Was this synchronous external abort a RAS notification?
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
*/
-static inline int kvm_handle_guest_sea(phys_addr_t addr, unsigned int esr)
+static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
{
/* apei_claim_sea(NULL) expects to mask interrupts itself */
lockdep_assert_irqs_enabled();
diff --git a/arch/arm64/include/asm/kvm_types.h b/arch/arm64/include/asm/kvm_types.h
new file mode 100644
index 000000000000..9a126b9e2d7c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM64_KVM_TYPES_H
+#define _ASM_ARM64_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+
+#endif /* _ASM_ARM64_KVM_TYPES_H */
+
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 1b266292f0be..d3acd9c87509 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -1,7 +1,46 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-#define __ALIGN .align 2
-#define __ALIGN_STR ".align 2"
+#ifdef __ASSEMBLY__
+#include <asm/assembler.h>
+#endif
+
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR ".balign " #CONFIG_FUNCTION_ALIGNMENT
+
+/*
+ * When using in-kernel BTI we need to ensure that PCS-conformant
+ * assembly functions have suitable annotations. Override
+ * SYM_FUNC_START to insert a BTI landing pad at the start of
+ * everything, the override is done unconditionally so we're more
+ * likely to notice any drift from the overridden definitions.
+ */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_NOALIGN(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_FUNC_START_WEAK(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \
+ bti c ;
+
+#define SYM_FUNC_START_WEAK_NOALIGN(name) \
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
+ bti c ;
+
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \
+ bti c ;
#endif
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 80b388278149..3129a5819d0e 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -4,45 +4,34 @@
#include <asm/atomic_ll_sc.h>
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+
+#define __LSE_PREAMBLE ".arch_extension lse\n"
#include <linux/compiler_types.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
#include <asm/atomic_lse.h>
#include <asm/cpucaps.h>
-__asm__(".arch_extension lse");
-
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
-
-static inline bool system_uses_lse_atomics(void)
-{
- return (static_branch_likely(&arm64_const_caps_ready)) &&
- static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
-}
-
#define __lse_ll_sc_body(op, ...) \
({ \
- system_uses_lse_atomics() ? \
+ alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
/* In-line patching at runtime */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \
- ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
-
-#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+ ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS)
-static inline bool system_uses_lse_atomics(void) { return false; }
+#else /* CONFIG_ARM64_LSE_ATOMICS */
#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+#endif /* CONFIG_ARM64_LSE_ATOMICS */
#endif /* __ASM_LSE_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index a4f9ca5479b0..d82305ab420f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -10,11 +10,8 @@
#ifndef __ASM_MEMORY_H
#define __ASM_MEMORY_H
-#include <linux/compiler.h>
#include <linux/const.h>
#include <linux/sizes.h>
-#include <linux/types.h>
-#include <asm/bug.h>
#include <asm/page-def.h>
/*
@@ -33,8 +30,8 @@
* keep a constant PAGE_OFFSET and "fallback" to using the higher end
* of the VMEMMAP where 52-bit support is not available in hardware.
*/
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
- >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)
/*
* PAGE_OFFSET - the virtual address of the start of the linear map, at the
@@ -47,16 +44,14 @@
#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
#define KIMAGE_VADDR (MODULES_END)
-#define BPF_JIT_REGION_START (KASAN_SHADOW_END)
-#define BPF_JIT_REGION_SIZE (SZ_128M)
-#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR (BPF_JIT_REGION_END)
-#define MODULES_VSIZE (SZ_128M)
-#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M)
-#define PCI_IO_END (VMEMMAP_START - SZ_2M)
+#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
+#define MODULES_VSIZE (SZ_2G)
+#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
+#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
+#define PCI_IO_END (VMEMMAP_START - SZ_8M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
#if VA_BITS > 48
#define VA_BITS_MIN (48)
@@ -70,18 +65,45 @@
#define KERNEL_END _end
/*
- * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
- * address space for the shadow region respectively. They can bloat the stack
- * significantly, so double the (minimum) stack size when they are in use.
+ * Generic and Software Tag-Based KASAN modes require 1/8th and 1/16th of the
+ * kernel virtual address space for storing the shadow memory respectively.
+ *
+ * The mapping between a virtual memory address and its corresponding shadow
+ * memory address is defined based on the formula:
+ *
+ * shadow_addr = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * where KASAN_SHADOW_SCALE_SHIFT is the order of the number of bits that map
+ * to a single shadow byte and KASAN_SHADOW_OFFSET is a constant that offsets
+ * the mapping. Note that KASAN_SHADOW_OFFSET does not point to the start of
+ * the shadow memory region.
+ *
+ * Based on this mapping, we define two constants:
+ *
+ * KASAN_SHADOW_START: the start of the shadow memory region;
+ * KASAN_SHADOW_END: the end of the shadow memory region.
+ *
+ * KASAN_SHADOW_END is defined first as the shadow address that corresponds to
+ * the upper bound of possible virtual kernel memory addresses UL(1) << 64
+ * according to the mapping formula.
+ *
+ * KASAN_SHADOW_START is defined second based on KASAN_SHADOW_END. The shadow
+ * memory start must map to the lowest possible kernel virtual memory address
+ * and thus it depends on the actual bitness of the address space.
+ *
+ * As KASAN inserts redzones between stack variables, this increases the stack
+ * memory usage significantly. Thus, we double the (minimum) stack size.
*/
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
-#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
- + KASAN_SHADOW_OFFSET)
+#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) + KASAN_SHADOW_OFFSET)
+#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (UL(1) << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual)
+#define PAGE_END KASAN_SHADOW_START
#define KASAN_THREAD_SHIFT 1
#else
#define KASAN_THREAD_SHIFT 0
-#define KASAN_SHADOW_END (_PAGE_END(VA_BITS_MIN))
+#define PAGE_END (_PAGE_END(VA_BITS_MIN))
#endif /* CONFIG_KASAN */
#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
@@ -118,33 +140,34 @@
#define OVERFLOW_STACK_SIZE SZ_4K
/*
- * Alignment of kernel segments (e.g. .text, .data).
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwinded stacktrace; plus an additional entry to delimit the
+ * end.
*/
-#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-/*
- * 4 KB granule: 1 level 2 entry
- * 16 KB granule: 128 level 3 entries, with contiguous bit
- * 64 KB granule: 32 level 3 entries, with contiguous bit
- */
-#define SEGMENT_ALIGN SZ_2M
-#else
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+
/*
+ * Alignment of kernel segments (e.g. .text, .data).
+ *
* 4 KB granule: 16 level 3 entries, with contiguous bit
* 16 KB granule: 4 level 3 entries, without contiguous bit
* 64 KB granule: 1 level 3 entry
*/
#define SEGMENT_ALIGN SZ_64K
-#endif
/*
* Memory types available.
+ *
+ * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in
+ * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note
+ * that protection_map[] only contains MT_NORMAL attributes.
*/
-#define MT_DEVICE_nGnRnE 0
-#define MT_DEVICE_nGnRE 1
-#define MT_DEVICE_GRE 2
-#define MT_NORMAL_NC 3
-#define MT_NORMAL 4
-#define MT_NORMAL_WT 5
+#define MT_NORMAL 0
+#define MT_NORMAL_TAGGED 1
+#define MT_NORMAL_NC 2
+#define MT_DEVICE_nGnRnE 3
+#define MT_DEVICE_nGnRE 4
/*
* Memory types for Stage-2 translation
@@ -165,29 +188,58 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif
+/*
+ * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE)
+
+/*
+ * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated
+ * until link time.
+ */
+#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE)
+
#ifndef __ASSEMBLY__
-extern u64 vabits_actual;
-#define PAGE_END (_PAGE_END(vabits_actual))
#include <linux/bitops.h>
+#include <linux/compiler.h>
#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <asm/boot.h>
+#include <asm/bug.h>
+#include <asm/sections.h>
+
+#if VA_BITS > 48
+extern u64 vabits_actual;
+#else
+#define vabits_actual ((u64)VA_BITS)
+#endif
-extern s64 physvirt_offset;
extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
-/* the virtual base of the kernel image (minus TEXT_OFFSET) */
-extern u64 kimage_vaddr;
-
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
static inline unsigned long kaslr_offset(void)
{
- return kimage_vaddr - KIMAGE_VADDR;
+ return (u64)&_text - KIMAGE_VADDR;
}
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_init(void);
+static inline bool kaslr_enabled(void)
+{
+ extern bool __kaslr_is_enabled;
+ return __kaslr_is_enabled;
+}
+#else
+static inline void kaslr_init(void) { }
+static inline bool kaslr_enabled(void) { return false; }
+#endif
+
/*
* Allow all memory at the discovery stage. We will clip it later.
*/
@@ -213,12 +265,12 @@ static inline unsigned long kaslr_offset(void)
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
#define untagged_addr(addr) ({ \
- u64 __addr = (__force u64)addr; \
+ u64 __addr = (__force u64)(addr); \
__addr &= __untagged_addr(__addr); \
(__force __typeof__(addr))__addr; \
})
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
#define __tag_shifted(tag) ((u64)(tag) << 56)
#define __tag_reset(addr) __untagged_addr(addr)
#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
@@ -226,7 +278,7 @@ static inline unsigned long kaslr_offset(void)
#define __tag_shifted(tag) 0UL
#define __tag_reset(addr) (addr)
#define __tag_get(addr) 0
-#endif /* CONFIG_KASAN_SW_TAGS */
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
static inline const void *__tag_set(const void *addr, u8 tag)
{
@@ -234,6 +286,19 @@ static inline const void *__tag_set(const void *addr, u8 tag)
return (const void *)(__addr | __tag_shifted(tag));
}
+#ifdef CONFIG_KASAN_HW_TAGS
+#define arch_enable_tag_checks_sync() mte_enable_kernel_sync()
+#define arch_enable_tag_checks_async() mte_enable_kernel_async()
+#define arch_enable_tag_checks_asymm() mte_enable_kernel_asymm()
+#define arch_suppress_tag_checks_start() mte_enable_tco()
+#define arch_suppress_tag_checks_stop() mte_disable_tco()
+#define arch_force_async_tag_fault() mte_check_tfsr_exit()
+#define arch_get_random_tag() mte_get_random_tag()
+#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
+#define arch_set_mem_tag_range(addr, size, tag, init) \
+ mte_set_mem_tag_range((addr), (size), (tag), (init))
+#endif /* CONFIG_KASAN_HW_TAGS */
+
/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
@@ -242,13 +307,13 @@ static inline const void *__tag_set(const void *addr, u8 tag)
/*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * Check whether an arbitrary address is within the linear map, which
+ * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the
+ * kernel's TTBR1 address range.
*/
-#define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
-#define __lm_to_phys(addr) (((addr) + physvirt_offset))
+#define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET)
#define __kimg_to_phys(addr) ((addr) - kimage_voffset)
#define __virt_to_phys_nodebug(x) ({ \
@@ -266,7 +331,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define __phys_addr_symbol(x) __pa_symbol_nodebug(x)
#endif /* CONFIG_DEBUG_VIRTUAL */
-#define __phys_to_virt(x) ((unsigned long)((x) - physvirt_offset))
+#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset))
/*
@@ -292,6 +357,14 @@ static inline void *phys_to_virt(phys_addr_t x)
return (void *)(__phys_to_virt(x));
}
+/* Needed already here for resolving __phys_to_pfn() in virt_to_pfn() */
+#include <asm-generic/memory_model.h>
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __phys_to_pfn(virt_to_phys(kaddr));
+}
+
/*
* Drivers should NOT use these either.
*/
@@ -300,7 +373,6 @@ static inline void *phys_to_virt(phys_addr_t x)
#define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x)))
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
/*
@@ -309,7 +381,12 @@ static inline void *phys_to_virt(phys_addr_t x)
*/
#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
-#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL)
+#if defined(CONFIG_DEBUG_VIRTUAL)
+#define page_to_virt(x) ({ \
+ __typeof__(x) __page = x; \
+ void *__addr = __va(page_to_phys(__page)); \
+ (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
+})
#define virt_to_page(x) pfn_to_page(virt_to_pfn(x))
#else
#define page_to_virt(x) ({ \
@@ -324,13 +401,14 @@ static inline void *phys_to_virt(phys_addr_t x)
u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \
(struct page *)__addr; \
})
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */
+#endif /* CONFIG_DEBUG_VIRTUAL */
#define virt_addr_valid(addr) ({ \
- __typeof__(addr) __addr = addr; \
- __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \
+ __typeof__(addr) __addr = __tag_reset(addr); \
+ __is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr)); \
})
+void dump_mem_limit(void);
#endif /* !ASSEMBLY */
/*
@@ -344,6 +422,14 @@ static inline void *phys_to_virt(phys_addr_t x)
# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1)
#endif
-#include <asm-generic/memory_model.h>
+/*
+ * memory regions which marked with flag MEMBLOCK_NOMAP(for example, the memory
+ * of the EFI_UNUSABLE_MEMORY type) may divide a continuous memory block into
+ * multiple parts. As a result, the number of memory regions is large.
+ */
+#ifdef CONFIG_EFI
+#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8)
+#endif
+
#endif /* __ASM_MEMORY_H */
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
new file mode 100644
index 000000000000..5966ee4a6154
--- /dev/null
+++ b/arch/arm64/include/asm/mman.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <uapi/asm/mman.h>
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+ unsigned long pkey __always_unused)
+{
+ unsigned long ret = 0;
+
+ if (system_supports_bti() && (prot & PROT_BTI))
+ ret |= VM_ARM64_BTI;
+
+ if (system_supports_mte() && (prot & PROT_MTE))
+ ret |= VM_MTE;
+
+ return ret;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+{
+ /*
+ * Only allow MTE on anonymous mappings as these are guaranteed to be
+ * backed by tags-capable memory. The vm_flags may be overridden by a
+ * filesystem supporting MTE (RAM-based).
+ */
+ if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+ return VM_MTE_ALLOWED;
+
+ return 0;
+}
+#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+
+static inline bool arch_validate_prot(unsigned long prot,
+ unsigned long addr __always_unused)
+{
+ unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM;
+
+ if (system_supports_bti())
+ supported |= PROT_BTI;
+
+ if (system_supports_mte())
+ supported |= PROT_MTE;
+
+ return (prot & ~supported) == 0;
+}
+#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
+
+static inline bool arch_validate_flags(unsigned long vm_flags)
+{
+ if (!system_supports_mte())
+ return true;
+
+ /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */
+ return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED);
+}
+#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index f217e3292919..2fcf51231d6e 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -12,122 +12,66 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
-#define BP_HARDEN_EL2_SLOTS 4
-
#ifndef __ASSEMBLY__
+#include <linux/refcount.h>
+#include <asm/cpufeature.h>
+
typedef struct {
atomic64_t id;
+#ifdef CONFIG_COMPAT
+ void *sigpage;
+#endif
+ refcount_t pinned;
void *vdso;
unsigned long flags;
} mm_context_t;
/*
- * This macro is only used by the TLBI code, which cannot race with an
- * ASID change and therefore doesn't need to reload the counter using
- * atomic64_read.
+ * We use atomic64_read() here because the ASID for an 'mm_struct' can
+ * be reallocated when scheduling one of its threads following a
+ * rollover event (see new_context() and flush_context()). In this case,
+ * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
+ * may use a stale ASID. This is fine in principle as the new ASID is
+ * guaranteed to be clean in the TLB, but the TLBI routines have to take
+ * care to handle the following race:
+ *
+ * CPU 0 CPU 1 CPU 2
+ *
+ * // ptep_clear_flush(mm)
+ * xchg_relaxed(pte, 0)
+ * DSB ISHST
+ * old = ASID(mm)
+ * | <rollover>
+ * | new = new_context(mm)
+ * \-----------------> atomic_set(mm->context.id, new)
+ * cpu_switch_mm(mm)
+ * // Hardware walk of pte using new ASID
+ * TLBI(old)
+ *
+ * In this scenario, the barrier on CPU 0 and the dependency on CPU 1
+ * ensure that the page-table walker on CPU 1 *must* see the invalid PTE
+ * written by CPU 0.
*/
-#define ASID(mm) ((mm)->context.id.counter & 0xffff)
+#define ASID(mm) (atomic64_read(&(mm)->context.id) & 0xffff)
static inline bool arm64_kernel_unmapped_at_el0(void)
{
- return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
- cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
-}
-
-static inline bool arm64_kernel_use_ng_mappings(void)
-{
- bool tx1_bug;
-
- /* What's a kpti? Use global mappings if we don't know. */
- if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0))
- return false;
-
- /*
- * Note: this function is called before the CPU capabilities have
- * been configured, so our early mappings will be global. If we
- * later determine that kpti is required, then
- * kpti_install_ng_mappings() will make them non-global.
- */
- if (arm64_kernel_unmapped_at_el0())
- return true;
-
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return false;
-
- /*
- * KASLR is enabled so we're going to be enabling kpti on non-broken
- * CPUs regardless of their susceptibility to Meltdown. Rather
- * than force everybody to go through the G -> nG dance later on,
- * just put down non-global mappings from the beginning.
- */
- if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
- tx1_bug = false;
-#ifndef MODULE
- } else if (!static_branch_likely(&arm64_const_caps_ready)) {
- extern const struct midr_range cavium_erratum_27456_cpus[];
-
- tx1_bug = is_midr_in_range_list(read_cpuid_id(),
- cavium_erratum_27456_cpus);
-#endif
- } else {
- tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456);
- }
-
- return !tx1_bug && kaslr_offset() > 0;
-}
-
-typedef void (*bp_hardening_cb_t)(void);
-
-struct bp_hardening_data {
- int hyp_vectors_slot;
- bp_hardening_cb_t fn;
-};
-
-#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
- defined(CONFIG_HARDEN_EL2_VECTORS))
-extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
-extern atomic_t arm64_el2_vector_last_slot;
-#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
-
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
-DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
-{
- return this_cpu_ptr(&bp_hardening_data);
-}
-
-static inline void arm64_apply_bp_hardening(void)
-{
- struct bp_hardening_data *d;
-
- if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
- return;
-
- d = arm64_get_bp_hardening_data();
- if (d->fn)
- d->fn();
+ return alternative_has_cap_unlikely(ARM64_UNMAP_KERNEL_AT_EL0);
}
-#else
-static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
-{
- return NULL;
-}
-
-static inline void arm64_apply_bp_hardening(void) { }
-#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
-extern void init_mem_pgprot(void);
+extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
+extern bool kaslr_requires_kpti(void);
#define INIT_MM_CONTEXT(name) \
.pgd = init_pg_dir,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3827ff4040a3..9ce4200508b1 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -14,13 +14,14 @@
#include <linux/sched.h>
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
+#include <linux/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
#include <asm/proc-fns.h>
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
-#include <asm/pgtable.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
@@ -36,20 +37,26 @@ static inline void contextidr_thread_switch(struct task_struct *next)
}
/*
- * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
+ * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
*/
-static inline void cpu_set_reserved_ttbr0(void)
+static inline void cpu_set_reserved_ttbr0_nosync(void)
{
- unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));
+ unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
write_sysreg(ttbr, ttbr0_el1);
+}
+
+static inline void cpu_set_reserved_ttbr0(void)
+{
+ cpu_set_reserved_ttbr0_nosync();
isb();
}
+void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
BUG_ON(pgd == swapper_pg_dir);
- cpu_set_reserved_ttbr0();
cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
@@ -58,37 +65,18 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
* physical memory, in which case it will be smaller.
*/
-extern u64 idmap_t0sz;
-extern u64 idmap_ptrs_per_pgd;
-
-static inline bool __cpu_uses_extended_idmap(void)
-{
- if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52))
- return false;
-
- return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
-}
+extern int idmap_t0sz;
/*
- * True if the extended ID map requires an extra level of translation table
- * to be configured.
- */
-static inline bool __cpu_uses_extended_idmap_level(void)
-{
- return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS;
-}
-
-/*
- * Set TCR.T0SZ to its default value (based on VA_BITS)
+ * Ensure TCR.T0SZ is set to the provided value.
*/
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
{
- unsigned long tcr;
+ unsigned long tcr = read_sysreg(tcr_el1);
- if (!__cpu_uses_extended_idmap())
+ if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
return;
- tcr = read_sysreg(tcr_el1);
tcr &= ~TCR_T0SZ_MASK;
tcr |= t0sz << TCR_T0SZ_OFFSET;
write_sysreg(tcr, tcr_el1);
@@ -122,47 +110,91 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
-static inline void cpu_install_idmap(void)
+static inline void __cpu_install_idmap(pgd_t *idmap)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
+ cpu_switch_mm(lm_alias(idmap), &init_mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+ __cpu_install_idmap(idmap_pg_dir);
+}
+
+/*
+ * Load our new page tables. A strict BBM approach requires that we ensure that
+ * TLBs are free of any entries that may overlap with the global mappings we are
+ * about to install.
+ *
+ * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero
+ * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime
+ * services), while for a userspace-driven test_resume cycle it points to
+ * userspace page tables (and we must point it at a zero page ourselves).
+ *
+ * We change T0SZ as part of installing the idmap. This is undone by
+ * cpu_uninstall_idmap() in __cpu_suspend_exit().
+ */
+static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
+{
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ __cpu_set_tcr_t0sz(t0sz);
+
+ /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */
+ write_sysreg(ttbr0, ttbr0_el1);
+ isb();
}
/*
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
-static inline void cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
+ unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
- if (system_supports_cnp() && !WARN_ON(pgdp != lm_alias(swapper_pg_dir))) {
- /*
- * cpu_replace_ttbr1() is used when there's a boot CPU
- * up (i.e. cpufeature framework is not up yet) and
- * latter only when we enable CNP via cpufeature's
- * enable() callback.
- * Also we rely on the cpu_hwcap bit being set before
- * calling the enable() function.
- */
+ if (cnp)
ttbr1 |= TTBR_CNP_BIT;
- }
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
- cpu_install_idmap();
+ __cpu_install_idmap(idmap);
+
+ /*
+ * We really don't want to take *any* exceptions while TTBR1 is
+ * in the process of being replaced so mask everything.
+ */
+ daif = local_daif_save();
replace_phys(ttbr1);
+ local_daif_restore(daif);
+
cpu_uninstall_idmap();
}
+static inline void cpu_enable_swapper_cnp(void)
+{
+ __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
+}
+
+static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
+{
+ /*
+ * Only for early TTBR1 replacement before cpucaps are finalized and
+ * before we've decided whether to use CNP.
+ */
+ WARN_ON(system_capabilities_finalized());
+ __cpu_replace_ttbr1(pgdp, idmap, false);
+}
+
/*
* It would be nice to return ASIDs back to the allocator, but unfortunately
* that introduces a race with a generation rollover where we could erroneously
@@ -172,10 +204,16 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
* Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
* take CPU migration into account.
*/
-#define destroy_context(mm) do { } while(0)
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
+void check_and_switch_context(struct mm_struct *mm);
-#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; })
+#define init_new_context(tsk, mm) init_new_context(tsk, mm)
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ atomic64_set(&mm->context.id, 0);
+ refcount_set(&mm->context.pinned, 0);
+ return 0;
+}
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -187,9 +225,9 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
return;
if (mm == &init_mm)
- ttbr = __pa_symbol(empty_zero_page);
+ ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
else
- ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
@@ -200,6 +238,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
}
#endif
+#define enter_lazy_tlb enter_lazy_tlb
static inline void
enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
@@ -212,8 +251,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
static inline void __switch_mm(struct mm_struct *next)
{
- unsigned int cpu = smp_processor_id();
-
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -223,7 +260,7 @@ static inline void __switch_mm(struct mm_struct *next)
return;
}
- check_and_switch_context(next, cpu);
+ check_and_switch_context(next);
}
static inline void
@@ -242,12 +279,33 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
update_saved_ttbr0(tsk, next);
}
-#define deactivate_mm(tsk,mm) do { } while (0)
-#define activate_mm(prev,next) switch_mm(prev, next, current)
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+ if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ return cpu_possible_mask;
+
+ if (!is_compat_thread(task_thread_info(p)))
+ return cpu_possible_mask;
+
+ return system_32bit_el0_cpumask();
+}
+#define task_cpu_possible_mask task_cpu_possible_mask
void verify_cpu_asid_bits(void);
void post_ttbr_update_workaround(void);
+unsigned long arm64_mm_context_get(struct mm_struct *mm);
+void arm64_mm_context_put(struct mm_struct *mm);
+
+#define mm_untag_mask mm_untag_mask
+static inline unsigned long mm_untag_mask(struct mm_struct *mm)
+{
+ return -1UL >> 8;
+}
+
+#include <asm-generic/mmu_context.h>
+
#endif /* !__ASSEMBLY__ */
#endif /* !__ASM_MMU_CONTEXT_H */
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 1e93de68c044..79550b22ba19 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -7,9 +7,6 @@
#include <asm-generic/module.h>
-#define MODULE_ARCH_VERMAGIC "aarch64"
-
-#ifdef CONFIG_ARM64_MODULE_PLTS
struct mod_plt_sec {
int plt_shndx;
int plt_num_entries;
@@ -23,7 +20,6 @@ struct mod_arch_specific {
/* for CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampolines;
};
-#endif
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, const Elf64_Rela *rela,
@@ -32,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, u64 val);
-#ifdef CONFIG_RANDOMIZE_BASE
-extern u64 module_alloc_base;
-#else
-#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
-#endif
-
struct plt_entry {
/*
* A program that conforms to the AArch64 Procedure Call Standard
@@ -54,17 +44,25 @@ struct plt_entry {
static inline bool is_forbidden_offset_for_adrp(void *place)
{
- return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
- cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+ return cpus_have_final_cap(ARM64_WORKAROUND_843419) &&
((u64)place & 0xfff) >= 0xff8;
}
struct plt_entry get_plt_entry(u64 dst, void *pc);
-bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
-static inline bool plt_entry_is_initialized(const struct plt_entry *e)
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
{
- return e->adrp || e->add || e->br;
+ const Elf_Shdr *s, *se;
+ const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
+ }
+
+ return NULL;
}
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
new file mode 100644
index 000000000000..b9ae8349e35d
--- /dev/null
+++ b/arch/arm64/include/asm/module.lds.h
@@ -0,0 +1,26 @@
+SECTIONS {
+ .plt 0 : { BYTE(0) }
+ .init.plt 0 : { BYTE(0) }
+ .text.ftrace_trampoline 0 : { BYTE(0) }
+
+#ifdef CONFIG_KASAN_SW_TAGS
+ /*
+ * Outlined checks go into comdat-deduplicated sections named .text.hot.
+ * Because they are in comdats they are not combined by the linker and
+ * we otherwise end up with multiple sections with the same .text.hot
+ * name in the .ko file. The kernel module loader warns if it sees
+ * multiple sections with the same name so we use this sections
+ * directive to force them into a single section and silence the
+ * warning.
+ */
+ .text.hot : { *(.text.hot) }
+#endif
+
+#ifdef CONFIG_UNWIND_TABLES
+ /*
+ * Currently, we only use unwind info at module load time, so we can
+ * put it into the .init allocation.
+ */
+ .init.eh_frame : { *(.eh_frame) }
+#endif
+}
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
new file mode 100644
index 000000000000..20070a847304
--- /dev/null
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Linux-specific definitions for managing interactions with Microsoft's
+ * Hyper-V hypervisor. The definitions in this file are specific to
+ * the ARM64 architecture. See include/asm-generic/mshyperv.h for
+ * definitions are that architecture independent.
+ *
+ * Definitions that are specified in the Hyper-V Top Level Functional
+ * Spec (TLFS) should not go in this file, but should instead go in
+ * hyperv-tlfs.h.
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#ifndef _ASM_MSHYPERV_H
+#define _ASM_MSHYPERV_H
+
+#include <linux/types.h>
+#include <linux/arm-smccc.h>
+#include <asm/hyperv-tlfs.h>
+
+/*
+ * Declare calls to get and set Hyper-V VP register values on ARM64, which
+ * requires a hypercall.
+ */
+
+void hv_set_vpreg(u32 reg, u64 value);
+u64 hv_get_vpreg(u32 reg);
+void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result);
+
+static inline void hv_set_register(unsigned int reg, u64 value)
+{
+ hv_set_vpreg(reg, value);
+}
+
+static inline u64 hv_get_register(unsigned int reg)
+{
+ return hv_get_vpreg(reg);
+}
+
+/* SMCCC hypercall parameters */
+#define HV_SMCCC_FUNC_NUMBER 1
+#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \
+ ARM_SMCCC_STD_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_VENDOR_HYP, \
+ HV_SMCCC_FUNC_NUMBER)
+
+#include <asm-generic/mshyperv.h>
+
+#endif
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
new file mode 100644
index 000000000000..14ee86b019c2
--- /dev/null
+++ b/arch/arm64/include/asm/mte-def.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_DEF_H
+#define __ASM_MTE_DEF_H
+
+#define MTE_GRANULE_SIZE UL(16)
+#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
+#define MTE_TAG_SHIFT 56
+#define MTE_TAG_SIZE 4
+#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8)
+
+#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
+
+#endif /* __ASM_MTE_DEF_H */
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
new file mode 100644
index 000000000000..2e98028c1965
--- /dev/null
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_KASAN_H
+#define __ASM_MTE_KASAN_H
+
+#include <asm/compiler.h>
+#include <asm/cputype.h>
+#include <asm/mte-def.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+#ifdef CONFIG_KASAN_HW_TAGS
+
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return static_branch_unlikely(&mte_async_or_asymm_mode);
+}
+
+#else /* CONFIG_KASAN_HW_TAGS */
+
+static inline bool system_uses_mte_async_or_asymm_mode(void)
+{
+ return false;
+}
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#ifdef CONFIG_ARM64_MTE
+
+/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL, hence TCF0
+ * affects EL0 and TCF affects EL1 irrespective of which TTBR is
+ * used.
+ * The kernel accesses TTBR0 usually with LDTR/STTR instructions
+ * when UAO is available, so these would act as EL0 accesses using
+ * TCF0.
+ * However futex.h code uses exclusives which would be executed as
+ * EL1, this can potentially cause a tag check fault even if the
+ * user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag check override (TCO) bit disables temporarily the tag checking
+ * preventing the issue.
+ */
+static inline void mte_disable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+static inline void mte_enable_tco(void)
+{
+ asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+ ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+}
+
+/*
+ * These functions disable tag checking only if in MTE async mode
+ * since the sync mode generates exceptions synchronously and the
+ * nofault or load_unaligned_zeropad can handle them.
+ */
+static inline void __mte_disable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_disable_tco();
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+ if (system_uses_mte_async_or_asymm_mode())
+ mte_enable_tco();
+}
+
+/*
+ * These functions are meant to be only used from KASAN runtime through
+ * the arch_*() interface defined in asm/memory.h.
+ * These functions don't include system_supports_mte() checks,
+ * as KASAN only calls them when MTE is supported and enabled.
+ */
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+ /* Note: The format of KASAN tags is 0xF<x> */
+ u8 tag = 0xF0 | (u8)(((u64)(ptr)) >> MTE_TAG_SHIFT);
+
+ return tag;
+}
+
+/* Get allocation tag for the address. */
+static inline u8 mte_get_mem_tag(void *addr)
+{
+ asm(__MTE_PREAMBLE "ldg %0, [%0]"
+ : "+r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+/* Generate a random tag. */
+static inline u8 mte_get_random_tag(void)
+{
+ void *addr;
+
+ asm(__MTE_PREAMBLE "irg %0, %0"
+ : "=r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+static inline u64 __stg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+ : "+r"(p)
+ :
+ : "memory");
+ return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+ asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag.
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
+ * size must be MTE_GRANULE_SIZE aligned.
+ */
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+ bool init)
+{
+ u64 curr, mask, dczid, dczid_bs, dczid_dzp, end1, end2, end3;
+
+ /* Read DC G(Z)VA block size from the system register. */
+ dczid = read_cpuid(DCZID_EL0);
+ dczid_bs = 4ul << (dczid & 0xf);
+ dczid_dzp = (dczid >> 4) & 1;
+
+ curr = (u64)__tag_set(addr, tag);
+ mask = dczid_bs - 1;
+ /* STG/STZG up to the end of the first block. */
+ end1 = curr | mask;
+ end3 = curr + size;
+ /* DC GVA / GZVA in [end1, end2) */
+ end2 = end3 & ~mask;
+
+ /*
+ * The following code uses STG on the first DC GVA block even if the
+ * start address is aligned - it appears to be faster than an alignment
+ * check + conditional branch. Also, if the range size is at least 2 DC
+ * GVA blocks, the first two loops can use post-condition to save one
+ * branch each.
+ */
+#define SET_MEMTAG_RANGE(stg_post, dc_gva) \
+ do { \
+ if (!dczid_dzp && size >= 2 * dczid_bs) {\
+ do { \
+ curr = stg_post(curr); \
+ } while (curr < end1); \
+ \
+ do { \
+ dc_gva(curr); \
+ curr += dczid_bs; \
+ } while (curr < end2); \
+ } \
+ \
+ while (curr < end3) \
+ curr = stg_post(curr); \
+ } while (0)
+
+ if (init)
+ SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+ else
+ SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
+}
+
+void mte_enable_kernel_sync(void);
+void mte_enable_kernel_async(void);
+void mte_enable_kernel_asymm(void);
+
+#else /* CONFIG_ARM64_MTE */
+
+static inline void mte_disable_tco(void)
+{
+}
+
+static inline void mte_enable_tco(void)
+{
+}
+
+static inline void __mte_disable_tco_async(void)
+{
+}
+
+static inline void __mte_enable_tco_async(void)
+{
+}
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+ return 0xFF;
+}
+
+static inline u8 mte_get_mem_tag(void *addr)
+{
+ return 0xFF;
+}
+
+static inline u8 mte_get_random_tag(void)
+{
+ return 0xFF;
+}
+
+static inline void mte_set_mem_tag_range(void *addr, size_t size,
+ u8 tag, bool init)
+{
+}
+
+static inline void mte_enable_kernel_sync(void)
+{
+}
+
+static inline void mte_enable_kernel_async(void)
+{
+}
+
+static inline void mte_enable_kernel_asymm(void)
+{
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_MTE_KASAN_H */
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
new file mode 100644
index 000000000000..91fbd5c8a391
--- /dev/null
+++ b/arch/arm64/include/asm/mte.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_H
+#define __ASM_MTE_H
+
+#include <asm/compiler.h>
+#include <asm/mte-def.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitfield.h>
+#include <linux/kasan-enabled.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/pgtable-types.h>
+
+void mte_clear_page_tags(void *addr);
+unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
+ unsigned long n);
+unsigned long mte_copy_tags_to_user(void __user *to, void *from,
+ unsigned long n);
+int mte_save_tags(struct page *page);
+void mte_save_page_tags(const void *page_addr, void *tag_storage);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_page_tags(void *page_addr, const void *tag_storage);
+void mte_invalidate_tags(int type, pgoff_t offset);
+void mte_invalidate_tags_area(int type);
+void *mte_allocate_tag_storage(void);
+void mte_free_tag_storage(char *storage);
+
+#ifdef CONFIG_ARM64_MTE
+
+/* track which pages have valid allocation tags */
+#define PG_mte_tagged PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the page flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+ bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+ /*
+ * If the page is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ * Locked: PG_mte_lock && !PG_mte_tagged
+ * Unlocked: !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ if (!test_and_set_bit(PG_mte_lock, &page->flags))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
+
+void mte_zero_clear_page_tags(void *addr);
+void mte_sync_tags(pte_t pte, unsigned int nr_pages);
+void mte_copy_page_tags(void *kto, const void *kfrom);
+void mte_thread_init_user(void);
+void mte_thread_switch(struct task_struct *next);
+void mte_cpu_setup(void);
+void mte_suspend_enter(void);
+void mte_suspend_exit(void);
+long set_mte_ctrl(struct task_struct *task, unsigned long arg);
+long get_mte_ctrl(struct task_struct *task);
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data);
+size_t mte_probe_user_range(const char __user *uaddr, size_t size);
+
+#else /* CONFIG_ARM64_MTE */
+
+/* unused if !CONFIG_ARM64_MTE, silence the compiler */
+#define PG_mte_tagged 0
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+ return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ return false;
+}
+static inline void mte_zero_clear_page_tags(void *addr)
+{
+}
+static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
+{
+}
+static inline void mte_copy_page_tags(void *kto, const void *kfrom)
+{
+}
+static inline void mte_thread_init_user(void)
+{
+}
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline void mte_suspend_enter(void)
+{
+}
+static inline void mte_suspend_exit(void)
+{
+}
+static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
+{
+ return 0;
+}
+static inline long get_mte_ctrl(struct task_struct *task)
+{
+ return 0;
+}
+static inline int mte_ptrace_copy_tags(struct task_struct *child,
+ long request, unsigned long addr,
+ unsigned long data)
+{
+ return -EIO;
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+ if (!system_supports_mte())
+ return;
+
+ /*
+ * Re-enable tag checking (TCO set on exception entry). This is only
+ * necessary if MTE is enabled in either the kernel or the userspace
+ * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set
+ * for both). With MTE disabled in the kernel and disabled or
+ * asynchronous in userspace, tag check faults (including in uaccesses)
+ * are not reported, therefore there is no need to re-enable checking.
+ * This is beneficial on microarchitectures where re-enabling TCO is
+ * expensive.
+ */
+ if (kasan_hw_tags_enabled() ||
+ (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT)))
+ asm volatile(SET_PSTATE_TCO(0));
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_check_tfsr_el1(void);
+
+static inline void mte_check_tfsr_entry(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ mte_check_tfsr_el1();
+}
+
+static inline void mte_check_tfsr_exit(void)
+{
+ if (!system_supports_mte())
+ return;
+
+ /*
+ * The asynchronous faults are sync'ed automatically with
+ * TFSR_EL1 on kernel entry but for exit an explicit dsb()
+ * is required.
+ */
+ dsb(nsh);
+ isb();
+
+ mte_check_tfsr_el1();
+}
+#else
+static inline void mte_check_tfsr_el1(void)
+{
+}
+static inline void mte_check_tfsr_entry(void)
+{
+}
+static inline void mte_check_tfsr_exit(void)
+{
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_MTE_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index 626ad01e83bf..8c8cf4297cc3 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -3,49 +3,6 @@
#define __ASM_NUMA_H
#include <asm/topology.h>
-
-#ifdef CONFIG_NUMA
-
-#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
-
-int __node_distance(int from, int to);
-#define node_distance(a, b) __node_distance(a, b)
-
-extern nodemask_t numa_nodes_parsed __initdata;
-
-extern bool numa_off;
-
-/* Mappings between node number and cpus on that node. */
-extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-void numa_clear_node(unsigned int cpu);
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-const struct cpumask *cpumask_of_node(int node);
-#else
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const struct cpumask *cpumask_of_node(int node)
-{
- return node_to_cpumask_map[node];
-}
-#endif
-
-void __init arm64_numa_init(void);
-int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-void __init numa_set_distance(int from, int to, int distance);
-void __init numa_free_distance(void);
-void __init early_map_cpu_to_node(unsigned int cpu, int nid);
-void numa_store_cpu_info(unsigned int cpu);
-void numa_add_cpu(unsigned int cpu);
-void numa_remove_cpu(unsigned int cpu);
-
-#else /* CONFIG_NUMA */
-
-static inline void numa_store_cpu_info(unsigned int cpu) { }
-static inline void numa_add_cpu(unsigned int cpu) { }
-static inline void numa_remove_cpu(unsigned int cpu) { }
-static inline void arm64_numa_init(void) { }
-static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
-
-#endif /* CONFIG_NUMA */
+#include <asm-generic/numa.h>
#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index f99d48ecbeef..2403f7b4cdbf 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,13 +11,8 @@
#include <linux/const.h>
/* PAGE_SHIFT determines the page size */
-/* CONT_SHIFT determines the number of pages which can be tracked together */
#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT
-#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
-#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
-#define CONT_MASK (~(CONT_SIZE-1))
-
#endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index d39ddb258a04..2312e6ee595f 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -13,28 +13,41 @@
#ifndef __ASSEMBLY__
#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/types.h> /* for gfp_t */
#include <asm/pgtable-types.h>
-extern void __cpu_clear_user_page(void *p, unsigned long user);
-extern void __cpu_copy_user_page(void *to, const void *from,
- unsigned long user);
+struct page;
+struct vm_area_struct;
+
extern void copy_page(void *to, const void *from);
extern void clear_page(void *to);
-#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr)
-#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr)
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+
+void copy_highpage(struct page *to, struct page *from);
+#define __HAVE_ARCH_COPY_HIGHPAGE
+
+struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
+ unsigned long vaddr);
+#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
+
+void tag_clear_highpage(struct page *to);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
typedef struct page *pgtable_t;
-extern int pfn_valid(unsigned long);
+int pfn_is_map_memory(unsigned long pfn);
#include <asm/memory.h>
#endif /* !__ASSEMBLY__ */
-#define VM_DATA_DEFAULT_FLAGS \
- (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
#include <asm-generic/getorder.h>
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index cf3a0fd7c1a7..9aa193e0e8f2 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
#define _ASM_ARM64_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops {
- unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
- struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
static inline u64 paravirt_steal_clock(int cpu)
{
- return pv_ops.time.steal_clock(cpu);
+ return static_call(pv_steal_clock)(cpu);
}
int __init pv_time_init(void);
diff --git a/arch/arm64/include/asm/paravirt_api_clock.h b/arch/arm64/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/arm64/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
new file mode 100644
index 000000000000..68908b82b168
--- /dev/null
+++ b/arch/arm64/include/asm/patching.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_PATCHING_H
+#define __ASM_PATCHING_H
+
+#include <linux/types.h>
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+
+int aarch64_insn_write_literal_u64(void *addr, u64 val);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+#endif /* __ASM_PATCHING_H */
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 70b323cf8300..016eb6b46dc0 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -9,7 +9,6 @@
#include <asm/io.h>
#define PCIBIOS_MIN_IO 0x1000
-#define PCIBIOS_MIN_MEM 0
/*
* Set to 1 if the kernel should re-assign all PCI bus numbers
@@ -17,21 +16,9 @@
#define pcibios_assign_all_busses() \
(pci_has_flag(PCI_REASSIGN_ALL_BUS))
-#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_wc() 1
-extern int isa_dma_bridge_buggy;
-
-#ifdef CONFIG_PCI
-static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
-{
- /* no legacy IRQ on arm64 */
- return -ENODEV;
-}
-
-static inline int pci_proc_domain(struct pci_bus *bus)
-{
- return 1;
-}
-#endif /* CONFIG_PCI */
+/* Generic PCI */
+#include <asm-generic/pci.h>
#endif /* __ASM_PCI_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 0b6409b89e5e..9abcc8ef3087 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -10,6 +10,7 @@
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/stack_pointer.h>
+#include <asm/sysreg.h>
static inline void set_my_cpu_offset(unsigned long off)
{
@@ -19,7 +20,16 @@ static inline void set_my_cpu_offset(unsigned long off)
:: "r" (off) : "memory");
}
-static inline unsigned long __my_cpu_offset(void)
+static inline unsigned long __hyp_my_cpu_offset(void)
+{
+ /*
+ * Non-VHE hyp code runs with preemption disabled. No need to hazard
+ * the register access against barrier() as in __kern_my_cpu_offset.
+ */
+ return read_sysreg(tpidr_el2);
+}
+
+static inline unsigned long __kern_my_cpu_offset(void)
{
unsigned long off;
@@ -35,7 +45,12 @@ static inline unsigned long __my_cpu_offset(void)
return off;
}
-#define __my_cpu_offset __my_cpu_offset()
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define __my_cpu_offset __hyp_my_cpu_offset()
+#else
+#define __my_cpu_offset __kern_my_cpu_offset()
+#endif
#define PERCPU_RW_OPS(sz) \
static inline unsigned long __percpu_read_##sz(void *ptr) \
@@ -125,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd)
* re-enabling preemption for preemptible kernels, but doing that in a way
* which builds inside a module would mean messing directly with the preempt
* count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model for missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
*/
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
-({ \
- int __ret; \
- preempt_disable_notrace(); \
- __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
- raw_cpu_ptr(&(ptr2)), \
- o1, o2, n1, n2); \
- preempt_enable_notrace(); \
- __ret; \
-})
#define _pcp_protect(op, pcp, ...) \
({ \
@@ -225,6 +234,38 @@ PERCPU_RET_OP(add, add, ldadd)
#define this_cpu_cmpxchg_8(pcp, o, n) \
_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = o; \
+ new__ = n; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
+#define __per_cpu_offset
+#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu))
+#endif
+
#include <asm-generic/percpu.h>
+/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
+#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT)
+#undef this_cpu_ptr
+#define this_cpu_ptr raw_cpu_ptr
+#undef __this_cpu_read
+#define __this_cpu_read raw_cpu_read
+#undef __this_cpu_write
+#define __this_cpu_write raw_cpu_write
+#endif
+
#endif /* __ASM_PERCPU_H */
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2bdbc79bbd01..eb7071c9eb34 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -9,205 +9,6 @@
#include <asm/stack_pointer.h>
#include <asm/ptrace.h>
-#define ARMV8_PMU_MAX_COUNTERS 32
-#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
-
-/*
- * Common architectural and microarchitectural event numbers.
- */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
-#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33
-#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34
-#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36
-#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37
-#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38
-
-/* Statistical profiling extension microarchitectural events */
-#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
-#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001
-#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
-#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003
-
-/* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8
-
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
-#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
-#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
-#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
-#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
-#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
-#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv3
- */
-#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
-
-/*
- * PMUSERENR: user enable reg
- */
-#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
-#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
-#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
-#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
-#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
-
#ifdef CONFIG_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 172d76fa0245..237224484d0f 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,37 +13,13 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+#define __HAVE_ARCH_PGD_FREE
+#include <asm-generic/pgalloc.h>
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_PGTABLE_USER;
- struct page *page;
-
- if (mm == &init_mm)
- gfp = GFP_PGTABLE_KERNEL;
-
- page = alloc_page(gfp);
- if (!page)
- return NULL;
- if (!pgtable_pmd_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page_address(page);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
-{
- BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
- pgtable_pmd_page_dtor(virt_to_page(pmdp));
- free_page((unsigned long)pmdp);
-}
-
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
@@ -51,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
+ pudval_t pudval = PUD_TYPE_TABLE;
+
+ pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
+ __pud_populate(pudp, __pa(pmdp), pudval);
}
#else
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
@@ -62,28 +41,20 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
#if CONFIG_PGTABLE_LEVELS > 3
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
+static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
- BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
- free_page((unsigned long)pudp);
+ set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
}
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
-}
+ p4dval_t p4dval = P4D_TYPE_TABLE;
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
-{
- __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
+ p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
+ __p4d_populate(p4dp, __pa(pudp), p4dval);
}
#else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
+static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
{
BUILD_BUG();
}
@@ -105,17 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
- /*
- * The pmd must be loaded with the physical address of the PTE table
- */
- __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm && mm != &init_mm);
+ __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
- __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm == &init_mm);
+ __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN);
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d9fbd433cc17..e4944d517c99 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -29,7 +29,7 @@
* Size mapped by an entry at level n ( 0 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at at level n, we have further
+ * the architecture is 4. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
@@ -49,7 +49,7 @@
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD PTRS_PER_PTE
+#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3))
#endif
/*
@@ -59,7 +59,7 @@
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PTRS_PER_PUD PTRS_PER_PTE
+#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
#endif
/*
@@ -72,44 +72,41 @@
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
/*
- * Section address mask and size definitions.
- */
-#define SECTION_SHIFT PMD_SHIFT
-#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK (~(SECTION_SIZE-1))
-
-/*
* Contiguous page definitions.
*/
-#ifdef CONFIG_ARM64_64K_PAGES
-#define CONT_PTE_SHIFT 5
-#define CONT_PMD_SHIFT 5
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define CONT_PTE_SHIFT 7
-#define CONT_PMD_SHIFT 5
-#else
-#define CONT_PTE_SHIFT 4
-#define CONT_PMD_SHIFT 4
-#endif
-
-#define CONT_PTES (1 << CONT_PTE_SHIFT)
+#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
+#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE)
#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1))
-#define CONT_PMDS (1 << CONT_PMD_SHIFT)
+
+#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT)
+#define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT))
#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE)
#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1))
-/* the the numerical offset of the PTE within a range of CONT_PTES */
-#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
/*
* Hardware page table definitions.
*
+ * Level 0 descriptor (P4D).
+ */
+#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
+#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1)
+#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
+#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
+#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
+#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59)
+#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60)
+
+/*
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
+#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
+#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59)
+#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60)
/*
* Level 2 descriptor (PMD).
@@ -131,6 +128,8 @@
#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
+#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59)
+#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60)
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
@@ -150,16 +149,17 @@
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
+#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
-#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#ifdef CONFIG_ARM64_PA_BITS_52
#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
+#define PTE_ADDR_HIGH_SHIFT 36
#else
#define PTE_ADDR_MASK PTE_ADDR_LOW
#endif
@@ -171,31 +171,17 @@
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
- * 2nd stage PTE definitions
+ * PIIndex[3:0] encoding (Permission Indirection Extension)
*/
-#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
-#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
-#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
-
-#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
-#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
-#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
-
-#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */
-#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */
-#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */
+#define PTE_PI_IDX_0 6 /* AP[1], USER */
+#define PTE_PI_IDX_1 51 /* DBM */
+#define PTE_PI_IDX_2 53 /* PXN */
+#define PTE_PI_IDX_3 54 /* UXN */
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
-#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2)
-
-/*
- * EL2/HYP PTE/PMD definitions
- */
-#define PMD_HYP PMD_SECT_USER
-#define PTE_HYP PTE_USER
/*
* Highest possible physical address supported.
@@ -215,6 +201,7 @@
#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
#define TCR_TxSZ_WIDTH 6
#define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET)
#define TCR_EPD0_SHIFT 7
#define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT)
@@ -290,18 +277,22 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
#define TCR_NFD1 (UL(1) << 54)
+#define TCR_E0PD0 (UL(1) << 55)
+#define TCR_E0PD1 (UL(1) << 56)
+#define TCR_TCMA0 (UL(1) << 57)
+#define TCR_TCMA1 (UL(1) << 58)
/*
* TTBR.
*/
#ifdef CONFIG_ARM64_PA_BITS_52
/*
- * This should be GENMASK_ULL(47, 2).
* TTBR_ELx[1] is RES0 in this configuration.
*/
-#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
+#define TTBR_BADDR_MASK_52 GENMASK_ULL(47, 2)
#endif
#ifdef CONFIG_ARM64_VA_BITS_52
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 8dc6c5cdabe6..483dbfa39c4c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -14,97 +14,145 @@
* Software defined PTE bits definition.
*/
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
+#define PTE_SWP_EXCLUSIVE (_AT(pteval_t, 1) << 2) /* only for swp ptes */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define PTE_DEVMAP (_AT(pteval_t, 1) << 57)
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
-#ifndef __ASSEMBLY__
-
-#include <asm/pgtable-types.h>
+/*
+ * This bit indicates that the entry is present i.e. pmd_page()
+ * still points to a valid huge page in memory even if the pmd
+ * has been invalidated.
+ */
+#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PTE_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PTE_NG : 0)
-#define PMD_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0)
-
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL))
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT
-#define PAGE_KERNEL __pgprot(PROT_NORMAL)
-#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
-#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+#define _PAGE_KERNEL (PROT_NORMAL)
+#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
+#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
+#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN)
+#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
-#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
-#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
-#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
-#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
+#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
+#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_S2_MEMATTR(attr) \
+#ifdef __ASSEMBLY__
+#define PTE_MAYBE_NG 0
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cpufeature.h>
+#include <asm/pgtable-types.h>
+
+extern bool arm64_use_ng_mappings;
+
+#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
+#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+
+#define lpa2_is_enabled() false
+
+/*
+ * If we have userspace only BTI we don't want to mark kernel pages
+ * guarded even if the system does support BTI.
+ */
+#define PTE_MAYBE_GP (system_supports_bti_kernel() ? PTE_GP : 0)
+
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT)
+
+#define PAGE_S2_MEMATTR(attr, has_fwb) \
({ \
u64 __val; \
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
+ if (has_fwb) \
__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
else \
__val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
__val; \
})
-#define PAGE_S2_XN \
- ({ \
- u64 __val; \
- if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \
- __val = 0; \
- else \
- __val = PTE_S2_XN; \
- __val; \
- })
-
-#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
-#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
-
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_READONLY
-#define __P011 PAGE_READONLY
-#define __P100 PAGE_EXECONLY
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_READONLY_EXEC
-#define __P111 PAGE_READONLY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_EXECONLY
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
+#define PAGE_SHARED __pgprot(_PAGE_SHARED)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_READONLY)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC)
+#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY)
#endif /* __ASSEMBLY__ */
+#define pte_pi_index(pte) ( \
+ ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
+ ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \
+ ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \
+ ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0)))
+
+/*
+ * Page types used via Permission Indirection Extension (PIE). PIE uses
+ * the USER, DBM, PXN and UXN bits to to generate an index which is used
+ * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define
+ * combinations we use on non-PIE systems with the same encoding, for
+ * convenience these are listed here as comments as are the unallocated
+ * encodings.
+ */
+
+/* 0: PAGE_DEFAULT */
+/* 1: PTE_USER */
+/* 2: PTE_WRITE */
+/* 3: PTE_WRITE | PTE_USER */
+/* 4: PAGE_EXECONLY PTE_PXN */
+/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */
+/* 6: PTE_PXN | PTE_WRITE */
+/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+/* 8: PAGE_KERNEL_ROX PTE_UXN */
+/* 9: PTE_UXN | PTE_USER */
+/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+/* b: PTE_UXN | PTE_WRITE | PTE_USER */
+/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
+
+#define PIE_E0 ( \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+
+#define PIE_E1 ( \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+
#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index acb0751a6606..b8f158ae2527 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -14,6 +14,7 @@
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
/*
@@ -44,13 +45,11 @@ typedef struct { pteval_t pgprot; } pgprot_t;
#define __pgprot(x) ((pgprot_t) { (x) } )
#if CONFIG_PGTABLE_LEVELS == 2
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#elif CONFIG_PGTABLE_LEVELS == 3
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
#elif CONFIG_PGTABLE_LEVELS == 4
-#include <asm-generic/5level-fixup.h>
+#include <asm-generic/pgtable-nop4d.h>
#endif
#endif /* __ASM_PGTABLE_TYPES_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5d15b4735a0e..79ce70fbb751 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -9,6 +9,7 @@
#include <asm/proc-fns.h>
#include <asm/memory.h>
+#include <asm/mte.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
#include <asm/tlbflush.h>
@@ -21,9 +22,9 @@
* and fixed mappings
*/
#define VMALLOC_START (MODULES_END)
-#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END (VMEMMAP_START - SZ_256M)
-#define FIRST_USER_ADDRESS 0UL
+#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
#ifndef __ASSEMBLY__
@@ -32,13 +33,31 @@
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/page_table_check.h>
-extern struct page *vmemmap;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+/* Set stride and tlb_level in flush_*_tlb_range */
+#define flush_pmd_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+#define flush_pud_tlb_range(vma, addr, end) \
+ __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
-extern void __pud_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
+static inline bool arch_thp_swp_supported(void)
+{
+ return !system_supports_mte();
+}
+#define arch_thp_swp_supported arch_thp_swp_supported
+
+/*
+ * Outside of a few very special situations (e.g. hibernation), we always
+ * use broadcast TLB invalidation instructions, therefore a spurious page
+ * fault on one CPU which has been handled concurrently by another CPU
+ * does not need to perform additional invalidation.
+ */
+#define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0)
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -47,16 +66,23 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
-#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
+#define pte_ERROR(e) \
+ pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
/*
* Macros to convert between a physical address and its placement in a
* page table entry, taking care of 52-bit addresses.
*/
#ifdef CONFIG_ARM64_PA_BITS_52
-#define __pte_to_phys(pte) \
- ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
-#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return (pte_val(pte) & PTE_ADDR_LOW) |
+ ((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
+}
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PTE_ADDR_MASK;
+}
#else
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
#define __phys_to_pte_val(phys) (phys)
@@ -77,9 +103,13 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
+#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \
+ PTE_ATTRINDX(MT_NORMAL_TAGGED))
#define pte_cont_addr_end(addr, end) \
({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \
@@ -91,7 +121,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
-#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte))
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
@@ -102,26 +132,26 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
*/
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
-#define pte_valid_young(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
-#define pte_valid_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
-
/*
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
* so that we don't erroneously return false for pages that have been
* remapped as PROT_NONE but are yet to be flushed from the TLB.
+ * Note that we can't make any assumptions based on the state of the access
+ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * TLB.
*/
#define pte_accessible(mm, pte) \
- (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+ (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
/*
- * p??_access_permitted() is true for valid user mappings (subject to the
- * write permission check) other than user execute-only which do not have the
- * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set.
+ * p??_access_permitted() is true for valid user mappings (PTE_USER
+ * bit set, subject to the write permission check). For execute-only
+ * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
+ * not set) must return false. PROT_NONE mappings do not have the
+ * PTE_VALID bit set.
*/
#define pte_access_permitted(pte, write) \
- (pte_valid_user(pte) && (!(write) || pte_write(pte)))
+ (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
@@ -139,14 +169,19 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
return pte;
}
-static inline pte_t pte_wrprotect(pte_t pte)
+static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
{
- pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
- pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
- return pte;
+ pmd_val(pmd) &= ~pgprot_val(prot);
+ return pmd;
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+ pmd_val(pmd) |= pgprot_val(prot);
+ return pmd;
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
{
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -171,6 +206,20 @@ static inline pte_t pte_mkdirty(pte_t pte)
return pte;
}
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY bit.
+ */
+ if (pte_hw_dirty(pte))
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+ pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ return pte;
+}
+
static inline pte_t pte_mkold(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_AF));
@@ -227,6 +276,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
extern void __sync_icache_dcache(pte_t pteval);
+bool pgattr_change_is_safe(u64 old, u64 new);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
@@ -244,7 +294,7 @@ extern void __sync_icache_dcache(pte_t pteval);
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
pte_t pte)
{
pte_t old_pte;
@@ -270,18 +320,44 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+ "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
}
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
- __check_racy_pte_update(mm, ptep, pte);
+ /*
+ * If the PTE would provide user space access to the tags associated
+ * with it then ensure that the MTE tags are synchronised. Although
+ * pte_access_permitted() returns false for exec only mappings, they
+ * don't expose tags (instruction fetches don't check tags).
+ */
+ if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte, nr_pages);
+}
- set_pte(ptep, pte);
+static inline void set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+ __sync_cache_and_tags(pte, nr);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte_val(pte) += PAGE_SIZE;
+ }
}
+#define set_ptes set_ptes
/*
* Huge pte definitions.
@@ -302,6 +378,11 @@ static inline pte_t pgd_pte(pgd_t pgd)
return __pte(pgd_val(pgd));
}
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+ return __pte(p4d_val(p4d));
+}
+
static inline pte_t pud_pte(pud_t pud)
{
return __pte(pud_val(pud));
@@ -337,9 +418,34 @@ static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & PTE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
+}
+
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
+
+ return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
#ifdef CONFIG_NUMA_BALANCING
/*
- * See the comment in include/asm-generic/pgtable.h
+ * See the comment in include/linux/pgtable.h
*/
static inline int pte_protnone(pte_t pte)
{
@@ -352,25 +458,44 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
+#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
+
+static inline int pmd_present(pmd_t pmd)
+{
+ return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd);
+}
+
/*
* THP definitions.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#define pmd_present(pmd) pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
+#define pmd_user(pmd) pte_user(pmd_pte(pmd))
+#define pmd_user_exec(pmd) pte_user_exec(pmd_pte(pmd))
+#define pmd_cont(pmd) pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+ pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
+ pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
+
+ return pmd;
+}
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
@@ -403,7 +528,33 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
+static inline void __set_pte_at(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ __sync_cache_and_tags(pte, nr);
+ __check_safe_pte_update(mm, ptep, pte);
+ set_pte(ptep, pte);
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ page_table_check_pmd_set(mm, pmdp, pmd);
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
+ PMD_SIZE >> PAGE_SHIFT);
+}
+
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(mm, pudp, pud);
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
+ PUD_SIZE >> PAGE_SHIFT);
+}
+
+#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
+#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys)
@@ -411,6 +562,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define pgprot_nx(prot) \
+ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -420,6 +574,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_tagged(prot) \
+ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
+#define pgprot_mhp pgprot_tagged
/*
* DMA allocations for non-coherent devices use what the Arm architecture calls
* "Normal non-cacheable" memory, which permits speculation, unaligned accesses
@@ -439,12 +596,15 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))
-
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
+#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
+#define pmd_bad(pmd) (!pmd_table(pmd))
+
+#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
+#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
@@ -461,6 +621,7 @@ extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
+extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
@@ -497,21 +658,19 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
return __pmd_to_phys(pmd);
}
-static inline void pte_unmap(pte_t *pte) { }
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ return (unsigned long)__va(pmd_page_paddr(pmd));
+}
/* Find an entry in the third-level page table. */
-#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
-#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr))))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
-#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
+#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd))
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@ -524,12 +683,16 @@ static inline void pte_unmap(pte_t *pte) { }
#if CONFIG_PGTABLE_LEVELS > 2
-#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_bad(pud) (!pud_table(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
+#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
+#define pud_user(pud) pte_user(pud_pte(pud))
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
@@ -558,17 +721,19 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
return __pud_to_phys(pud);
}
-/* Find an entry in the second-level page table. */
-#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)__va(pud_page_paddr(pud));
+}
+/* Find an entry in the second-level page table. */
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
-#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
-#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
+#define pud_page(pud) phys_to_page(__pud_to_phys(pud))
/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -576,6 +741,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
#else
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr) NULL
@@ -588,51 +754,55 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
#if CONFIG_PGTABLE_LEVELS > 3
-#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) (!(pgd_val(pgd) & 2))
-#define pgd_present(pgd) (pgd_val(pgd))
+#define p4d_none(p4d) (!p4d_val(p4d))
+#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
+#define p4d_present(p4d) (p4d_val(p4d))
-static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
- if (in_swapper_pgdir(pgdp)) {
- set_swapper_pgd(pgdp, pgd);
+ if (in_swapper_pgdir(p4dp)) {
+ set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
return;
}
- WRITE_ONCE(*pgdp, pgd);
+ WRITE_ONCE(*p4dp, p4d);
dsb(ishst);
isb();
}
-static inline void pgd_clear(pgd_t *pgdp)
+static inline void p4d_clear(p4d_t *p4dp)
{
- set_pgd(pgdp, __pgd(0));
+ set_p4d(p4dp, __p4d(0));
}
-static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
+static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
{
- return __pgd_to_phys(pgd);
+ return __p4d_to_phys(p4d);
}
-/* Find an entry in the frst-level page table. */
-#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ return (pud_t *)__va(p4d_page_paddr(p4d));
+}
-#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
-#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))
+/* Find an entry in the first-level page table. */
+#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
-#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
-#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
+#define p4d_page(p4d) pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
/* use ONLY for statically allocated translation tables */
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
#else
+#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
@@ -644,29 +814,32 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
-#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-
-/* to find an entry in a page-table-directory */
-#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-
-#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
-
-#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr)))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
+ /*
+ * Normal and Normal-Tagged are two different memory types and indices
+ * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
+ */
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
- PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
+ PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
+ PTE_ATTRINDX_MASK;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
+ /*
+ * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
+ * dirtiness again.
+ */
+ if (pte_sw_dirty(pte))
+ pte = pte_mkdirty(pte);
return pte;
}
@@ -700,6 +873,23 @@ static inline int pgd_devmap(pgd_t pgd)
}
#endif
+#ifdef CONFIG_PAGE_TABLE_CHECK
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+ return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte));
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+ return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+ return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
+}
+#endif
+
/*
* Atomic pte/pmd modifications.
*/
@@ -761,7 +951,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- return __pte(xchg_relaxed(&pte_val(*ptep), 0));
+ pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
+
+ page_table_check_pte_clear(mm, pte);
+
+ return pte;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -769,7 +963,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
+ pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
+
+ page_table_check_pmd_clear(mm, pmd);
+
+ return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -785,12 +983,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
pte = READ_ONCE(*ptep);
do {
old_pte = pte;
- /*
- * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
- * clear), set the PTE_DIRTY bit.
- */
- if (pte_hw_dirty(pte))
- pte = pte_mkdirty(pte);
pte = pte_wrprotect(pte);
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
@@ -809,6 +1001,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
+ page_table_check_pmd_set(vma->vm_mm, pmdp, pmd);
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
@@ -816,12 +1009,13 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
- * bits 2-7: swap type
+ * bits 2: remember PG_anon_exclusive
+ * bits 3-7: swap type
* bits 8-57: swap offset
* bit 58: PTE_PROT_NONE (must be zero)
*/
-#define __SWP_TYPE_SHIFT 2
-#define __SWP_TYPE_BITS 6
+#define __SWP_TYPE_SHIFT 3
+#define __SWP_TYPE_BITS 5
#define __SWP_OFFSET_BITS 50
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -834,21 +1028,55 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp) __pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
/*
* Ensure that there are not more swap files than can be encoded in the kernel
* PTEs.
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
-extern int kern_addr_valid(unsigned long addr);
+#ifdef CONFIG_ARM64_MTE
+
+#define __HAVE_ARCH_PREPARE_TO_SWAP
+static inline int arch_prepare_to_swap(struct page *page)
+{
+ if (system_supports_mte())
+ return mte_save_tags(page);
+ return 0;
+}
+
+#define __HAVE_ARCH_SWAP_INVALIDATE
+static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
+{
+ if (system_supports_mte())
+ mte_invalidate_tags(type, offset);
+}
-#include <asm-generic/pgtable.h>
+static inline void arch_swap_invalidate_area(int type)
+{
+ if (system_supports_mte())
+ mte_invalidate_tags_area(type);
+}
+
+#define __HAVE_ARCH_SWAP_RESTORE
+static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
+{
+ if (system_supports_mte())
+ mte_restore_tags(entry, &folio->page);
+}
+
+#endif /* CONFIG_ARM64_MTE */
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
*/
-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+ struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ unsigned int nr)
{
/*
* We don't do anything here, so there's a very small chance of
@@ -857,6 +1085,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
*/
}
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -871,14 +1101,29 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
* page after fork() + CoW for pfn mappings. We don't always have a
* hardware-managed access flag on arm64.
*/
-static inline bool arch_faults_on_old_pte(void)
-{
- WARN_ON(preemptible());
+#define arch_has_hw_pte_young cpu_has_hw_af
+
+/*
+ * Experimentally, it's cheap to set the access flag in hardware and we
+ * benefit from prefaulting mappings as 'old' to start with.
+ */
+#define arch_wants_old_prefaulted_pte cpu_has_hw_af
- return !cpu_has_hw_af();
+static inline bool pud_sect_supported(void)
+{
+ return PAGE_SIZE == SZ_4K;
}
-#define arch_faults_on_old_pte arch_faults_on_old_pte
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 7a24bad1a58b..d2e0306e65d3 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -3,12 +3,23 @@
#define __ASM_POINTER_AUTH_H
#include <linux/bitops.h>
+#include <linux/prctl.h>
#include <linux/random.h>
#include <asm/cpufeature.h>
#include <asm/memory.h>
#include <asm/sysreg.h>
+/*
+ * The EL0/EL1 pointer bits used by a pointer authentication code.
+ * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply.
+ */
+#define ptrauth_user_pac_mask() GENMASK_ULL(54, vabits_actual)
+#define ptrauth_kernel_pac_mask() GENMASK_ULL(63, vabits_actual)
+
+#define PR_PAC_ENABLED_KEYS_MASK \
+ (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
#ifdef CONFIG_ARM64_PTR_AUTH
/*
* Each key is a 128-bit quantity which is split across a pair of 64-bit
@@ -22,7 +33,7 @@ struct ptrauth_key {
* We give each process its own keys, which are shared by all threads. The keys
* are inherited upon fork(), and reinitialised upon exec*().
*/
-struct ptrauth_keys {
+struct ptrauth_keys_user {
struct ptrauth_key apia;
struct ptrauth_key apib;
struct ptrauth_key apda;
@@ -30,68 +41,113 @@ struct ptrauth_keys {
struct ptrauth_key apga;
};
-static inline void ptrauth_keys_init(struct ptrauth_keys *keys)
+#define __ptrauth_key_install_nosync(k, v) \
+do { \
+ struct ptrauth_key __pki_v = (v); \
+ write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
+ write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
+} while (0)
+
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+struct ptrauth_keys_kernel {
+ struct ptrauth_key apia;
+};
+
+static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
{
- if (system_supports_address_auth()) {
+ if (system_supports_address_auth())
get_random_bytes(&keys->apia, sizeof(keys->apia));
- get_random_bytes(&keys->apib, sizeof(keys->apib));
- get_random_bytes(&keys->apda, sizeof(keys->apda));
- get_random_bytes(&keys->apdb, sizeof(keys->apdb));
+}
+
+static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
+{
+ if (!system_supports_address_auth())
+ return;
+
+ __ptrauth_key_install_nosync(APIA, keys->apia);
+ isb();
+}
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
+{
+ if (system_supports_address_auth()) {
+ __ptrauth_key_install_nosync(APIB, keys->apib);
+ __ptrauth_key_install_nosync(APDA, keys->apda);
+ __ptrauth_key_install_nosync(APDB, keys->apdb);
}
if (system_supports_generic_auth())
- get_random_bytes(&keys->apga, sizeof(keys->apga));
+ __ptrauth_key_install_nosync(APGA, keys->apga);
}
-#define __ptrauth_key_install(k, v) \
-do { \
- struct ptrauth_key __pki_v = (v); \
- write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
- write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
-} while (0)
-
-static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
+static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
{
if (system_supports_address_auth()) {
- __ptrauth_key_install(APIA, keys->apia);
- __ptrauth_key_install(APIB, keys->apib);
- __ptrauth_key_install(APDA, keys->apda);
- __ptrauth_key_install(APDB, keys->apdb);
+ get_random_bytes(&keys->apia, sizeof(keys->apia));
+ get_random_bytes(&keys->apib, sizeof(keys->apib));
+ get_random_bytes(&keys->apda, sizeof(keys->apda));
+ get_random_bytes(&keys->apdb, sizeof(keys->apdb));
}
if (system_supports_generic_auth())
- __ptrauth_key_install(APGA, keys->apga);
+ get_random_bytes(&keys->apga, sizeof(keys->apga));
+
+ ptrauth_keys_install_user(keys);
}
extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
-/*
- * The EL0 pointer bits used by a pointer authentication code.
- * This is dependent on TBI0 being enabled, or bits 63:56 would also apply.
- */
-#define ptrauth_user_pac_mask() GENMASK(54, vabits_actual)
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+ unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
-/* Only valid for EL0 TTBR0 instruction pointers */
-static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
+static __always_inline void ptrauth_enable(void)
{
- return ptr & ~ptrauth_user_pac_mask();
+ if (!system_supports_address_auth())
+ return;
+ sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+ isb();
}
-#define ptrauth_thread_init_user(tsk) \
-do { \
- struct task_struct *__ptiu_tsk = (tsk); \
- ptrauth_keys_init(&__ptiu_tsk->thread.keys_user); \
- ptrauth_keys_switch(&__ptiu_tsk->thread.keys_user); \
-} while (0)
+#define ptrauth_suspend_exit() \
+ ptrauth_keys_install_user(&current->thread.keys_user)
-#define ptrauth_thread_switch(tsk) \
- ptrauth_keys_switch(&(tsk)->thread.keys_user)
+#define ptrauth_thread_init_user() \
+ do { \
+ ptrauth_keys_init_user(&current->thread.keys_user); \
+ \
+ /* enable all keys */ \
+ if (system_supports_address_auth()) \
+ ptrauth_set_enabled_keys(current, \
+ PR_PAC_ENABLED_KEYS_MASK, \
+ PR_PAC_ENABLED_KEYS_MASK); \
+ } while (0)
+
+#define ptrauth_thread_switch_user(tsk) \
+ ptrauth_keys_install_user(&(tsk)->thread.keys_user)
#else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_enable()
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
-#define ptrauth_strip_insn_pac(lr) (lr)
-#define ptrauth_thread_init_user(tsk)
-#define ptrauth_thread_switch(tsk)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL)
+#define ptrauth_get_enabled_keys(tsk) (-EINVAL)
+#define ptrauth_suspend_exit()
+#define ptrauth_thread_init_user()
+#define ptrauth_thread_switch_user(tsk)
#endif /* CONFIG_ARM64_PTR_AUTH */
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_thread_init_kernel(tsk) \
+ ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
+#define ptrauth_thread_switch_kernel(tsk) \
+ ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
+#else
+#define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_kernel(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
#endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index d49951647014..0159b625cc7f 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -2,6 +2,7 @@
#ifndef __ASM_PREEMPT_H
#define __ASM_PREEMPT_H
+#include <linux/jump_label.h>
#include <linux/thread_info.h>
#define PREEMPT_NEED_RESCHED BIT(32)
@@ -23,7 +24,7 @@ static inline void preempt_count_set(u64 pc)
} while (0)
#define init_idle_preempt_count(p, cpu) do { \
- task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+ task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
} while (0)
static inline void set_preempt_need_resched(void)
@@ -79,11 +80,25 @@ static inline bool should_resched(int preempt_offset)
return pc == preempt_offset;
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
+
void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
void preempt_schedule_notrace(void);
-#define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+void dynamic_preempt_schedule(void);
+#define __preempt_schedule() dynamic_preempt_schedule()
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace()
+
+#else /* CONFIG_PREEMPT_DYNAMIC */
+
+#define __preempt_schedule() preempt_schedule()
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+
+#endif /* CONFIG_PREEMPT_DYNAMIC */
+#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 4266262101fe..006946745352 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -7,6 +7,8 @@
#ifndef _ARM_PROBES_H
#define _ARM_PROBES_H
+#include <asm/insn.h>
+
typedef u32 probe_opcode_t;
typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index a2ce65a0c1fa..0d5d1f0525eb 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -13,11 +13,9 @@
#include <asm/page.h>
-struct mm_struct;
struct cpu_suspend_ctx;
extern void cpu_do_idle(void);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr);
extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5ba63204d078..5b0a04810b23 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -8,9 +8,6 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
-#define KERNEL_DS UL(-1)
-#define USER_DS ((UL(1) << VA_BITS) - 1)
-
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
* no point in shifting all network buffers by 2 bytes just to make some IP
@@ -19,6 +16,13 @@
*/
#define NET_IP_ALIGN 0
+#define MTE_CTRL_GCR_USER_EXCL_SHIFT 0
+#define MTE_CTRL_GCR_USER_EXCL_MASK 0xffff
+
+#define MTE_CTRL_TCF_SYNC (1UL << 16)
+#define MTE_CTRL_TCF_ASYNC (1UL << 17)
+#define MTE_CTRL_TCF_ASYMM (1UL << 18)
+
#ifndef __ASSEMBLY__
#include <linux/build_bug.h>
@@ -28,6 +32,8 @@
#include <linux/string.h>
#include <linux/thread_info.h>
+#include <vdso/processor.h>
+
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/hw_breakpoint.h>
@@ -36,6 +42,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
+#include <asm/spectre.h>
#include <asm/types.h>
/*
@@ -45,6 +52,7 @@
#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN)
#define TASK_SIZE_64 (UL(1) << vabits_actual)
+#define TASK_SIZE_MAX (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
@@ -84,8 +92,8 @@
#endif /* CONFIG_COMPAT */
#ifndef CONFIG_ARM64_FORCE_52BIT
-#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
- DEFAULT_MAP_WINDOW)
+#define arch_get_mmap_end(addr, len, flags) \
+ (((addr) > DEFAULT_MAP_WINDOW) ? TASK_SIZE : DEFAULT_MAP_WINDOW)
#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
@@ -108,6 +116,18 @@ struct debug_info {
#endif
};
+enum vec_type {
+ ARM64_VEC_SVE = 0,
+ ARM64_VEC_SME,
+ ARM64_VEC_MAX,
+};
+
+enum fp_type {
+ FP_STATE_CURRENT, /* Save based on current task state. */
+ FP_STATE_FPSIMD,
+ FP_STATE_SVE,
+};
+
struct cpu_context {
unsigned long x19;
unsigned long x20;
@@ -138,18 +158,94 @@ struct thread_struct {
struct user_fpsimd_state fpsimd_state;
} uw;
+ enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
- unsigned int sve_vl; /* SVE vector length */
- unsigned int sve_vl_onexec; /* SVE vl after next exec */
+ void *sme_state; /* ZA and ZT state, if any */
+ unsigned int vl[ARM64_VEC_MAX]; /* vector length */
+ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ struct user_fpsimd_state kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
- struct ptrauth_keys keys_user;
+ struct ptrauth_keys_user keys_user;
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+ struct ptrauth_keys_kernel keys_kernel;
+#endif
+#endif
+#ifdef CONFIG_ARM64_MTE
+ u64 mte_ctrl;
#endif
+ u64 sctlr_user;
+ u64 svcr;
+ u64 tpidr2_el0;
};
+static inline unsigned int thread_get_vl(struct thread_struct *thread,
+ enum vec_type type)
+{
+ return thread->vl[type];
+}
+
+static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SVE);
+}
+
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
+static inline unsigned int thread_get_cur_vl(struct thread_struct *thread)
+{
+ if (system_supports_sme() && (thread->svcr & SVCR_SM_MASK))
+ return thread_get_sme_vl(thread);
+ else
+ return thread_get_sve_vl(thread);
+}
+
+unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
+void task_set_vl(struct task_struct *task, enum vec_type type,
+ unsigned long vl);
+void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
+ unsigned long vl);
+unsigned int task_get_vl_onexec(const struct task_struct *task,
+ enum vec_type type);
+
+static inline unsigned int task_get_sve_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SVE);
+}
+
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SME);
+}
+
+static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
+{
+ task_set_vl(task, ARM64_VEC_SVE, vl);
+}
+
+static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task)
+{
+ return task_get_vl_onexec(task, ARM64_VEC_SVE);
+}
+
+static inline void task_set_sve_vl_onexec(struct task_struct *task,
+ unsigned long vl)
+{
+ task_set_vl_onexec(task, ARM64_VEC_SVE, vl);
+}
+
+#define SCTLR_USER_MASK \
+ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \
+ SCTLR_EL1_TCF0_MASK)
+
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
@@ -186,48 +282,24 @@ void tls_preserve_current_state(void);
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
+ s32 previous_syscall = regs->syscallno;
memset(regs, 0, sizeof(*regs));
- forget_syscall(regs);
+ regs->syscallno = previous_syscall;
regs->pc = pc;
if (system_uses_irq_prio_masking())
regs->pmr_save = GIC_PRIO_IRQON;
}
-static inline void set_ssbs_bit(struct pt_regs *regs)
-{
- regs->pstate |= PSR_SSBS_BIT;
-}
-
-static inline void set_compat_ssbs_bit(struct pt_regs *regs)
-{
- regs->pstate |= PSR_AA32_SSBS_BIT;
-}
-
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
start_thread_common(regs, pc);
regs->pstate = PSR_MODE_EL0t;
-
- if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- set_ssbs_bit(regs);
-
+ spectre_v4_enable_task_mitigation(current);
regs->sp = sp;
}
-static inline bool is_ttbr0_addr(unsigned long addr)
-{
- /* entry assembly clears tags for TTBR0 addrs */
- return addr < TASK_SIZE;
-}
-
-static inline bool is_ttbr1_addr(unsigned long addr)
-{
- /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
- return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
-}
-
#ifdef CONFIG_COMPAT
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
@@ -241,25 +313,29 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
regs->pstate |= PSR_AA32_E_BIT;
#endif
- if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- set_compat_ssbs_bit(regs);
-
+ spectre_v4_enable_task_mitigation(current);
regs->compat_sp = sp;
}
#endif
+static __always_inline bool is_ttbr0_addr(unsigned long addr)
+{
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+}
+
+static __always_inline bool is_ttbr1_addr(unsigned long addr)
+{
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
/* Forward declaration, a strange C thing */
struct task_struct;
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
+unsigned long __get_wchan(struct task_struct *p);
-unsigned long get_wchan(struct task_struct *p);
-
-static inline void cpu_relax(void)
-{
- asm volatile("yield" ::: "memory");
-}
+void update_sctlr_el1(u64 sctlr);
/* Thread switching */
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
@@ -286,14 +362,6 @@ static inline void prefetchw(const void *ptr)
asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
}
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- "prfm pstl1strm, %a0",
- "nop") : : "p" (ptr));
-}
-
extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
extern void __init minsigstksz_setup(void);
@@ -308,35 +376,27 @@ extern void __init minsigstksz_setup(void);
*/
#include <asm/fpsimd.h>
-/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
+#define SME_SET_VL(arg) sme_set_current_vl(arg)
+#define SME_GET_VL() sme_get_current_vl()
/* PR_PAC_RESET_KEYS prctl */
#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled) \
+ ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
-long set_tagged_addr_ctrl(unsigned long arg);
-long get_tagged_addr_ctrl(void);
-#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg)
-#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl()
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
+long get_tagged_addr_ctrl(struct task_struct *task);
+#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg)
+#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
#endif
-/*
- * For CONFIG_GCC_PLUGIN_STACKLEAK
- *
- * These need to be macros because otherwise we get stuck in a nightmare
- * of header definitions for the use of task_stack_page.
- */
-
-#define current_top_of_stack() \
-({ \
- struct stack_info _info; \
- BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info)); \
- _info.high; \
-})
-#define on_thread_stack() (on_task_stack(current, current_stack_pointer, NULL))
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 0b8e7269ec82..581caac525b0 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,7 +5,7 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
-#ifdef CONFIG_ARM64_PTDUMP_CORE
+#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
#include <linux/seq_file.h>
@@ -21,15 +21,16 @@ struct ptdump_info {
unsigned long base_addr;
};
-void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
-#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_PTDUMP_DEBUGFS
+#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
#endif
void ptdump_check_wx(void);
-#endif /* CONFIG_ARM64_PTDUMP_CORE */
+#endif /* CONFIG_PTDUMP_CORE */
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx() ptdump_check_wx()
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index fbebb411ae20..47ec58031f11 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -16,6 +16,11 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
+#define INIT_PSTATE_EL1 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h)
+#define INIT_PSTATE_EL2 \
+ (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h)
+
/*
* PMR values used to mask/unmask interrupts.
*
@@ -27,14 +32,27 @@
*
* Some code sections either automatically switch back to PSR.I or explicitly
* require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
- * in the the priority mask, it indicates that PSR.I should be set and
+ * in the priority mask, it indicates that PSR.I should be set and
* interrupt disabling temporarily does not rely on IRQ priorities.
*/
#define GIC_PRIO_IRQON 0xe0
-#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define __GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define __GIC_PRIO_IRQOFF_NS 0xa0
#define GIC_PRIO_PSR_I_SET (1 << 4)
+#define GIC_PRIO_IRQOFF \
+ ({ \
+ extern struct static_key_false gic_nonsecure_priorities;\
+ u8 __prio = __GIC_PRIO_IRQOFF; \
+ \
+ if (static_branch_unlikely(&gic_nonsecure_priorities)) \
+ __prio = __GIC_PRIO_IRQOFF_NS; \
+ \
+ __prio; \
+ })
+
/* Additional SPSR bits not exposed in the UABI */
+#define PSR_MODE_THREAD_BIT (1 << 0)
#define PSR_IL_BIT (1 << 20)
/* AArch32-specific ptrace requests */
@@ -62,6 +80,7 @@
#define PSR_AA32_I_BIT 0x00000080
#define PSR_AA32_A_BIT 0x00000100
#define PSR_AA32_E_BIT 0x00000200
+#define PSR_AA32_PAN_BIT 0x00400000
#define PSR_AA32_SSBS_BIT 0x00800000
#define PSR_AA32_DIT_BIT 0x01000000
#define PSR_AA32_Q_BIT 0x08000000
@@ -174,11 +193,14 @@ struct pt_regs {
s32 syscallno;
u32 unused2;
#endif
-
- u64 orig_addr_limit;
- /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
+ u64 sdei_ttbr1;
+ /* Only valid when ARM64_HAS_GIC_PRIO_MASKING is enabled. */
u64 pmr_save;
u64 stackframe[2];
+
+ /* Only valid for some EL1 exceptions. */
+ u64 lockdep_hardirqs;
+ u64 exit_rcu;
};
static inline bool in_syscall(struct pt_regs const *regs)
@@ -298,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
- return regs->regs[0];
+ unsigned long val = regs->regs[0];
+
+ /*
+ * Audit currently uses regs_return_value() instead of
+ * syscall_get_return_value(). Apply the same sign-extension here until
+ * audit is updated to use syscall_get_return_value().
+ */
+ if (compat_user_mode(regs))
+ val = sign_extend64(val, 31);
+
+ return val;
}
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644
index 000000000000..56f7b1d4d54b
--- /dev/null
+++ b/arch/arm64/include/asm/rwonce.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__)
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...) \
+ ALTERNATIVE( \
+ "ldar" #sfx "\t" #regs, \
+ ".arch_extension rcpc\n" \
+ "ldapr" #sfx "\t" #regs, \
+ ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x) \
+({ \
+ typeof(&(x)) __x = &(x); \
+ int atomic = 1; \
+ union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \
+ switch (sizeof(x)) { \
+ case 1: \
+ asm volatile(__LOAD_RCPC(b, %w0, %1) \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile(__LOAD_RCPC(h, %w0, %1) \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile(__LOAD_RCPC(, %w0, %1) \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile(__LOAD_RCPC(, %0, %1) \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*__x) : "memory"); \
+ break; \
+ default: \
+ atomic = 0; \
+ } \
+ atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif /* !BUILD_VDSO */
+#endif /* CONFIG_LTO && !__ASSEMBLY__ */
+
+#include <asm-generic/rwonce.h>
+
+#endif /* __ASM_RWONCE_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..3fdae5fe3142
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm-offsets.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ scs_sp .req x18
+
+ .macro scs_load_current
+ get_current_task scs_sp
+ ldr scs_sp, [scs_sp, #TSK_TI_SCS_SP]
+ .endm
+
+ .macro scs_save tsk
+ str scs_sp, [\tsk, #TSK_TI_SCS_SP]
+ .endm
+#else
+ .macro scs_load_current
+ .endm
+
+ .macro scs_save tsk
+ .endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+
+#else
+
+#include <linux/scs.h>
+#include <asm/cpufeature.h>
+
+#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS
+static inline bool should_patch_pac_into_scs(void)
+{
+ u64 reg;
+
+ /*
+ * We only enable the shadow call stack dynamically if we are running
+ * on a system that does not implement PAC or BTI. PAC and SCS provide
+ * roughly the same level of protection, and BTI relies on the PACIASP
+ * instructions serving as landing pads, preventing us from patching
+ * those instructions into something else.
+ */
+ reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
+ if (SYS_FIELD_GET(ID_AA64ISAR1_EL1, APA, reg) |
+ SYS_FIELD_GET(ID_AA64ISAR1_EL1, API, reg))
+ return false;
+
+ reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1);
+ if (SYS_FIELD_GET(ID_AA64ISAR2_EL1, APA3, reg))
+ return false;
+
+ if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) {
+ reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+ if (reg & (0xf << ID_AA64PFR1_EL1_BT_SHIFT))
+ return false;
+ }
+ return true;
+}
+
+static inline void dynamic_scs_init(void)
+{
+ if (should_patch_pac_into_scs()) {
+ pr_info("Enabling dynamic shadow call stack\n");
+ static_branch_enable(&dynamic_scs_enabled);
+ }
+}
+#else
+static inline void dynamic_scs_init(void) {}
+#endif
+
+int scs_patch(const u8 eh_frame[], int size);
+asmlinkage void scs_patch_vmlinux(void);
+
+#endif /* __ASSEMBLY __ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92a5fbb..484cb6972e99 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -17,6 +17,9 @@
#include <asm/virt.h>
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
extern unsigned long sdei_exit_mode;
/* Software Delegated Exception entry point from firmware*/
@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
unsigned long pc,
unsigned long pstate);
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
/*
* The above entry point does the minimum to call C code. This function does
* anything else, before calling the driver.
@@ -37,24 +43,11 @@ struct sdei_registered_event;
asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
struct sdei_registered_event *arg);
+unsigned long do_sdei_event(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
unsigned long sdei_arch_get_entry_point(int conduit);
#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
-struct stack_info;
-
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp,
- struct stack_info *info)
-{
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return false;
- if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
- return false;
- if (in_nmi())
- return _on_sdei_stack(sp, info);
-
- return false;
-}
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
index c36387170936..30256233788b 100644
--- a/arch/arm64/include/asm/seccomp.h
+++ b/arch/arm64/include/asm/seccomp.h
@@ -19,4 +19,13 @@
#include <asm-generic/seccomp.h>
+#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_AARCH64
+#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+#define SECCOMP_ARCH_NATIVE_NAME "aarch64"
+#ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM
+# define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "arm"
+#endif
+
#endif /* _ASM_SECCOMP_H */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 25a73aab438f..40971ac1303f 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -8,10 +8,12 @@
#include <asm-generic/sections.h>
extern char __alt_instructions[], __alt_instructions_end[];
-extern char __exception_text_start[], __exception_text_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
+extern char __hyp_reloc_begin[], __hyp_reloc_end[];
+extern char __hyp_bss_start[], __hyp_bss_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
@@ -19,5 +21,11 @@ extern char __exittext_begin[], __exittext_end[];
extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __mmuoff_data_start[], __mmuoff_data_end[];
extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
+extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[];
+
+static inline size_t entry_tramp_text_size(void)
+{
+ return __entry_tramp_text_end - __entry_tramp_text_start;
+}
#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/semihost.h b/arch/arm64/include/asm/semihost.h
new file mode 100644
index 000000000000..87e353dab868
--- /dev/null
+++ b/arch/arm64/include/asm/semihost.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Adapted for ARM and earlycon:
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Rob Herring <robh@kernel.org>
+ */
+
+#ifndef _ARM64_SEMIHOST_H_
+#define _ARM64_SEMIHOST_H_
+
+struct uart_port;
+
+static inline void smh_putc(struct uart_port *port, unsigned char c)
+{
+ asm volatile("mov x1, %0\n"
+ "mov x0, #3\n"
+ "hlt 0xf000\n"
+ : : "r" (&c) : "x0", "x1", "memory");
+}
+
+#endif /* _ARM64_SEMIHOST_H_ */
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
new file mode 100644
index 000000000000..0f740b781187
--- /dev/null
+++ b/arch/arm64/include/asm/set_memory.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARM64_SET_MEMORY_H
+#define _ASM_ARM64_SET_MEMORY_H
+
+#include <asm-generic/set_memory.h>
+
+bool can_set_direct_map(void);
+#define can_set_direct_map can_set_direct_map
+
+int set_memory_valid(unsigned long addr, int numpages, int enable);
+
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
+
+#endif /* _ASM_ARM64_SET_MEMORY_H */
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
new file mode 100644
index 000000000000..2e4d7da74fb8
--- /dev/null
+++ b/arch/arm64/include/asm/setup.h
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __ARM64_ASM_SETUP_H
+#define __ARM64_ASM_SETUP_H
+
+#include <linux/string.h>
+
+#include <uapi/asm/setup.h>
+
+void *get_early_fdt_ptr(void);
+void early_fdt_map(u64 dt_phys);
+
+/*
+ * These two variables are used in the head.S file.
+ */
+extern phys_addr_t __fdt_pointer __initdata;
+extern u64 __cacheline_aligned boot_args[4];
+
+static inline bool arch_parse_debug_rodata(char *arg)
+{
+ extern bool rodata_enabled;
+ extern bool rodata_full;
+
+ if (!arg)
+ return false;
+
+ if (!strcmp(arg, "full")) {
+ rodata_enabled = rodata_full = true;
+ return true;
+ }
+
+ if (!strcmp(arg, "off")) {
+ rodata_enabled = rodata_full = false;
+ return true;
+ }
+
+ if (!strcmp(arg, "on")) {
+ rodata_enabled = true;
+ rodata_full = false;
+ return true;
+ }
+
+ return false;
+}
+#define arch_parse_debug_rodata arch_parse_debug_rodata
+
+#endif
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
new file mode 100644
index 000000000000..ef449f5f4ba8
--- /dev/null
+++ b/arch/arm64/include/asm/signal.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_ASM_SIGNAL_H
+#define __ARM64_ASM_SIGNAL_H
+
+#include <asm/memory.h>
+#include <uapi/asm/signal.h>
+#include <uapi/asm/siginfo.h>
+
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+ unsigned long sig,
+ unsigned long si_code)
+{
+ /*
+ * For historical reasons, all bits of the fault address are exposed as
+ * address bits for watchpoint exceptions. New architectures should
+ * handle the tag bits consistently.
+ */
+ if (sig == SIGTRAP && si_code == TRAP_BRKPT)
+ return addr;
+
+ return untagged_addr(addr);
+}
+#define arch_untagged_si_addr arch_untagged_si_addr
+
+#endif
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 7434844036d3..8e86c9e70e48 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,8 +12,6 @@
#include <linux/preempt.h>
#include <linux/types.h>
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -26,15 +24,12 @@ DECLARE_PER_CPU(bool, fpsimd_context_busy);
static __must_check inline bool may_use_simd(void)
{
/*
- * fpsimd_context_busy is only set while preemption is disabled,
- * and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
- * cannot change under our feet -- if it's set we cannot be
- * migrated, and if it's clear we cannot be migrated to a CPU
- * where it is set.
+ * We must make sure that the SVE has been initialized properly
+ * before using the SIMD in kernel.
*/
- return !in_irq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(fpsimd_context_busy);
+ return !WARN_ON(!system_capabilities_finalized()) &&
+ system_supports_fpsimd() &&
+ !in_hardirq() && !irqs_disabled() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index a0c8a0b65259..efb13112b408 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -46,19 +46,14 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
* Logical CPU mapping.
*/
extern u64 __cpu_logical_map[NR_CPUS];
-#define cpu_logical_map(cpu) __cpu_logical_map[cpu]
+extern u64 cpu_logical_map(unsigned int cpu);
-struct seq_file;
-
-/*
- * generate IPI list text
- */
-extern void show_ipi_list(struct seq_file *p, int prec);
+static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
+{
+ __cpu_logical_map[cpu] = hwid;
+}
-/*
- * Called from C code, this handles an IPI.
- */
-extern void handle_IPI(int ipinr, struct pt_regs *regs);
+struct seq_file;
/*
* Discover the set of possible CPUs and determine their
@@ -67,11 +62,9 @@ extern void handle_IPI(int ipinr, struct pt_regs *regs);
extern void smp_init_cpus(void);
/*
- * Provide a function to raise an IPI cross call on CPUs in callmap.
+ * Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
-
-extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
@@ -80,12 +73,10 @@ asmlinkage void secondary_start_kernel(void);
/*
* Initial data for bringing up a secondary CPU.
- * @stack - sp for the secondary CPU
* @status - Result passed back from the secondary CPU to
* indicate failure.
*/
struct secondary_data {
- void *stack;
struct task_struct *task;
long status;
};
@@ -98,9 +89,9 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
-static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
BUILD_BUG();
}
@@ -108,11 +99,11 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-extern void cpu_die(void);
-extern void cpu_die_early(void);
+static inline void __cpu_die(unsigned int cpu) { }
+extern void __noreturn cpu_die(void);
+extern void __noreturn cpu_die_early(void);
-static inline void cpu_park_loop(void)
+static inline void __noreturn cpu_park_loop(void)
{
for (;;) {
wfe();
@@ -132,7 +123,7 @@ static inline void update_cpu_boot_status(int val)
* which calls for a kernel panic. Update the boot status and park the calling
* CPU.
*/
-static inline void cpu_panic_kernel(void)
+static inline void __noreturn cpu_panic_kernel(void)
{
update_cpu_boot_status(CPU_PANIC_KERNEL);
cpu_park_loop();
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index 99ad77df8f52..97ddc6c203b7 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -10,6 +10,7 @@
#include <linux/cpumask.h>
+#include <asm/smp.h>
#include <asm/types.h>
struct mpidr_hash {
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 1f43fcc79738..8a8acc220371 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,9 +5,25 @@
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
-#define SECTION_SIZE_BITS 30
-#endif
+
+/*
+ * Section size must be at least 512MB for 64K base
+ * page size config. Otherwise it will be less than
+ * MAX_PAGE_ORDER and the build process will fail.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SECTION_SIZE_BITS 29
+
+#else
+
+/*
+ * Section size must be at least 128MB for 4K base
+ * page size config. Otherwise PMD based huge page
+ * entries could not be created for vmemmap mappings.
+ * 16K follows 4K for simplicity.
+ */
+#define SECTION_SIZE_BITS 27
+#endif /* CONFIG_ARM64_64K_PAGES */
#endif
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
new file mode 100644
index 000000000000..0c4d9045c31f
--- /dev/null
+++ b/arch/arm64/include/asm/spectre.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Interface for managing mitigations for Spectre vulnerabilities.
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ */
+
+#ifndef __ASM_SPECTRE_H
+#define __ASM_SPECTRE_H
+
+#define BP_HARDEN_EL2_SLOTS 4
+#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
+
+#ifndef __ASSEMBLY__
+#include <linux/smp.h>
+#include <asm/percpu.h>
+
+#include <asm/cpufeature.h>
+#include <asm/virt.h>
+
+/* Watch out, ordering is important here. */
+enum mitigation_state {
+ SPECTRE_UNAFFECTED,
+ SPECTRE_MITIGATED,
+ SPECTRE_VULNERABLE,
+};
+
+struct pt_regs;
+struct task_struct;
+
+/*
+ * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and
+ * we rely on having the direct vectors first.
+ */
+enum arm64_hyp_spectre_vector {
+ /*
+ * Take exceptions directly to __kvm_hyp_vector. This must be
+ * 0 so that it used by default when mitigations are not needed.
+ */
+ HYP_VECTOR_DIRECT,
+
+ /*
+ * Bounce via a slot in the hypervisor text mapping of
+ * __bp_harden_hyp_vecs, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_DIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page.
+ */
+ HYP_VECTOR_INDIRECT,
+
+ /*
+ * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+ * next to the idmap page, which contains an SMC call.
+ */
+ HYP_VECTOR_SPECTRE_INDIRECT,
+};
+
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+ enum arm64_hyp_spectre_vector slot;
+ bp_hardening_cb_t fn;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+/* Called during entry so must be __always_inline */
+static __always_inline void arm64_apply_bp_hardening(void)
+{
+ struct bp_hardening_data *d;
+
+ if (!alternative_has_cap_unlikely(ARM64_SPECTRE_V2))
+ return;
+
+ d = this_cpu_ptr(&bp_hardening_data);
+ if (d->fn)
+ d->fn();
+}
+
+enum mitigation_state arm64_get_spectre_v2_state(void);
+bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
+enum mitigation_state arm64_get_spectre_v4_state(void);
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
+
+enum mitigation_state arm64_get_meltdown_state(void);
+
+enum mitigation_state arm64_get_spectre_bhb_state(void);
+bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
+u8 spectre_bhb_loop_affected(int scope);
+void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index b093b287babf..0525c0b089ed 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -5,10 +5,23 @@
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
-#include <asm/qrwlock.h>
#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
/* See include/linux/spinlock.h */
#define smp_mb__after_spinlock() smp_mb()
+/*
+ * Changing this will break osq_lock() thanks to the call inside
+ * smp_cond_load_relaxed().
+ *
+ * See:
+ * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
+ */
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return false;
+}
+
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 18782f0c4721..11ab1c077697 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -5,7 +5,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
# error "please don't include this file directly"
#endif
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 5884a2b02827..ae3ad80f51fe 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -13,8 +13,7 @@
#ifndef __ASM_STACKPROTECTOR_H
#define __ASM_STACKPROTECTOR_H
-#include <linux/random.h>
-#include <linux/version.h>
+#include <asm/pointer_auth.h>
extern unsigned long __stack_chk_guard;
@@ -26,16 +25,16 @@ extern unsigned long __stack_chk_guard;
*/
static __always_inline void boot_init_stack_canary(void)
{
- unsigned long canary;
-
- /* Try to get a semi random initial value. */
- get_random_bytes(&canary, sizeof(canary));
- canary ^= LINUX_VERSION_CODE;
- canary &= CANARY_MASK;
+#if defined(CONFIG_STACKPROTECTOR)
+ unsigned long canary = get_random_canary();
current->stack_canary = canary;
if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
__stack_chk_guard = current->stack_canary;
+#endif
+ ptrauth_thread_init_kernel(current);
+ ptrauth_thread_switch_kernel(current);
+ ptrauth_enable();
}
#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4d9b1f48dc39..66ec8caa6ac0 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -8,178 +8,117 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/types.h>
+#include <linux/llist.h>
#include <asm/memory.h>
+#include <asm/pointer_auth.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
-enum stack_type {
- STACK_TYPE_UNKNOWN,
- STACK_TYPE_TASK,
- STACK_TYPE_IRQ,
- STACK_TYPE_OVERFLOW,
- STACK_TYPE_SDEI_NORMAL,
- STACK_TYPE_SDEI_CRITICAL,
- __NR_STACK_TYPES
-};
-
-struct stack_info {
- unsigned long low;
- unsigned long high;
- enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
-/*
- * A snapshot of a frame record or fp/lr register values, along with some
- * accounting information necessary for robust unwinding.
- *
- * @fp: The fp value in the frame record (or the real fp)
- * @pc: The fp value in the frame record (or the real lr)
- *
- * @stacks_done: Stacks which have been entirely unwound, for which it is no
- * longer valid to unwind to.
- *
- * @prev_fp: The fp that pointed to this frame record, or a synthetic value
- * of 0. This is used to ensure that within a stack, each
- * subsequent frame record is at an increasing address.
- * @prev_type: The type of stack this frame record was on, or a synthetic
- * value of STACK_TYPE_UNKNOWN. This is used to detect a
- * transition from one stack to another.
- *
- * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a
- * replacement lr value in the ftrace graph stack.
- */
-struct stackframe {
- unsigned long fp;
- unsigned long pc;
- DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
- unsigned long prev_fp;
- enum stack_type prev_type;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int graph;
-#endif
-};
-
-extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
-extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
- int (*fn)(struct stackframe *, void *), void *data);
-extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+ const char *loglvl);
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
-static inline bool on_irq_stack(unsigned long sp,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_irq(void)
{
unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
unsigned long high = low + IRQ_STACK_SIZE;
- if (!low)
- return false;
-
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_IRQ;
- }
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
- return true;
+static inline bool on_irq_stack(unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_irq();
+ return stackinfo_on_stack(&info, sp, size);
}
-static inline bool on_task_stack(const struct task_struct *tsk,
- unsigned long sp,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_TASK;
- }
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
- return true;
+static inline bool on_task_stack(const struct task_struct *tsk,
+ unsigned long sp, unsigned long size)
+{
+ struct stack_info info = stackinfo_get_task(tsk);
+ return stackinfo_on_stack(&info, sp, size);
}
+#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1))
+
#ifdef CONFIG_VMAP_STACK
DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
-static inline bool on_overflow_stack(unsigned long sp,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_overflow(void)
{
unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
unsigned long high = low + OVERFLOW_STACK_SIZE;
- if (sp < low || sp >= high)
- return false;
-
- if (info) {
- info->low = low;
- info->high = high;
- info->type = STACK_TYPE_OVERFLOW;
- }
-
- return true;
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
#else
-static inline bool on_overflow_stack(unsigned long sp,
- struct stack_info *info) { return false; }
+#define stackinfo_get_overflow() stackinfo_get_unknown()
#endif
+#if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK)
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
- unsigned long sp,
- struct stack_info *info)
+static inline struct stack_info stackinfo_get_sdei_normal(void)
{
- if (info)
- info->type = STACK_TYPE_UNKNOWN;
-
- if (on_task_stack(tsk, sp, info))
- return true;
- if (tsk != current || preemptible())
- return false;
- if (on_irq_stack(sp, info))
- return true;
- if (on_overflow_stack(sp, info))
- return true;
- if (on_sdei_stack(sp, info))
- return true;
-
- return false;
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
-static inline void start_backtrace(struct stackframe *frame,
- unsigned long fp, unsigned long pc)
+static inline struct stack_info stackinfo_get_sdei_critical(void)
{
- frame->fp = fp;
- frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame->graph = 0;
+ unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+ unsigned long high = low + SDEI_STACK_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
+}
+#else
+#define stackinfo_get_sdei_normal() stackinfo_get_unknown()
+#define stackinfo_get_sdei_critical() stackinfo_get_unknown()
#endif
- /*
- * Prime the first unwind.
- *
- * In unwind_frame() we'll check that the FP points to a valid stack,
- * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
- * treated as a transition to whichever stack that happens to be. The
- * prev_fp value won't be used, but we set it to 0 such that it is
- * definitely not an accessible stack address.
- */
- bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
- frame->prev_fp = 0;
- frame->prev_type = STACK_TYPE_UNKNOWN;
+#ifdef CONFIG_EFI
+extern u64 *efi_rt_stack_top;
+
+static inline struct stack_info stackinfo_get_efi(void)
+{
+ unsigned long high = (u64)efi_rt_stack_top;
+ unsigned long low = high - THREAD_SIZE;
+
+ return (struct stack_info) {
+ .low = low,
+ .high = high,
+ };
}
+#endif
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
new file mode 100644
index 000000000000..f63dc654e545
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common arm64 stack unwinder code.
+ *
+ * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_STACKTRACE_COMMON_H
+#define __ASM_STACKTRACE_COMMON_H
+
+#include <linux/types.h>
+
+struct stack_info {
+ unsigned long low;
+ unsigned long high;
+};
+
+/**
+ * struct unwind_state - state used for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The lr value in the frame record (or the real lr)
+ *
+ * @stack: The stack currently being unwound.
+ * @stacks: An array of stacks which can be unwound.
+ * @nr_stacks: The number of stacks in @stacks.
+ */
+struct unwind_state {
+ unsigned long fp;
+ unsigned long pc;
+
+ struct stack_info stack;
+ struct stack_info *stacks;
+ int nr_stacks;
+};
+
+static inline struct stack_info stackinfo_get_unknown(void)
+{
+ return (struct stack_info) {
+ .low = 0,
+ .high = 0,
+ };
+}
+
+static inline bool stackinfo_on_stack(const struct stack_info *info,
+ unsigned long sp, unsigned long size)
+{
+ if (!info->low)
+ return false;
+
+ if (sp < info->low || sp + size < sp || sp + size > info->high)
+ return false;
+
+ return true;
+}
+
+static inline void unwind_init_common(struct unwind_state *state)
+{
+ state->stack = stackinfo_get_unknown();
+}
+
+static struct stack_info *unwind_find_next_stack(const struct unwind_state *state,
+ unsigned long sp,
+ unsigned long size)
+{
+ for (int i = 0; i < state->nr_stacks; i++) {
+ struct stack_info *info = &state->stacks[i];
+
+ if (stackinfo_on_stack(info, sp, size))
+ return info;
+ }
+
+ return NULL;
+}
+
+/**
+ * unwind_consume_stack() - Check if an object is on an accessible stack,
+ * updating stack boundaries so that future unwind steps cannot consume this
+ * object again.
+ *
+ * @state: the current unwind state.
+ * @sp: the base address of the object.
+ * @size: the size of the object.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline int unwind_consume_stack(struct unwind_state *state,
+ unsigned long sp,
+ unsigned long size)
+{
+ struct stack_info *next;
+
+ if (stackinfo_on_stack(&state->stack, sp, size))
+ goto found;
+
+ next = unwind_find_next_stack(state, sp, size);
+ if (!next)
+ return -EINVAL;
+
+ /*
+ * Stack transitions are strictly one-way, and once we've
+ * transitioned from one stack to another, it's never valid to
+ * unwind back to the old stack.
+ *
+ * Remove the current stack from the list of stacks so that it cannot
+ * be found on a subsequent transition.
+ *
+ * Note that stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ * HYP -> OVERFLOW
+ *
+ * ... so we do not check the specific order of stack
+ * transitions.
+ */
+ state->stack = *next;
+ *next = stackinfo_get_unknown();
+
+found:
+ /*
+ * Future unwind steps can only consume stack above this frame record.
+ * Update the current stack to start immediately above it.
+ */
+ state->stack.low = sp + size;
+ return 0;
+}
+
+/**
+ * unwind_next_frame_record() - Unwind to the next frame record.
+ *
+ * @state: the current unwind state.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline int
+unwind_next_frame_record(struct unwind_state *state)
+{
+ unsigned long fp = state->fp;
+ int err;
+
+ if (fp & 0x7)
+ return -EINVAL;
+
+ err = unwind_consume_stack(state, fp, 16);
+ if (err)
+ return err;
+
+ /*
+ * Record this frame record's values.
+ */
+ state->fp = READ_ONCE(*(unsigned long *)(fp));
+ state->pc = READ_ONCE(*(unsigned long *)(fp + 8));
+
+ return 0;
+}
+
+#endif /* __ASM_STACKTRACE_COMMON_H */
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
new file mode 100644
index 000000000000..44759281d0d4
--- /dev/null
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ * 1) Non-protected nVHE mode - the host can directly access the
+ * HYP stack pages and unwind the HYP stack in EL1. This saves having
+ * to allocate shared buffers for the host to read the unwinded
+ * stacktrace.
+ *
+ * 2) pKVM (protected nVHE) mode - the host cannot directly access
+ * the HYP memory. The stack is unwinded in EL2 and dumped to a shared
+ * buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#ifndef __ASM_STACKTRACE_NVHE_H
+#define __ASM_STACKTRACE_NVHE_H
+
+#include <asm/stacktrace/common.h>
+
+/**
+ * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc
+ *
+ * @state : unwind_state to initialize
+ * @fp : frame pointer at which to start the unwinding.
+ * @pc : program counter at which to start the unwinding.
+ */
+static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
+ unsigned long fp,
+ unsigned long pc)
+{
+ unwind_init_common(state);
+
+ state->fp = fp;
+ state->pc = pc;
+}
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * Conventional (non-protected) nVHE HYP stack unwinder
+ *
+ * In non-protected mode, the unwinding is done from kernel proper context
+ * (by the host in EL1).
+ */
+
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
+
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+#endif /* __ASM_STACKTRACE_NVHE_H */
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 326aac658b9d..23d27623e478 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -8,15 +8,7 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_
-#include <linux/hugetlb.h>
-#include <asm/pgtable.h>
-
-/*
- * PGDIR_SHIFT determines the size a top-level page table entry can map
- * and depends on the number of levels in the page table. Compute the
- * PGDIR_SHIFT for a given number of levels.
- */
-#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
+#include <linux/pgtable.h>
/*
* The hardware supports concatenation of up to 16 tables at stage2 entry
@@ -29,205 +21,13 @@
* (IPA_SHIFT - 4).
*/
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
-#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
-
-/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
-#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
-#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
-#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
-
-/*
- * The number of PTRS across all concatenated stage2 tables given by the
- * number of bits resolved at the initial level.
- * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
- * in which case, stage2_pgd_ptrs will have one entry.
- */
-#define pgd_ptrs_shift(ipa, pgdir_shift) \
- ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
-#define __s2_pgd_ptrs(ipa, lvls) \
- (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
-#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
-
-#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
-#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
+#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr)
/*
* kvm_mmmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
-#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
-
-/* Stage2 PUD definitions when the level is present */
-static inline bool kvm_stage2_has_pud(struct kvm *kvm)
-{
- return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
-}
-
-#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
-#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
-#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
-
-static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
-{
- if (kvm_stage2_has_pud(kvm))
- return pgd_none(pgd);
- else
- return 0;
-}
-
-static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
-{
- if (kvm_stage2_has_pud(kvm))
- pgd_clear(pgdp);
-}
-
-static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
-{
- if (kvm_stage2_has_pud(kvm))
- return pgd_present(pgd);
- else
- return 1;
-}
-
-static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
-{
- if (kvm_stage2_has_pud(kvm))
- pgd_populate(NULL, pgd, pud);
-}
-
-static inline pud_t *stage2_pud_offset(struct kvm *kvm,
- pgd_t *pgd, unsigned long address)
-{
- if (kvm_stage2_has_pud(kvm))
- return pud_offset(pgd, address);
- else
- return (pud_t *)pgd;
-}
-
-static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
-{
- if (kvm_stage2_has_pud(kvm))
- free_page((unsigned long)pud);
-}
-
-static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
-{
- if (kvm_stage2_has_pud(kvm))
- return kvm_page_empty(pudp);
- else
- return false;
-}
-
-static inline phys_addr_t
-stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- if (kvm_stage2_has_pud(kvm)) {
- phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
-
- return (boundary - 1 < end - 1) ? boundary : end;
- } else {
- return end;
- }
-}
-
-/* Stage2 PMD definitions when the level is present */
-static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
-{
- return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
-}
-
-#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
-#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
-#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
-
-static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
-{
- if (kvm_stage2_has_pmd(kvm))
- return pud_none(pud);
- else
- return 0;
-}
-
-static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
-{
- if (kvm_stage2_has_pmd(kvm))
- pud_clear(pud);
-}
-
-static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
-{
- if (kvm_stage2_has_pmd(kvm))
- return pud_present(pud);
- else
- return 1;
-}
-
-static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
-{
- if (kvm_stage2_has_pmd(kvm))
- pud_populate(NULL, pud, pmd);
-}
-
-static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
- pud_t *pud, unsigned long address)
-{
- if (kvm_stage2_has_pmd(kvm))
- return pmd_offset(pud, address);
- else
- return (pmd_t *)pud;
-}
-
-static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
-{
- if (kvm_stage2_has_pmd(kvm))
- free_page((unsigned long)pmd);
-}
-
-static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
-{
- if (kvm_stage2_has_pmd(kvm))
- return pud_huge(pud);
- else
- return 0;
-}
-
-static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
-{
- if (kvm_stage2_has_pmd(kvm))
- return kvm_page_empty(pmdp);
- else
- return 0;
-}
-
-static inline phys_addr_t
-stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- if (kvm_stage2_has_pmd(kvm)) {
- phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
-
- return (boundary - 1 < end - 1) ? boundary : end;
- } else {
- return end;
- }
-}
-
-static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
-{
- return kvm_page_empty(ptep);
-}
-
-static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
-{
- return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
-}
-
-static inline phys_addr_t
-stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
-{
- phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
-
- return (boundary - 1 < end - 1) ? boundary : end;
-}
+#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1)
#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index b31e8e87a0db..3a3264ff47b9 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -5,7 +5,7 @@
#ifndef __ASM_STRING_H
#define __ASM_STRING_H
-#ifndef CONFIG_KASAN
+#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
#define __HAVE_ARCH_STRRCHR
extern char *strrchr(const char *, int c);
@@ -48,7 +48,8 @@ extern void *__memset(void *, int, __kernel_size_t);
void memcpy_flushcache(void *dst, const void *src, size_t cnt);
#endif
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+ !defined(__SANITIZE_ADDRESS__)
/*
* For files that are not instrumented (e.g. mm/slub.c) we
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12
/*
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 65299a2dcf9c..ab8e14b96f68 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -29,25 +29,36 @@ static inline void syscall_rollback(struct task_struct *task,
regs->regs[0] = regs->orig_x0;
}
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ unsigned long val = regs->regs[0];
+
+ if (is_compat_thread(task_thread_info(task)))
+ val = sign_extend64(val, 31);
+
+ return val;
+}
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- unsigned long error = regs->regs[0];
- return IS_ERR_VALUE(error) ? error : 0;
-}
+ unsigned long error = syscall_get_return_value(task, regs);
-static inline long syscall_get_return_value(struct task_struct *task,
- struct pt_regs *regs)
-{
- return regs->regs[0];
+ return IS_ERR_VALUE(error) ? error : 0;
}
static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- regs->regs[0] = (long) error ? error : val;
+ if (error)
+ val = error;
+
+ if (is_compat_thread(task_thread_info(task)))
+ val = lower_32_bits(val);
+
+ regs->regs[0] = val;
}
#define SYSCALL_MAX_ARGS 6
@@ -62,16 +73,6 @@ static inline void syscall_get_arguments(struct task_struct *task,
memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
-static inline void syscall_set_arguments(struct task_struct *task,
- struct pt_regs *regs,
- const unsigned long *args)
-{
- regs->orig_x0 = args[0];
- args++;
-
- memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
-}
-
/*
* We don't care about endianness (__AUDIT_ARCH_LE bit) here because
* AArch64 has the same system calls both on little- and big- endian.
@@ -84,4 +85,7 @@ static inline int syscall_get_arch(struct task_struct *task)
return AUDIT_ARCH_AARCH64;
}
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
#endif /* __ASM_SYSCALL_H */
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index b383b4802a7b..abb57bc54305 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -8,7 +8,7 @@
#ifndef __ASM_SYSCALL_WRAPPER_H
#define __ASM_SYSCALL_WRAPPER_H
-struct pt_regs;
+#include <asm/ptrace.h>
#define SC_ARM64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
@@ -38,14 +38,12 @@ struct pt_regs;
asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL_COMPAT(name) \
+ asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#define COMPAT_SYS_NI(name) \
- SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers);
-
#endif /* CONFIG_COMPAT */
#define __SYSCALL_DEFINEx(x, name, ...) \
@@ -73,11 +71,12 @@ struct pt_regs;
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL(name) \
+ asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
+asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fafb43d..c3b19b376c86 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -11,6 +11,9 @@
#include <linux/bits.h>
#include <linux/stringify.h>
+#include <linux/kasan-tags.h>
+
+#include <asm/gpr-num.h>
/*
* ARMv8 ARM reserves the following encoding for system registers:
@@ -49,7 +52,9 @@
#ifndef CONFIG_BROKEN_GAS_INST
#ifdef __ASSEMBLY__
-#define __emit_inst(x) .inst (x)
+// The space separator is omitted so that __emit_inst(x) can be parsed as
+// either an assembler directive or an assembler macro argument.
+#define __emit_inst(x) .inst(x)
#else
#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
#endif
@@ -85,14 +90,24 @@
*/
#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift)
#define PSTATE_Imm_shift CRm_shift
+#define SET_PSTATE(x, r) __emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift))
#define PSTATE_PAN pstate_field(0, 4)
#define PSTATE_UAO pstate_field(0, 3)
#define PSTATE_SSBS pstate_field(3, 1)
+#define PSTATE_DIT pstate_field(3, 2)
+#define PSTATE_TCO pstate_field(3, 4)
+
+#define SET_PSTATE_PAN(x) SET_PSTATE((x), PAN)
+#define SET_PSTATE_UAO(x) SET_PSTATE((x), UAO)
+#define SET_PSTATE_SSBS(x) SET_PSTATE((x), SSBS)
+#define SET_PSTATE_DIT(x) SET_PSTATE((x), DIT)
+#define SET_PSTATE_TCO(x) SET_PSTATE((x), TCO)
-#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift))
-#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
-#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
+#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x))
+#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x))
+#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
+#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -100,21 +115,74 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
+#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
+#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
+#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
+#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
+/*
+ * Automatically generated definitions for system registers, the
+ * manual encodings below are in the process of being converted to
+ * come from here. The header relies on the definition of sys_reg()
+ * earlier in this file.
+ */
+#include "asm/sysreg-defs.h"
+
+/*
+ * System registers, organised loosely by encoding but grouped together
+ * where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
+ */
+#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3)
+#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
+#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
-#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
-#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
-#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
-#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2)
-#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2)
#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4)
#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
-#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
+
#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
+#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0))
+#define OSLSR_EL1_OSLM_NI 0
+#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3)
+#define OSLSR_EL1_OSLK BIT(1)
+
#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6)
@@ -126,56 +194,94 @@
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
+#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
-#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
-#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
-#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
-#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3)
-#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
-#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
-#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
-#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
-
-#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
-#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
-#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
-#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
-#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
-#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
-#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
-
-#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
-#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
-#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
-
-#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
-#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
-#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
-
-#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
-#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
-
-#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
-#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
-
-#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
-#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
-
-#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
-#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
-#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
-
-#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0)
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
-#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2)
+#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
+#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
+#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0)
-#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1)
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -206,120 +312,52 @@
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
+#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
+#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
-#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
#define SYS_PAR_EL1_FST GENMASK(6, 1)
/*** Statistical Profiling Extension ***/
-/* ID registers */
-#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7)
-#define SYS_PMSIDR_EL1_FE_SHIFT 0
-#define SYS_PMSIDR_EL1_FT_SHIFT 1
-#define SYS_PMSIDR_EL1_FL_SHIFT 2
-#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3
-#define SYS_PMSIDR_EL1_LDS_SHIFT 4
-#define SYS_PMSIDR_EL1_ERND_SHIFT 5
-#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL
-#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12
-#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL
-#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16
-#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL
-
-#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7)
-#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0
-#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU
-#define SYS_PMBIDR_EL1_P_SHIFT 4
-#define SYS_PMBIDR_EL1_F_SHIFT 5
-
-/* Sampling controls */
-#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0)
-#define SYS_PMSCR_EL1_E0SPE_SHIFT 0
-#define SYS_PMSCR_EL1_E1SPE_SHIFT 1
-#define SYS_PMSCR_EL1_CX_SHIFT 3
-#define SYS_PMSCR_EL1_PA_SHIFT 4
-#define SYS_PMSCR_EL1_TS_SHIFT 5
-#define SYS_PMSCR_EL1_PCT_SHIFT 6
-
-#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0)
-#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0
-#define SYS_PMSCR_EL2_E2SPE_SHIFT 1
-#define SYS_PMSCR_EL2_CX_SHIFT 3
-#define SYS_PMSCR_EL2_PA_SHIFT 4
-#define SYS_PMSCR_EL2_TS_SHIFT 5
-#define SYS_PMSCR_EL2_PCT_SHIFT 6
-
-#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2)
-
-#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3)
-#define SYS_PMSIRR_EL1_RND_SHIFT 0
-#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8
-#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
-
-/* Filtering controls */
-#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
-#define SYS_PMSFCR_EL1_FE_SHIFT 0
-#define SYS_PMSFCR_EL1_FT_SHIFT 1
-#define SYS_PMSFCR_EL1_FL_SHIFT 2
-#define SYS_PMSFCR_EL1_B_SHIFT 16
-#define SYS_PMSFCR_EL1_LD_SHIFT 17
-#define SYS_PMSFCR_EL1_ST_SHIFT 18
-
-#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5)
-#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL
-
-#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6)
-#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0
-
-/* Buffer controls */
-#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0)
-#define SYS_PMBLIMITR_EL1_E_SHIFT 0
-#define SYS_PMBLIMITR_EL1_FM_SHIFT 1
-#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL
-#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT)
-
-#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1)
+#define PMSEVFR_EL1_RES0_IMP \
+ (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\
+ BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
+#define PMSEVFR_EL1_RES0_V1P1 \
+ (PMSEVFR_EL1_RES0_IMP & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))
+#define PMSEVFR_EL1_RES0_V1P2 \
+ (PMSEVFR_EL1_RES0_V1P1 & ~BIT_ULL(6))
/* Buffer error reporting */
-#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3)
-#define SYS_PMBSR_EL1_COLL_SHIFT 16
-#define SYS_PMBSR_EL1_S_SHIFT 17
-#define SYS_PMBSR_EL1_EA_SHIFT 18
-#define SYS_PMBSR_EL1_DL_SHIFT 19
-#define SYS_PMBSR_EL1_EC_SHIFT 26
-#define SYS_PMBSR_EL1_EC_MASK 0x3fUL
+#define PMBSR_EL1_FAULT_FSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_FAULT_FSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT)
-#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT)
+#define PMBSR_EL1_BUF_BSC_SHIFT PMBSR_EL1_MSS_SHIFT
+#define PMBSR_EL1_BUF_BSC_MASK PMBSR_EL1_MSS_MASK
-#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0
-#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL
-
-#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0
-#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL
-
-#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT)
+#define PMBSR_EL1_BUF_BSC_FULL 0x1UL
/*** End of Statistical Profiling Extension ***/
+#define TRBSR_EL1_BSC_MASK GENMASK(5, 0)
+#define TRBSR_EL1_BSC_SHIFT 0
+
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
+#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6)
+
#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
-#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0)
-#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1)
-#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2)
-#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3)
-#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7)
-
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
@@ -351,19 +389,14 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
-#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1)
-#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
-#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
-#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
-#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0)
-
-#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
-#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
+#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
+#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0)
#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1)
@@ -381,9 +414,52 @@
#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2)
#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3)
+#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5)
+
+#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7)
+
+/* Definitions for system register interface to AMU for ARMv8.4 onwards */
+#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2))
+#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0)
+#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1)
+#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2)
+#define SYS_AMUSERENR_EL0 SYS_AM_EL0(2, 3)
+#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4)
+#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5)
+#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0)
+#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1)
+
+/*
+ * Group 0 of activity monitors (architected):
+ * op0 op1 CRn CRm op2
+ * Counter: 11 011 1101 010:n<3> n<2:0>
+ * Type: 11 011 1101 011:n<3> n<2:0>
+ * n: 0-15
+ *
+ * Group 1 of activity monitors (auxiliary):
+ * op0 op1 CRn CRm op2
+ * Counter: 11 011 1101 110:n<3> n<2:0>
+ * Type: 11 011 1101 111:n<3> n<2:0>
+ * n: 0-15
+ */
+
+#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
+
+/* AMU v1: Fixed (architecturally defined) activity monitors */
+#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0)
+#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1)
+#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2)
+#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
+#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
+
#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0)
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
@@ -393,7 +469,9 @@
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
+#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
@@ -403,17 +481,64 @@
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
-#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
-#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
+#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
+#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
+
+#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
+#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
+#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
+#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
+#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
+#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3)
+#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7)
+
+#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0)
+#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1)
+#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2)
+#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
+#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
+
+#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
+#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
+#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
+#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
+#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
-#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
+#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
-#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
+#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
+#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4)
+
+#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
+#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
+#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0)
+#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1)
+#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0)
+#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x)
+#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0)
+#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1)
+#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2)
+#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3)
+#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4)
+#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5)
+#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6)
+#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7)
+
+#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
+#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
+#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2)
+#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
@@ -455,23 +580,51 @@
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
+#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
+#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
+
+#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
+#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
+
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
+#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
@@ -480,300 +633,469 @@
#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
+#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
-#define SCTLR_ELx_ENIA (BIT(31))
-#define SCTLR_ELx_ENIB (BIT(30))
-#define SCTLR_ELx_ENDA (BIT(27))
-#define SCTLR_ELx_EE (BIT(25))
-#define SCTLR_ELx_IESB (BIT(21))
-#define SCTLR_ELx_WXN (BIT(19))
-#define SCTLR_ELx_ENDB (BIT(13))
-#define SCTLR_ELx_I (BIT(12))
-#define SCTLR_ELx_SA (BIT(3))
-#define SCTLR_ELx_C (BIT(2))
-#define SCTLR_ELx_A (BIT(1))
-#define SCTLR_ELx_M (BIT(0))
-
-#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
- SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
+#define SCTLR_ELx_ATA (BIT(43))
+
+#define SCTLR_ELx_EE_SHIFT 25
+#define SCTLR_ELx_ENIA_SHIFT 31
+
+#define SCTLR_ELx_ITFSB (BIT(37))
+#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
+#define SCTLR_ELx_ENIB (BIT(30))
+#define SCTLR_ELx_LSMAOE (BIT(29))
+#define SCTLR_ELx_nTLSMD (BIT(28))
+#define SCTLR_ELx_ENDA (BIT(27))
+#define SCTLR_ELx_EE (BIT(SCTLR_ELx_EE_SHIFT))
+#define SCTLR_ELx_EIS (BIT(22))
+#define SCTLR_ELx_IESB (BIT(21))
+#define SCTLR_ELx_TSCXT (BIT(20))
+#define SCTLR_ELx_WXN (BIT(19))
+#define SCTLR_ELx_ENDB (BIT(13))
+#define SCTLR_ELx_I (BIT(12))
+#define SCTLR_ELx_EOS (BIT(11))
+#define SCTLR_ELx_SA (BIT(3))
+#define SCTLR_ELx_C (BIT(2))
+#define SCTLR_ELx_A (BIT(1))
+#define SCTLR_ELx_M (BIT(0))
/* SCTLR_EL2 specific flags. */
#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
(BIT(29)))
+#define SCTLR_EL2_BT (BIT(36))
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL2 SCTLR_ELx_EE
#else
#define ENDIAN_SET_EL2 0
#endif
-/* SCTLR_EL1 specific flags. */
-#define SCTLR_EL1_UCI (BIT(26))
-#define SCTLR_EL1_E0E (BIT(24))
-#define SCTLR_EL1_SPAN (BIT(23))
-#define SCTLR_EL1_NTWE (BIT(18))
-#define SCTLR_EL1_NTWI (BIT(16))
-#define SCTLR_EL1_UCT (BIT(15))
-#define SCTLR_EL1_DZE (BIT(14))
-#define SCTLR_EL1_UMA (BIT(9))
-#define SCTLR_EL1_SED (BIT(8))
-#define SCTLR_EL1_ITD (BIT(7))
-#define SCTLR_EL1_CP15BEN (BIT(5))
-#define SCTLR_EL1_SA0 (BIT(4))
-
-#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \
- (BIT(29)))
+#define INIT_SCTLR_EL2_MMU_ON \
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
+ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
+ SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+#define INIT_SCTLR_EL2_MMU_OFF \
+ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
+
+/* SCTLR_EL1 specific flags. */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
#else
#define ENDIAN_SET_EL1 0
#endif
-#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
- SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
- SCTLR_EL1_DZE | SCTLR_EL1_UCT |\
- SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
- ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
-
-/* id_aa64isar0 */
-#define ID_AA64ISAR0_TS_SHIFT 52
-#define ID_AA64ISAR0_FHM_SHIFT 48
-#define ID_AA64ISAR0_DP_SHIFT 44
-#define ID_AA64ISAR0_SM4_SHIFT 40
-#define ID_AA64ISAR0_SM3_SHIFT 36
-#define ID_AA64ISAR0_SHA3_SHIFT 32
-#define ID_AA64ISAR0_RDM_SHIFT 28
-#define ID_AA64ISAR0_ATOMICS_SHIFT 20
-#define ID_AA64ISAR0_CRC32_SHIFT 16
-#define ID_AA64ISAR0_SHA2_SHIFT 12
-#define ID_AA64ISAR0_SHA1_SHIFT 8
-#define ID_AA64ISAR0_AES_SHIFT 4
-
-/* id_aa64isar1 */
-#define ID_AA64ISAR1_SB_SHIFT 36
-#define ID_AA64ISAR1_FRINTTS_SHIFT 32
-#define ID_AA64ISAR1_GPI_SHIFT 28
-#define ID_AA64ISAR1_GPA_SHIFT 24
-#define ID_AA64ISAR1_LRCPC_SHIFT 20
-#define ID_AA64ISAR1_FCMA_SHIFT 16
-#define ID_AA64ISAR1_JSCVT_SHIFT 12
-#define ID_AA64ISAR1_API_SHIFT 8
-#define ID_AA64ISAR1_APA_SHIFT 4
-#define ID_AA64ISAR1_DPB_SHIFT 0
-
-#define ID_AA64ISAR1_APA_NI 0x0
-#define ID_AA64ISAR1_APA_ARCHITECTED 0x1
-#define ID_AA64ISAR1_API_NI 0x0
-#define ID_AA64ISAR1_API_IMP_DEF 0x1
-#define ID_AA64ISAR1_GPA_NI 0x0
-#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1
-#define ID_AA64ISAR1_GPI_NI 0x0
-#define ID_AA64ISAR1_GPI_IMP_DEF 0x1
+#define INIT_SCTLR_EL1_MMU_OFF \
+ (ENDIAN_SET_EL1 | SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | \
+ SCTLR_EL1_EIS | SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
+
+#define INIT_SCTLR_EL1_MMU_ON \
+ (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | \
+ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I | \
+ SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_nTWE | \
+ SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
+ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_EPAN | \
+ SCTLR_EL1_LSMAOE | SCTLR_EL1_nTLSMD | SCTLR_EL1_EIS | \
+ SCTLR_EL1_TSCXT | SCTLR_EL1_EOS)
+
+/* MAIR_ELx memory attributes (used by Linux) */
+#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
+#define MAIR_ATTR_DEVICE_nGnRE UL(0x04)
+#define MAIR_ATTR_NORMAL_NC UL(0x44)
+#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0)
+#define MAIR_ATTR_NORMAL UL(0xff)
+#define MAIR_ATTR_MASK UL(0xff)
+
+/* Position the attr at the correct index */
+#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
/* id_aa64pfr0 */
-#define ID_AA64PFR0_CSV3_SHIFT 60
-#define ID_AA64PFR0_CSV2_SHIFT 56
-#define ID_AA64PFR0_DIT_SHIFT 48
-#define ID_AA64PFR0_SVE_SHIFT 32
-#define ID_AA64PFR0_RAS_SHIFT 28
-#define ID_AA64PFR0_GIC_SHIFT 24
-#define ID_AA64PFR0_ASIMD_SHIFT 20
-#define ID_AA64PFR0_FP_SHIFT 16
-#define ID_AA64PFR0_EL3_SHIFT 12
-#define ID_AA64PFR0_EL2_SHIFT 8
-#define ID_AA64PFR0_EL1_SHIFT 4
-#define ID_AA64PFR0_EL0_SHIFT 0
-
-#define ID_AA64PFR0_SVE 0x1
-#define ID_AA64PFR0_RAS_V1 0x1
-#define ID_AA64PFR0_FP_NI 0xf
-#define ID_AA64PFR0_FP_SUPPORTED 0x0
-#define ID_AA64PFR0_ASIMD_NI 0xf
-#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
-#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
-#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
-#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
-
-/* id_aa64pfr1 */
-#define ID_AA64PFR1_SSBS_SHIFT 4
-
-#define ID_AA64PFR1_SSBS_PSTATE_NI 0
-#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
-#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
-
-/* id_aa64zfr0 */
-#define ID_AA64ZFR0_SM4_SHIFT 40
-#define ID_AA64ZFR0_SHA3_SHIFT 32
-#define ID_AA64ZFR0_BITPERM_SHIFT 16
-#define ID_AA64ZFR0_AES_SHIFT 4
-#define ID_AA64ZFR0_SVEVER_SHIFT 0
-
-#define ID_AA64ZFR0_SM4 0x1
-#define ID_AA64ZFR0_SHA3 0x1
-#define ID_AA64ZFR0_BITPERM 0x1
-#define ID_AA64ZFR0_AES 0x1
-#define ID_AA64ZFR0_AES_PMULL 0x2
-#define ID_AA64ZFR0_SVEVER_SVE2 0x1
+#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1
+#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2
/* id_aa64mmfr0 */
-#define ID_AA64MMFR0_TGRAN4_SHIFT 28
-#define ID_AA64MMFR0_TGRAN64_SHIFT 24
-#define ID_AA64MMFR0_TGRAN16_SHIFT 20
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
-#define ID_AA64MMFR0_SNSMEM_SHIFT 12
-#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
-#define ID_AA64MMFR0_ASID_SHIFT 4
-#define ID_AA64MMFR0_PARANGE_SHIFT 0
-
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
-#define ID_AA64MMFR0_PARANGE_48 0x5
-#define ID_AA64MMFR0_PARANGE_52 0x6
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
+
+#define ARM64_MIN_PARANGE_BITS 32
+
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52
#else
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48
#endif
-/* id_aa64mmfr1 */
-#define ID_AA64MMFR1_PAN_SHIFT 20
-#define ID_AA64MMFR1_LOR_SHIFT 16
-#define ID_AA64MMFR1_HPD_SHIFT 12
-#define ID_AA64MMFR1_VHE_SHIFT 8
-#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
-#define ID_AA64MMFR1_HADBS_SHIFT 0
-
-#define ID_AA64MMFR1_VMIDBITS_8 0
-#define ID_AA64MMFR1_VMIDBITS_16 2
-
-/* id_aa64mmfr2 */
-#define ID_AA64MMFR2_FWB_SHIFT 40
-#define ID_AA64MMFR2_AT_SHIFT 32
-#define ID_AA64MMFR2_LVA_SHIFT 16
-#define ID_AA64MMFR2_IESB_SHIFT 12
-#define ID_AA64MMFR2_LSM_SHIFT 8
-#define ID_AA64MMFR2_UAO_SHIFT 4
-#define ID_AA64MMFR2_CNP_SHIFT 0
-
-/* id_aa64dfr0 */
-#define ID_AA64DFR0_PMSVER_SHIFT 32
-#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
-#define ID_AA64DFR0_WRPS_SHIFT 20
-#define ID_AA64DFR0_BRPS_SHIFT 12
-#define ID_AA64DFR0_PMUVER_SHIFT 8
-#define ID_AA64DFR0_TRACEVER_SHIFT 4
-#define ID_AA64DFR0_DEBUGVER_SHIFT 0
-
-#define ID_ISAR5_RDM_SHIFT 24
-#define ID_ISAR5_CRC32_SHIFT 16
-#define ID_ISAR5_SHA2_SHIFT 12
-#define ID_ISAR5_SHA1_SHIFT 8
-#define ID_ISAR5_AES_SHIFT 4
-#define ID_ISAR5_SEVL_SHIFT 0
-
-#define MVFR0_FPROUND_SHIFT 28
-#define MVFR0_FPSHVEC_SHIFT 24
-#define MVFR0_FPSQRT_SHIFT 20
-#define MVFR0_FPDIVIDE_SHIFT 16
-#define MVFR0_FPTRAP_SHIFT 12
-#define MVFR0_FPDP_SHIFT 8
-#define MVFR0_FPSP_SHIFT 4
-#define MVFR0_SIMD_SHIFT 0
-
-#define MVFR1_SIMDFMAC_SHIFT 28
-#define MVFR1_FPHP_SHIFT 24
-#define MVFR1_SIMDHP_SHIFT 20
-#define MVFR1_SIMDSP_SHIFT 16
-#define MVFR1_SIMDINT_SHIFT 12
-#define MVFR1_SIMDLS_SHIFT 8
-#define MVFR1_FPDNAN_SHIFT 4
-#define MVFR1_FPFTZ_SHIFT 0
-
-
-#define ID_AA64MMFR0_TGRAN4_SHIFT 28
-#define ID_AA64MMFR0_TGRAN64_SHIFT 24
-#define ID_AA64MMFR0_TGRAN16_SHIFT 20
-
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
-
#if defined(CONFIG_ARM64_4K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
#elif defined(CONFIG_ARM64_64K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
#endif
+#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
+#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
-/*
- * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
- * are reserved by the SVE architecture for future expansion of the LEN
- * field, with compatible semantics.
- */
-#define ZCR_ELx_LEN_SHIFT 0
-#define ZCR_ELx_LEN_SIZE 9
-#define ZCR_ELx_LEN_MASK 0x1ff
+#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */
+#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */
#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */
#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
-#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+/* GCR_EL1 Definitions */
+#define SYS_GCR_EL1_RRND (BIT(16))
+#define SYS_GCR_EL1_EXCL_MASK 0xffffUL
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK
+#endif
+
+#define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
+
+/* RGSR_EL1 Definitions */
+#define SYS_RGSR_EL1_TAG_MASK 0xfUL
+#define SYS_RGSR_EL1_SEED_SHIFT 8
+#define SYS_RGSR_EL1_SEED_MASK 0xffffUL
+
+/* TFSR{,E0}_EL1 bit definitions */
+#define SYS_TFSR_EL1_TF0_SHIFT 0
+#define SYS_TFSR_EL1_TF1_SHIFT 1
+#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT)
+#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT)
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#ifdef __ASSEMBLY__
+#define TRFCR_ELx_TS_SHIFT 5
+#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_EL2_CX BIT(3)
+#define TRFCR_ELx_ExTRE BIT(1)
+#define TRFCR_ELx_E0TRE BIT(0)
+
+/* GIC Hypervisor interface registers */
+/* ICH_MISR_EL2 bit definitions */
+#define ICH_MISR_EOI (1 << 0)
+#define ICH_MISR_U (1 << 1)
+
+/* ICH_LR*_EL2 bit definitions */
+#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)
+
+#define ICH_LR_EOI (1ULL << 41)
+#define ICH_LR_GROUP (1ULL << 60)
+#define ICH_LR_HW (1ULL << 61)
+#define ICH_LR_STATE (3ULL << 62)
+#define ICH_LR_PENDING_BIT (1ULL << 62)
+#define ICH_LR_ACTIVE_BIT (1ULL << 63)
+#define ICH_LR_PHYS_ID_SHIFT 32
+#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)
+#define ICH_LR_PRIORITY_SHIFT 48
+#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)
+
+/* ICH_HCR_EL2 bit definitions */
+#define ICH_HCR_EN (1 << 0)
+#define ICH_HCR_UIE (1 << 1)
+#define ICH_HCR_NPIE (1 << 3)
+#define ICH_HCR_TC (1 << 10)
+#define ICH_HCR_TALL0 (1 << 11)
+#define ICH_HCR_TALL1 (1 << 12)
+#define ICH_HCR_TDIR (1 << 14)
+#define ICH_HCR_EOIcount_SHIFT 27
+#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)
+
+/* ICH_VMCR_EL2 bit definitions */
+#define ICH_VMCR_ACK_CTL_SHIFT 2
+#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)
+#define ICH_VMCR_FIQ_EN_SHIFT 3
+#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT)
+#define ICH_VMCR_CBPR_SHIFT 4
+#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT)
+#define ICH_VMCR_EOIM_SHIFT 9
+#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT)
+#define ICH_VMCR_BPR1_SHIFT 18
+#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT)
+#define ICH_VMCR_BPR0_SHIFT 21
+#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT)
+#define ICH_VMCR_PMR_SHIFT 24
+#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT)
+#define ICH_VMCR_ENG0_SHIFT 0
+#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT)
+#define ICH_VMCR_ENG1_SHIFT 1
+#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
+
+/* ICH_VTR_EL2 bit definitions */
+#define ICH_VTR_PRI_BITS_SHIFT 29
+#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)
+#define ICH_VTR_ID_BITS_SHIFT 23
+#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)
+#define ICH_VTR_SEIS_SHIFT 22
+#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
+#define ICH_VTR_A3V_SHIFT 21
+#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
+#define ICH_VTR_TDS_SHIFT 19
+#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
- .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
- .equ .L__reg_num_x\num, \num
- .endr
- .equ .L__reg_num_xzr, 31
+/*
+ * Permission Indirection Extension (PIE) permission encodings.
+ * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
+ */
+#define PIE_NONE_O 0x0
+#define PIE_R_O 0x1
+#define PIE_X_O 0x2
+#define PIE_RX_O 0x3
+#define PIE_RW_O 0x5
+#define PIE_RWnX_O 0x6
+#define PIE_RWX_O 0x7
+#define PIE_R 0x8
+#define PIE_GCS 0x9
+#define PIE_RX 0xa
+#define PIE_RW 0xc
+#define PIE_RWX 0xe
+
+#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
+#define ARM64_FEATURE_FIELD_BITS 4
+
+/* Defined for compatibility only, do not add new users. */
+#define ARM64_FEATURE_MASK(x) (x##_MASK)
+
+#ifdef __ASSEMBLY__
.macro mrs_s, rt, sreg
- __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
+ __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
.endm
.macro msr_s, sreg, rt
- __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt))
+ __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
.endm
#else
+#include <linux/bitfield.h>
#include <linux/build_bug.h>
#include <linux/types.h>
-
-#define __DEFINE_MRS_MSR_S_REGNUM \
-" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
-" .equ .L__reg_num_x\\num, \\num\n" \
-" .endr\n" \
-" .equ .L__reg_num_xzr, 31\n"
+#include <asm/alternative.h>
#define DEFINE_MRS_S \
- __DEFINE_MRS_MSR_S_REGNUM \
+ __DEFINE_ASM_GPR_NUMS \
" .macro mrs_s, rt, sreg\n" \
- __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \
+ __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \
" .endm\n"
#define DEFINE_MSR_S \
- __DEFINE_MRS_MSR_S_REGNUM \
+ __DEFINE_ASM_GPR_NUMS \
" .macro msr_s, sreg, rt\n" \
- __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \
+ __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \
" .endm\n"
#define UNDEFINE_MRS_S \
@@ -815,15 +1137,21 @@
/*
* For registers without architectural names, or simply unsupported by
* GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
*/
#define read_sysreg_s(r) ({ \
u64 __val; \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
} while (0)
@@ -838,6 +1166,30 @@
write_sysreg(__scs_new, sysreg); \
} while (0)
+#define sysreg_clear_set_s(sysreg, clear, set) do { \
+ u64 __scs_val = read_sysreg_s(sysreg); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg_s(__scs_new, sysreg); \
+} while (0)
+
+#define read_sysreg_par() ({ \
+ u64 par; \
+ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
+ par = read_sysreg(par_el1); \
+ asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
+ par; \
+})
+
+#define SYS_FIELD_GET(reg, field, val) \
+ FIELD_GET(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val) \
+ FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+
#endif
#endif /* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 1ab63cfbbaf1..c34344256762 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -18,22 +18,20 @@
struct pt_regs;
-void die(const char *msg, struct pt_regs *regs, int err);
+void die(const char *msg, struct pt_regs *regs, long err);
struct siginfo;
void arm64_notify_die(const char *str, struct pt_regs *regs,
- int signo, int sicode, void __user *addr,
- int err);
+ int signo, int sicode, unsigned long far,
+ unsigned long err);
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
+void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
struct pt_regs *),
int sig, int code, const char *name);
struct mm_struct;
extern void __show_regs(struct pt_regs *);
-extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..e72a3bf9e563 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -18,14 +18,11 @@ struct task_struct;
#include <asm/stack_pointer.h>
#include <asm/types.h>
-typedef unsigned long mm_segment_t;
-
/*
* low level task data that entry.S needs immediate access to.
*/
struct thread_info {
unsigned long flags; /* low level flags */
- mm_segment_t addr_limit; /* address limit */
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
@@ -41,6 +38,11 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *scs_base;
+ void *scs_sp;
+#endif
+ u32 cpu;
};
#define thread_saved_pc(tsk) \
@@ -53,8 +55,6 @@ struct thread_info {
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-void arch_release_task_struct(struct task_struct *tsk);
-
#endif
#define TIF_SIGPENDING 0 /* signal pending */
@@ -62,8 +62,8 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
-#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */
-#define TIF_NOHZ 7
+#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -75,38 +75,51 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SVE 23 /* Scalable Vector Extension in use */
-#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
+#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */
#define TIF_SSBD 25 /* Wants SSB mitigation */
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
+#define TIF_SME 27 /* SME in use */
+#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
-#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
-#define _TIF_FSCHECK (1 << TIF_FSCHECK)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SVE (1 << TIF_SVE)
+#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
+#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
- _TIF_UPROBE | _TIF_FSCHECK)
+ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
+ _TIF_NOTIFY_SIGNAL)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
- _TIF_NOHZ | _TIF_SYSCALL_EMU)
+ _TIF_SYSCALL_EMU)
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+ .scs_sp = init_shadow_call_stack,
+#else
+#define INIT_SCS
+#endif
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = _TIF_FOREIGN_FPSTATE, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
+ INIT_SCS \
}
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index b76df828e6b7..0150deb332af 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -21,11 +21,46 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+/*
+ * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
+ */
+static inline int tlb_get_level(struct mmu_gather *tlb)
+{
+ /* The TTL field is only valid for the leaf entry. */
+ if (tlb->freed_tables)
+ return TLBI_TTL_UNKNOWN;
+
+ if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 3;
+
+ if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
+ tlb->cleared_puds ||
+ tlb->cleared_p4ds))
+ return 2;
+
+ if (tlb->cleared_puds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_p4ds))
+ return 1;
+
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
+}
+
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
bool last_level = !tlb->freed_tables;
unsigned long stride = tlb_get_unmap_size(tlb);
+ int tlb_level = tlb_get_level(tlb);
/*
* If we're tearing down the address space then we only care about
@@ -38,24 +73,27 @@ static inline void tlb_flush(struct mmu_gather *tlb)
return;
}
- __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
+ __flush_tlb_range(&vma, tlb->start, tlb->end, stride,
+ last_level, tlb_level);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
- pgtable_pte_page_dtor(pte);
- tlb_remove_table(tlb, pte);
+ struct ptdesc *ptdesc = page_ptdesc(pte);
+
+ pagetable_pte_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#if CONFIG_PGTABLE_LEVELS > 2
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
- struct page *page = virt_to_page(pmdp);
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
- pgtable_pmd_page_dtor(page);
- tlb_remove_table(tlb, page);
+ pagetable_pmd_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
@@ -63,7 +101,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
unsigned long addr)
{
- tlb_remove_table(tlb, virt_to_page(pudp));
+ struct ptdesc *ptdesc = virt_to_ptdesc(pudp);
+
+ pagetable_pud_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
diff --git a/arch/arm64/include/asm/tlbbatch.h b/arch/arm64/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..fedb0b87b8db
--- /dev/null
+++ b/arch/arm64/include/asm/tlbbatch.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_ARM64_TLBBATCH_H
+#define _ARCH_ARM64_TLBBATCH_H
+
+struct arch_tlbflush_unmap_batch {
+ /*
+ * For arm64, HW can do tlb shootdown, so we don't
+ * need to record cpumask for sending IPI
+ */
+};
+
+#endif /* _ARCH_ARM64_TLBBATCH_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc3949064725..1deb5d789c2e 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -10,8 +10,10 @@
#ifndef __ASSEMBLY__
+#include <linux/bitfield.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
@@ -27,14 +29,16 @@
* not. The macros handles invoking the asm with or without the
* register argument as appropriate.
*/
-#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \
+#define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op "\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op, \
ARM64_WORKAROUND_REPEAT_TLBI, \
CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \
: : )
-#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \
+#define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \
+ "tlbi " #op ", %0\n" \
ALTERNATIVE("nop\n nop", \
"dsb ish\n tlbi " #op ", %0", \
ARM64_WORKAROUND_REPEAT_TLBI, \
@@ -60,6 +64,111 @@
})
/*
+ * Get translation granule of the system, which is decided by
+ * PAGE_SIZE. Used by TTL.
+ * - 4KB : 1
+ * - 16KB : 2
+ * - 64KB : 3
+ */
+#define TLBI_TTL_TG_4K 1
+#define TLBI_TTL_TG_16K 2
+#define TLBI_TTL_TG_64K 3
+
+static inline unsigned long get_trans_granule(void)
+{
+ switch (PAGE_SIZE) {
+ case SZ_4K:
+ return TLBI_TTL_TG_4K;
+ case SZ_16K:
+ return TLBI_TTL_TG_16K;
+ case SZ_64K:
+ return TLBI_TTL_TG_64K;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Level-based TLBI operations.
+ *
+ * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
+ * the level at which the invalidation must take place. If the level is
+ * wrong, no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
+ *
+ * For Stage-2 invalidation, use the level values provided to that effect
+ * in asm/stage2_pgtable.h.
+ */
+#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+
+#define TLBI_TTL_UNKNOWN INT_MAX
+
+#define __tlbi_level(op, addr, level) do { \
+ u64 arg = addr; \
+ \
+ if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
+ level >= 0 && level <= 3) { \
+ u64 ttl = level & 3; \
+ ttl |= get_trans_granule() << 2; \
+ arg &= ~TLBI_TTL_MASK; \
+ arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \
+ } \
+ \
+ __tlbi(op, arg); \
+} while(0)
+
+#define __tlbi_user_level(op, arg, level) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi_level(op, (arg | USER_ASID_FLAG), level); \
+} while (0)
+
+/*
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
+ *
+ * +----------+------+-------+-------+-------+----------------------+
+ * | ASID | TG | SCALE | NUM | TTL | BADDR |
+ * +-----------------+-------+-------+-------+----------------------+
+ * |63 48|47 46|45 44|43 39|38 37|36 0|
+ *
+ * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
+ * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
+ *
+ */
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = (baddr); \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta &= GENMASK_ULL(36, 0); \
+ __ta |= __ttl << 37; \
+ __ta |= (unsigned long)(num) << 39; \
+ __ta |= (unsigned long)(scale) << 44; \
+ __ta |= get_trans_granule() << 46; \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
+ })
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale) \
+ ((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
+
+/*
+ * Generate 'num' values from -1 to 30 with -1 rejected by the
+ * __flush_tlb_range() loop below.
+ */
+#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+
+/*
* TLB Invalidation
* ================
*
@@ -116,12 +225,16 @@
* CPUs, ensuring that any walk-cache entries associated with the
* translation are also invalidated.
*
- * __flush_tlb_range(vma, start, end, stride, last_level)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
* Invalidate the virtual-address range '[start, end)' on all
* CPUs for the user address space corresponding to 'vma->mm'.
* The invalidation operations are issued at a granularity
* determined by 'stride' and only affect any walk-cache entries
- * if 'last_level' is equal to false.
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -146,22 +259,33 @@ static inline void flush_tlb_all(void)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
- unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+ unsigned long asid;
dsb(ishst);
+ asid = __TLBI_VADDR(0, ASID(mm));
__tlbi(aside1is, asid);
__tlbi_user(aside1is, asid);
dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
- unsigned long uaddr)
+static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
+ unsigned long uaddr)
{
- unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+ unsigned long addr;
dsb(ishst);
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+}
+
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -171,44 +295,169 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ish);
}
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ /*
+ * TLB flush deferral is not required on systems which are affected by
+ * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
+ * will have two consecutive TLBI instructions with a dsb(ish) in between
+ * defeating the purpose (i.e save overall 'dsb ish' cost).
+ */
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI))
+ return false;
+
+ return true;
+}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ __flush_tlb_page_nosync(mm, uaddr);
+}
+
+/*
+ * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
+ * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
+ * flush_tlb_batched_pending().
+ */
+static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ dsb(ish);
+}
+
+/*
+ * To support TLB batched flush for multiple pages unmapping, we only send
+ * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the
+ * completion at the end in arch_tlbbatch_flush(). Since we've already issued
+ * TLBI for each page so only a DSB is needed to synchronise its effect on the
+ * other CPUs.
+ *
+ * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence
+ * for each page.
+ */
+static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ dsb(ish);
+}
+
/*
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS PTRS_PER_PTE
+#define MAX_DVM_OPS PTRS_PER_PTE
+
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op: TLBI instruction that operates on a range (has 'r' prefix)
+ * @start: The start address of the range
+ * @pages: Range as the number of pages from 'start'
+ * @stride: Flush granularity
+ * @asid: The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ * (typically for user ASIDs). 'flase' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one until the alignment is reached
+ * using the non-range operations. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
+ *
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride, \
+ asid, tlb_level, tlbi_user, lpa2) \
+do { \
+ int num = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
+ unsigned long addr; \
+ \
+ while (pages > 0) { \
+ if (!system_supports_tlb_range() || \
+ pages == 1 || \
+ (lpa2 && start != ALIGN(start, SZ_64K))) { \
+ addr = __TLBI_VADDR(start, asid); \
+ __tlbi_level(op, addr, tlb_level); \
+ if (tlbi_user) \
+ __tlbi_user_level(op, addr, tlb_level); \
+ start += stride; \
+ pages -= stride >> PAGE_SHIFT; \
+ continue; \
+ } \
+ \
+ num = __TLBI_RANGE_NUM(pages, scale); \
+ if (num >= 0) { \
+ addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ scale, num, tlb_level); \
+ __tlbi(r##op, addr); \
+ if (tlbi_user) \
+ __tlbi_user(r##op, addr); \
+ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ pages -= __TLBI_RANGE_PAGES(num, scale); \
+ } \
+ scale--; \
+ } \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- unsigned long stride, bool last_level)
+ unsigned long stride, bool last_level,
+ int tlb_level)
{
- unsigned long asid = ASID(vma->vm_mm);
- unsigned long addr;
+ unsigned long asid, pages;
start = round_down(start, stride);
end = round_up(end, stride);
+ pages = (end - start) >> PAGE_SHIFT;
- if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+ /*
+ * When not uses TLB range ops, we can handle up to
+ * (MAX_DVM_OPS - 1) pages;
+ * When uses TLB range ops, we can handle up to
+ * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ */
+ if ((!system_supports_tlb_range() &&
+ (end - start) >= (MAX_DVM_OPS * stride)) ||
+ pages >= MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
}
- /* Convert the stride into units of 4k */
- stride >>= 12;
+ dsb(ishst);
+ asid = ASID(vma->vm_mm);
- start = __TLBI_VADDR(start, asid);
- end = __TLBI_VADDR(end, asid);
+ if (last_level)
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
+ else
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
- dsb(ishst);
- for (addr = start; addr < end; addr += stride) {
- if (last_level) {
- __tlbi(vale1is, addr);
- __tlbi_user(vale1is, addr);
- } else {
- __tlbi(vae1is, addr);
- __tlbi_user(vae1is, addr);
- }
- }
dsb(ish);
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
@@ -217,15 +466,17 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
unsigned long addr;
- if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
flush_tlb_all();
return;
}
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a4d945db95a2..a323b109b9c4 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -16,8 +16,18 @@ int pcibus_to_node(struct pci_bus *bus);
#include <linux/arch_topology.h>
+void update_freq_counters_refs(void);
+
/* Replace task scheduler's default frequency-invariant accounting */
+#define arch_scale_freq_tick topology_scale_freq_tick
+#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
+#define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
+#endif
/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale
@@ -25,6 +35,10 @@ int pcibus_to_node(struct pci_bus *bus);
/* Enable topology flag updates */
#define arch_update_cpu_topology topology_update_cpu_topology
+/* Replace task scheduler's default thermal pressure API */
+#define arch_scale_thermal_pressure topology_get_thermal_pressure
+#define arch_update_thermal_pressure topology_update_thermal_pressure
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h
new file mode 100644
index 000000000000..033d400a4ea4
--- /dev/null
+++ b/arch/arm64/include/asm/trans_pgd.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2021, Microsoft Corporation.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _ASM_TRANS_TABLE_H
+#define _ASM_TRANS_TABLE_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+#include <asm/pgtable-types.h>
+
+/*
+ * trans_alloc_page
+ * - Allocator that should return exactly one zeroed page, if this
+ * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page()
+ * return -ENOMEM error.
+ *
+ * trans_alloc_arg
+ * - Passed to trans_alloc_page as an argument
+ */
+
+struct trans_pgd_info {
+ void * (*trans_alloc_page)(void *arg);
+ void *trans_alloc_arg;
+};
+
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
+ unsigned long start, unsigned long end);
+
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+ unsigned long *t0sz, void *page);
+
+int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
+ phys_addr_t *el2_vectors);
+
+extern char trans_pgd_stub_vectors[];
+
+#endif /* _ASM_TRANS_TABLE_H */
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index cee5928e1b7d..eefe766d6161 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -9,26 +9,26 @@
#include <linux/list.h>
#include <asm/esr.h>
+#include <asm/ptrace.h>
#include <asm/sections.h>
-struct pt_regs;
-
-struct undef_hook {
- struct list_head node;
- u32 instr_mask;
- u32 instr_val;
- u64 pstate_mask;
- u64 pstate_val;
- int (*fn)(struct pt_regs *regs, u32 instr);
-};
+#ifdef CONFIG_ARMV8_DEPRECATED
+bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn);
+#else
+static inline bool
+try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
+{
+ return false;
+}
+#endif /* CONFIG_ARMV8_DEPRECATED */
-void register_undef_hook(struct undef_hook *hook);
-void unregister_undef_hook(struct undef_hook *hook);
-void force_signal_inject(int signal, int code, unsigned long address);
+void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
-void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str);
-void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str);
-void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, const char *str);
+void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
+void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
+
+int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
/*
* Move regs->pc to next instruction and do necessary setup before it
@@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr)
* errors share the same encoding as an all-zeros encoding from a CPU that
* doesn't support RAS.
*/
-static inline bool arm64_is_ras_serror(u32 esr)
+static inline bool arm64_is_ras_serror(unsigned long esr)
{
WARN_ON(preemptible());
@@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr)
* We treat them as Uncontainable.
* Non-RAS SError's are reported as Uncontained/Uncategorized.
*/
-static inline u32 arm64_ras_serror_get_severity(u32 esr)
+static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
{
- u32 aet = esr & ESR_ELx_AET;
+ unsigned long aet = esr & ESR_ELx_AET;
if (!arm64_is_ras_serror(esr)) {
/* Not a RAS error, we can't interpret the ESR. */
@@ -98,6 +98,57 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr)
return aet;
}
-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
+
+static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned long esr)
+{
+ bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+ bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+ int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+ int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+ int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+ unsigned long dst, src, size;
+
+ dst = regs->regs[dstreg];
+ src = regs->regs[srcreg];
+ size = regs->regs[sizereg];
+
+ /*
+ * Put the registers back in the original format suitable for a
+ * prologue instruction, using the generic return routine from the
+ * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+ */
+ if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+ /* SET* instruction */
+ if (option_a ^ wrong_option) {
+ /* Format is from Option A; forward set */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[sizereg] = -size;
+ }
+ } else {
+ /* CPY* instruction */
+ if (!(option_a ^ wrong_option)) {
+ /* Format is from Option B */
+ if (regs->pstate & PSR_N_BIT) {
+ /* Backward copy */
+ regs->regs[dstreg] = dst - size;
+ regs->regs[srcreg] = src - size;
+ }
+ } else {
+ /* Format is from Option A */
+ if (size & BIT(63)) {
+ /* Forward copy */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[srcreg] = src + size;
+ regs->regs[sizereg] = -size;
+ }
+ }
+ }
+
+ if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+ regs->pc -= 8;
+ else
+ regs->pc -= 4;
+}
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 32fc8061aa76..14be5000c5a0 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -18,50 +18,25 @@
#include <linux/kasan-checks.h>
#include <linux/string.h>
+#include <asm/asm-extable.h>
#include <asm/cpufeature.h>
+#include <asm/mmu.h>
+#include <asm/mte.h>
#include <asm/ptrace.h>
#include <asm/memory.h>
#include <asm/extable.h>
-#define get_fs() (current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
- current_thread_info()->addr_limit = fs;
-
- /*
- * Prevent a mispredicted conditional call to set_fs from forwarding
- * the wrong address limit to access_ok under speculation.
- */
- spec_bar();
-
- /* On user-mode return, check fs is correct */
- set_thread_flag(TIF_FSCHECK);
-
- /*
- * Enable/disable UAO so that copy_to_user() etc can access
- * kernel memory with the unprivileged instructions.
- */
- if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
- else
- asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
- CONFIG_ARM64_UAO));
-}
-
-#define segment_eq(a, b) ((a) == (b))
+static inline int __access_ok(const void __user *ptr, unsigned long size);
/*
* Test whether a block of memory is a valid user space address.
* Returns 1 if the range is valid, 0 otherwise.
*
* This is equivalent to the following test:
- * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
+ * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX
*/
-static inline unsigned long __range_ok(const void __user *addr, unsigned long size)
+static inline int access_ok(const void __user *addr, unsigned long size)
{
- unsigned long ret, limit = current_thread_info()->addr_limit;
-
/*
* Asynchronous I/O running in a kernel thread does not have the
* TIF_TAGGED_ADDR flag of the process owning the mm, so always untag
@@ -71,35 +46,11 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
(current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR)))
addr = untagged_addr(addr);
- __chk_user_ptr(addr);
- asm volatile(
- // A + B <= C + 1 for all A,B,C, in four easy steps:
- // 1: X = A + B; X' = X % 2^64
- " adds %0, %3, %2\n"
- // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
- " csel %1, xzr, %1, hi\n"
- // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
- // to compensate for the carry flag being set in step 4. For
- // X > 2^64, X' merely has to remain nonzero, which it does.
- " csinv %0, %0, xzr, cc\n"
- // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1
- // comes from the carry in being clear. Otherwise, we are
- // testing X' - C == 0, subject to the previous adjustments.
- " sbcs xzr, %0, %1\n"
- " cset %0, ls\n"
- : "=&r" (ret), "+r" (limit) : "Ir" (size), "0" (addr) : "cc");
-
- return ret;
+ return likely(__access_ok(addr, size));
}
+#define access_ok access_ok
-#define access_ok(addr, size) __range_ok(addr, size)
-#define user_addr_max get_fs
-
-#define _ASM_EXTABLE(from, to) \
- " .pushsection __ex_table, \"a\"\n" \
- " .align 3\n" \
- " .long (" #from " - .), (" #to " - .)\n" \
- " .popsection\n"
+#include <asm-generic/access_ok.h>
/*
* User access enabling/disabling.
@@ -112,9 +63,8 @@ static inline void __uaccess_ttbr0_disable(void)
local_irq_save(flags);
ttbr = read_sysreg(ttbr1_el1);
ttbr &= ~TTBR_ASID_MASK;
- /* reserved_ttbr0 placed before swapper_pg_dir */
- write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1);
- isb();
+ /* reserved_pg_dir placed before swapper_pg_dir */
+ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);
isb();
@@ -138,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void)
ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
ttbr1 |= ttbr0 & TTBR_ASID_MASK;
write_sysreg(ttbr1, ttbr1_el1);
- isb();
/* Restore user page table */
write_sysreg(ttbr0, ttbr0_el1);
@@ -185,47 +134,32 @@ static inline void __uaccess_enable_hw_pan(void)
CONFIG_ARM64_PAN));
}
-#define __uaccess_disable(alt) \
-do { \
- if (!uaccess_ttbr0_disable()) \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \
- CONFIG_ARM64_PAN)); \
-} while (0)
+static inline void uaccess_disable_privileged(void)
+{
+ mte_disable_tco();
-#define __uaccess_enable(alt) \
-do { \
- if (!uaccess_ttbr0_enable()) \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
- CONFIG_ARM64_PAN)); \
-} while (0)
+ if (uaccess_ttbr0_disable())
+ return;
-static inline void uaccess_disable(void)
-{
- __uaccess_disable(ARM64_HAS_PAN);
+ __uaccess_enable_hw_pan();
}
-static inline void uaccess_enable(void)
+static inline void uaccess_enable_privileged(void)
{
- __uaccess_enable(ARM64_HAS_PAN);
-}
+ mte_enable_tco();
-/*
- * These functions are no-ops when UAO is present.
- */
-static inline void uaccess_disable_not_uao(void)
-{
- __uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
-}
+ if (uaccess_ttbr0_enable())
+ return;
-static inline void uaccess_enable_not_uao(void)
-{
- __uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+ __uaccess_disable_hw_pan();
}
/*
- * Sanitise a uaccess pointer such that it becomes NULL if above the
- * current addr_limit. In case the pointer is tagged (has the top byte set),
- * untag the pointer before checking.
+ * Sanitize a uaccess pointer such that it cannot reach any kernel address.
+ *
+ * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1
+ * address range (i.e. any kernel address), and either the pointer falls within
+ * the TTBR0 address range or must cause a fault.
*/
#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -233,14 +167,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
void __user *safe_ptr;
asm volatile(
- " bics xzr, %3, %2\n"
- " csel %0, %1, xzr, eq\n"
- : "=&r" (safe_ptr)
- : "r" (ptr), "r" (current_thread_info()->addr_limit),
- "r" (untagged_addr(ptr))
- : "cc");
-
- csdb();
+ " bic %0, %1, %2\n"
+ : "=r" (safe_ptr)
+ : "r" (ptr),
+ "i" (BIT(55))
+ );
+
return safe_ptr;
}
@@ -252,48 +184,52 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
* The "__xxx_error" versions set the third argument to -EFAULT if an error
* occurs, and leave it unchanged on success.
*/
-#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
+#define __get_mem_asm(load, reg, x, addr, err, type) \
asm volatile( \
- "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
- alt_instr " " reg "1, [%2]\n", feature) \
+ "1: " load " " reg "1, [%2]\n" \
"2:\n" \
- " .section .fixup, \"ax\"\n" \
- " .align 2\n" \
- "3: mov %w0, %3\n" \
- " mov %1, #0\n" \
- " b 2b\n" \
- " .previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : "+r" (err), "=&r" (x) \
- : "r" (addr), "i" (-EFAULT))
+ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \
+ : "+r" (err), "=r" (x) \
+ : "r" (addr))
+
+#define __raw_get_mem(ldr, x, ptr, err, type) \
+do { \
+ unsigned long __gu_val; \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 2: \
+ __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 4: \
+ __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \
+ break; \
+ case 8: \
+ __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+} while (0)
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
#define __raw_get_user(x, ptr, err) \
do { \
- unsigned long __gu_val; \
+ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \
+ __typeof__(x) __rgu_val; \
__chk_user_ptr(ptr); \
- uaccess_enable_not_uao(); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 2: \
- __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 4: \
- __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 8: \
- __get_user_asm("ldr", "ldtr", "%x", __gu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- uaccess_disable_not_uao(); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ \
+ uaccess_ttbr0_enable(); \
+ __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \
+ uaccess_ttbr0_disable(); \
+ \
+ (x) = __rgu_val; \
} while (0)
#define __get_user_error(x, ptr, err) \
@@ -304,7 +240,7 @@ do { \
__p = uaccess_mask_ptr(__p); \
__raw_get_user((x), __p, (err)); \
} else { \
- (x) = 0; (err) = -EFAULT; \
+ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \
} \
} while (0)
@@ -317,46 +253,69 @@ do { \
#define get_user __get_user
-#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
+/*
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ __typeof__(dst) __gkn_dst = (dst); \
+ __typeof__(src) __gkn_src = (src); \
+ int __gkn_err = 0; \
+ \
+ __mte_enable_tco_async(); \
+ __raw_get_mem("ldr", *((type *)(__gkn_dst)), \
+ (__force type *)(__gkn_src), __gkn_err, K); \
+ __mte_disable_tco_async(); \
+ \
+ if (unlikely(__gkn_err)) \
+ goto err_label; \
+} while (0)
+
+#define __put_mem_asm(store, reg, x, addr, err, type) \
asm volatile( \
- "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
- alt_instr " " reg "1, [%2]\n", feature) \
+ "1: " store " " reg "1, [%2]\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .align 2\n" \
- "3: mov %w0, %3\n" \
- " b 2b\n" \
- " .previous\n" \
- _ASM_EXTABLE(1b, 3b) \
+ _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \
: "+r" (err) \
- : "r" (x), "r" (addr), "i" (-EFAULT))
+ : "rZ" (x), "r" (addr))
+
+#define __raw_put_mem(str, x, ptr, err, type) \
+do { \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 2: \
+ __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 4: \
+ __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \
+ break; \
+ case 8: \
+ __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+} while (0)
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
#define __raw_put_user(x, ptr, err) \
do { \
- __typeof__(*(ptr)) __pu_val = (x); \
- __chk_user_ptr(ptr); \
- uaccess_enable_not_uao(); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 2: \
- __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 4: \
- __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- case 8: \
- __put_user_asm("str", "sttr", "%x", __pu_val, (ptr), \
- (err), ARM64_HAS_UAO); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- uaccess_disable_not_uao(); \
+ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \
+ __typeof__(*(ptr)) __rpu_val = (x); \
+ __chk_user_ptr(__rpu_ptr); \
+ \
+ uaccess_ttbr0_enable(); \
+ __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \
+ uaccess_ttbr0_disable(); \
} while (0)
#define __put_user_error(x, ptr, err) \
@@ -380,14 +339,34 @@ do { \
#define put_user __put_user
+/*
+ * We must not call into the scheduler between __mte_enable_tco_async() and
+ * __mte_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ __typeof__(dst) __pkn_dst = (dst); \
+ __typeof__(src) __pkn_src = (src); \
+ int __pkn_err = 0; \
+ \
+ __mte_enable_tco_async(); \
+ __raw_put_mem("str", *((type *)(__pkn_src)), \
+ (__force type *)(__pkn_dst), __pkn_err, K); \
+ __mte_disable_tco_async(); \
+ \
+ if (unlikely(__pkn_err)) \
+ goto err_label; \
+} while(0)
+
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
#define raw_copy_from_user(to, from, n) \
({ \
unsigned long __acfu_ret; \
- uaccess_enable_not_uao(); \
+ uaccess_ttbr0_enable(); \
__acfu_ret = __arch_copy_from_user((to), \
__uaccess_mask_ptr(from), (n)); \
- uaccess_disable_not_uao(); \
+ uaccess_ttbr0_disable(); \
__acfu_ret; \
})
@@ -395,24 +374,13 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi
#define raw_copy_to_user(to, from, n) \
({ \
unsigned long __actu_ret; \
- uaccess_enable_not_uao(); \
+ uaccess_ttbr0_enable(); \
__actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
(from), (n)); \
- uaccess_disable_not_uao(); \
+ uaccess_ttbr0_disable(); \
__actu_ret; \
})
-extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
-#define raw_copy_in_user(to, from, n) \
-({ \
- unsigned long __aciu_ret; \
- uaccess_enable_not_uao(); \
- __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \
- __uaccess_mask_ptr(from), (n)); \
- uaccess_disable_not_uao(); \
- __aciu_ret; \
-})
-
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
@@ -420,9 +388,9 @@ extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned lo
static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
if (access_ok(to, n)) {
- uaccess_enable_not_uao();
+ uaccess_ttbr0_enable();
n = __arch_clear_user(__uaccess_mask_ptr(to), n);
- uaccess_disable_not_uao();
+ uaccess_ttbr0_disable();
}
return n;
}
@@ -433,8 +401,6 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
-struct page;
-void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len);
extern unsigned long __must_check __copy_user_flushcache(void *to, const void __user *from, unsigned long n);
static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
@@ -444,4 +410,19 @@ static inline int __copy_from_user_flushcache(void *dst, const void __user *src,
}
#endif
+#ifdef CONFIG_ARCH_HAS_SUBPAGE_FAULTS
+
+/*
+ * Return 0 on success, the number of bytes not probed otherwise.
+ */
+static inline size_t probe_subpage_writeable(const char __user *uaddr,
+ size_t size)
+{
+ if (!system_supports_mte())
+ return 0;
+ return mte_probe_user_range(uaddr, size);
+}
+
+#endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2629a68b8724..491b2b9bd553 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -3,6 +3,7 @@
* Copyright (C) 2012 ARM Ltd.
*/
#ifdef CONFIG_COMPAT
+#define __ARCH_WANT_COMPAT_STAT
#define __ARCH_WANT_COMPAT_STAT64
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
@@ -25,8 +26,8 @@
#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
-#define __NR_compat_clock_getres 247
#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_getres 264
#define __NR_compat_clock_gettime64 403
#define __NR_compat_clock_getres_time64 406
@@ -38,11 +39,10 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 436
+#define __NR_compat_syscalls 462
#endif
#define __ARCH_WANT_SYS_CLONE
-#define __ARCH_WANT_SYS_CLONE3
#ifndef __COMPAT_SYSCALL_NR
#include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 94ab29cf4f00..7118282d1c79 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -53,7 +53,7 @@ __SYSCALL(__NR_lseek, compat_sys_lseek)
#define __NR_getpid 20
__SYSCALL(__NR_getpid, sys_getpid)
#define __NR_mount 21
-__SYSCALL(__NR_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
/* 22 was sys_umount */
__SYSCALL(22, sys_ni_syscall)
#define __NR_setuid 23
@@ -279,7 +279,7 @@ __SYSCALL(__NR_getpgid, sys_getpgid)
#define __NR_fchdir 133
__SYSCALL(__NR_fchdir, sys_fchdir)
#define __NR_bdflush 134
-__SYSCALL(__NR_bdflush, sys_bdflush)
+__SYSCALL(__NR_bdflush, sys_ni_syscall)
#define __NR_sysfs 135
__SYSCALL(__NR_sysfs, sys_sysfs)
#define __NR_personality 136
@@ -301,15 +301,15 @@ __SYSCALL(__NR_flock, sys_flock)
#define __NR_msync 144
__SYSCALL(__NR_msync, sys_msync)
#define __NR_readv 145
-__SYSCALL(__NR_readv, compat_sys_readv)
+__SYSCALL(__NR_readv, sys_readv)
#define __NR_writev 146
-__SYSCALL(__NR_writev, compat_sys_writev)
+__SYSCALL(__NR_writev, sys_writev)
#define __NR_getsid 147
__SYSCALL(__NR_getsid, sys_getsid)
#define __NR_fdatasync 148
__SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR__sysctl 149
-__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+ /* 149 was sys_sysctl */
+__SYSCALL(149, sys_ni_syscall)
#define __NR_mlock 150
__SYSCALL(__NR_mlock, sys_mlock)
#define __NR_munlock 151
@@ -508,8 +508,8 @@ __SYSCALL(__NR_io_submit, compat_sys_io_submit)
__SYSCALL(__NR_io_cancel, sys_io_cancel)
#define __NR_exit_group 248
__SYSCALL(__NR_exit_group, sys_exit_group)
-#define __NR_lookup_dcookie 249
-__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie)
+ /* 249 was lookup_dcookie */
+__SYSCALL(249, sys_ni_syscall)
#define __NR_epoll_create 250
__SYSCALL(__NR_epoll_create, sys_epoll_create)
#define __NR_epoll_ctl 251
@@ -599,9 +599,9 @@ __SYSCALL(__NR_recvfrom, compat_sys_recvfrom)
#define __NR_shutdown 293
__SYSCALL(__NR_shutdown, sys_shutdown)
#define __NR_setsockopt 294
-__SYSCALL(__NR_setsockopt, compat_sys_setsockopt)
+__SYSCALL(__NR_setsockopt, sys_setsockopt)
#define __NR_getsockopt 295
-__SYSCALL(__NR_getsockopt, compat_sys_getsockopt)
+__SYSCALL(__NR_getsockopt, sys_getsockopt)
#define __NR_sendmsg 296
__SYSCALL(__NR_sendmsg, compat_sys_sendmsg)
#define __NR_recvmsg 297
@@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
#define __NR_inotify_rm_watch 318
__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
#define __NR_mbind 319
-__SYSCALL(__NR_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
#define __NR_get_mempolicy 320
-__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
#define __NR_set_mempolicy 321
-__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
#define __NR_openat 322
__SYSCALL(__NR_openat, compat_sys_openat)
#define __NR_mkdirat 323
@@ -697,9 +697,9 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
#define __NR_tee 342
__SYSCALL(__NR_tee, sys_tee)
#define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_move_pages 344
-__SYSCALL(__NR_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
#define __NR_getcpu 345
__SYSCALL(__NR_getcpu, sys_getcpu)
#define __NR_epoll_pwait 346
@@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
#define __NR_setns 375
__SYSCALL(__NR_setns, sys_setns)
#define __NR_process_vm_readv 376
-__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
#define __NR_process_vm_writev 377
-__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
#define __NR_kcmp 378
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 379
@@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
#define __NR_io_pgetevents 399
__SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
#define __NR_migrate_pages 400
-__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
#define __NR_kexec_file_load 401
__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
/* 402 is unused */
@@ -879,6 +879,56 @@ __SYSCALL(__NR_fspick, sys_fspick)
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_close_range 436
+__SYSCALL(__NR_close_range, sys_close_range)
+#define __NR_openat2 437
+__SYSCALL(__NR_openat2, sys_openat2)
+#define __NR_pidfd_getfd 438
+__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
+#define __NR_faccessat2 439
+__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
+#define __NR_quotactl_fd 443
+__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
+#define __NR_landlock_create_ruleset 444
+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
+#define __NR_landlock_add_rule 445
+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+#define __NR_landlock_restrict_self 446
+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+#define __NR_set_mempolicy_home_node 450
+__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+#define __NR_map_shadow_stack 453
+__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack)
+#define __NR_futex_wake 454
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
+#define __NR_futex_requeue 456
+__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_statmount 457
+__SYSCALL(__NR_statmount, sys_statmount)
+#define __NR_listmount 458
+__SYSCALL(__NR_listmount, sys_listmount)
+#define __NR_lsm_get_self_attr 459
+__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
+#define __NR_lsm_set_self_attr 460
+__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
+#define __NR_lsm_list_modules 461
+__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index 315eef654e39..2b09495499c6 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -12,11 +12,11 @@
#define MAX_UINSN_BYTES AARCH64_INSN_SIZE
-#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES
+#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES)
#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
-typedef u32 uprobe_opcode_t;
+typedef __le32 uprobe_opcode_t;
struct arch_uprobe_task {
};
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 07468428fd29..b4ae32109932 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -12,6 +12,8 @@
*/
#define VDSO_LBASE 0x0
+#define __VVAR_PAGES 2
+
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
@@ -24,6 +26,9 @@
(void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \
})
+extern char vdso_start[], vdso_end[];
+extern char vdso32_start[], vdso32_end[];
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_H */
diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..b054d9febfb5
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/clocksource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSOCLOCKSOURCE_H
+#define __ASM_VDSOCLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ /* vdso clocksource for both 32 and 64bit tasks */ \
+ VDSO_CLOCKMODE_ARCHTIMER, \
+ /* vdso clocksource for 64bit tasks only */ \
+ VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT
+
+#endif
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
index 3fd8fd6d8fc2..3ac35f4a667c 100644
--- a/arch/arm64/include/asm/vdso/compat_barrier.h
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -20,16 +20,9 @@
#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
-#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD)
#define aarch32_smp_mb() dmb(ish)
#define aarch32_smp_rmb() dmb(ishld)
#define aarch32_smp_wmb() dmb(ishst)
-#else
-#define aarch32_smp_mb() dmb(ish)
-#define aarch32_smp_rmb() aarch32_smp_mb()
-#define aarch32_smp_wmb() dmb(ishst)
-#endif
-
#undef smp_mb
#undef smp_rmb
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index c50ee1b7d5cd..ecb6fd4c3c64 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -7,16 +7,15 @@
#ifndef __ASSEMBLY__
+#include <asm/barrier.h>
#include <asm/unistd.h>
-#include <uapi/linux/time.h>
+#include <asm/errno.h>
#include <asm/vdso/compat_barrier.h>
-#define __VDSO_USE_SYSCALL ULLONG_MAX
-
#define VDSO_HAS_CLOCK_GETRES 1
-#define VDSO_HAS_32BIT_FALLBACK 1
+#define BUILD_VDSO32 1
static __always_inline
int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
@@ -78,10 +77,6 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
register long ret asm ("r0");
register long nr asm("r7") = __NR_compat_clock_getres_time64;
- /* The checks below are required for ABI consistency with arm */
- if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
- return -EINVAL;
-
asm volatile(
" swi #0\n"
: "=r" (ret)
@@ -99,10 +94,6 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
register long ret asm ("r0");
register long nr asm("r7") = __NR_compat_clock_getres;
- /* The checks below are required for ABI consistency with arm */
- if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
- return -EINVAL;
-
asm volatile(
" swi #0\n"
: "=r" (ret)
@@ -112,16 +103,18 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
u64 res;
/*
- * clock_mode == 0 implies that vDSO are enabled otherwise
- * fallback on syscall.
+ * Core checks for mode already, so this raced against a concurrent
+ * update. Return something. Core will do another round and then
+ * see the mode change and fallback to the syscall.
*/
- if (clock_mode)
- return __VDSO_USE_SYSCALL;
+ if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER)
+ return 0;
/*
* This isb() is required to prevent that the counter value
@@ -161,6 +154,25 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return ret;
}
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+ const struct vdso_data *ret;
+
+ /* See __arch_get_vdso_data(). */
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data));
+
+ return ret;
+}
+#endif
+
+static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+{
+ return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
+}
+#define vdso_clocksource_ok vdso_clocksource_ok
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index b08f476b72b4..764d13e2916c 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -7,10 +7,10 @@
#ifndef __ASSEMBLY__
+#include <asm/alternative.h>
+#include <asm/barrier.h>
#include <asm/unistd.h>
-#include <uapi/linux/time.h>
-
-#define __VDSO_USE_SYSCALL ULLONG_MAX
+#include <asm/sysreg.h>
#define VDSO_HAS_CLOCK_GETRES 1
@@ -66,28 +66,35 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
-static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
{
u64 res;
/*
- * clock_mode == 0 implies that vDSO are enabled otherwise
- * fallback on syscall.
+ * Core checks for mode already, so this raced against a concurrent
+ * update. Return something. Core will do another round and then
+ * see the mode change and fallback to the syscall.
*/
- if (clock_mode)
- return __VDSO_USE_SYSCALL;
+ if (clock_mode == VDSO_CLOCKMODE_NONE)
+ return 0;
/*
- * This isb() is required to prevent that the counter value
+ * If FEAT_ECV is available, use the self-synchronizing counter.
+ * Otherwise the isb is required to prevent that the counter value
* is speculated.
- */
- isb();
- asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
- /*
- * This isb() is required to prevent that the seq lock is
- * speculated.#
- */
- isb();
+ */
+ asm volatile(
+ ALTERNATIVE("isb\n"
+ "mrs %0, cntvct_el0",
+ "nop\n"
+ __mrs_s("%0", SYS_CNTVCTSS_EL0),
+ ARM64_HAS_ECV)
+ : "=r" (res)
+ :
+ : "memory");
+
+ arch_counter_enforce_ordering(res);
return res;
}
@@ -98,6 +105,14 @@ const struct vdso_data *__arch_get_vdso_data(void)
return _vdso_data;
}
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+ return _timens_data;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/processor.h b/arch/arm64/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..ff830b766ad2
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/processor.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index 0c20a7c1bee5..f94b1457c117 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -22,15 +22,6 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
static __always_inline
-int __arm64_get_clock_mode(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- return use_syscall;
-}
-#define __arch_get_clock_mode __arm64_get_clock_mode
-
-static __always_inline
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
{
vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h
new file mode 100644
index 000000000000..b815d8f2c0dc
--- /dev/null
+++ b/arch/arm64/include/asm/vectors.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 ARM Ltd.
+ */
+#ifndef __ASM_VECTORS_H
+#define __ASM_VECTORS_H
+
+#include <linux/bug.h>
+#include <linux/percpu.h>
+
+#include <asm/fixmap.h>
+
+extern char vectors[];
+extern char tramp_vectors[];
+extern char __bp_harden_el1_vectors[];
+
+/*
+ * Note: the order of this enum corresponds to two arrays in entry.S:
+ * tramp_vecs and __bp_harden_el1_vectors. By default the canonical
+ * 'full fat' vectors are used directly.
+ */
+enum arm64_bp_harden_el1_vectors {
+#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+ /*
+ * Perform the BHB loop mitigation, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_LOOP,
+
+ /*
+ * Make the SMC call for firmware mitigation, before branching to the
+ * canonical vectors.
+ */
+ EL1_VECTOR_BHB_FW,
+
+ /*
+ * Use the ClearBHB instruction, before branching to the canonical
+ * vectors.
+ */
+ EL1_VECTOR_BHB_CLEAR_INSN,
+#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+ /*
+ * Remap the kernel before branching to the canonical vectors.
+ */
+ EL1_VECTOR_KPTI,
+};
+
+#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
+#define EL1_VECTOR_BHB_LOOP -1
+#define EL1_VECTOR_BHB_FW -1
+#define EL1_VECTOR_BHB_CLEAR_INSN -1
+#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
+
+/* The vectors to use on return from EL0. e.g. to remap the kernel */
+DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector);
+
+#ifndef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_VALIAS 0ul
+#endif
+
+static inline const char *
+arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot)
+{
+ if (cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ return (char *)(TRAMP_VALIAS + SZ_2K * slot);
+
+ WARN_ON_ONCE(slot == EL1_VECTOR_KPTI);
+
+ return __bp_harden_el1_vectors + SZ_2K * slot;
+}
+
+#endif /* __ASM_VECTORS_H */
diff --git a/arch/arm64/include/asm/vermagic.h b/arch/arm64/include/asm/vermagic.h
new file mode 100644
index 000000000000..a1eec6a000f1
--- /dev/null
+++ b/arch/arm64/include/asm/vermagic.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#define MODULE_ARCH_VERMAGIC "aarch64"
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 0958ed6191aa..261d6e9df2e1 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -35,8 +35,13 @@
*/
#define HVC_RESET_VECTORS 2
+/*
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
+ */
+#define HVC_FINALISE_EL2 3
+
/* Max number of HYP stub hypercalls */
-#define HVC_STUB_HCALL_NR 3
+#define HVC_STUB_HCALL_NR 4
/* Error returned when an invalid stub number is passed into x0 */
#define HVC_STUB_ERR 0xbadca11
@@ -44,6 +49,13 @@
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H BIT_ULL(32)
+
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>
@@ -62,12 +74,25 @@
*/
extern u32 __boot_cpu_mode[2];
+#define ARM64_VECTOR_TABLE_LEN SZ_2K
+
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
+
+DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
/* Reports the availability of HYP mode */
static inline bool is_hyp_mode_available(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized))
+ return true;
+
return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
}
@@ -75,20 +100,60 @@ static inline bool is_hyp_mode_available(void)
/* Check if the bootloader has booted CPUs in different modes */
static inline bool is_hyp_mode_mismatched(void)
{
+ /*
+ * If KVM protected mode is initialized, all CPUs must have been booted
+ * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+ */
+ if (IS_ENABLED(CONFIG_KVM) &&
+ static_branch_likely(&kvm_protected_mode_initialized))
+ return false;
+
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
-static inline bool is_kernel_in_hyp_mode(void)
+static __always_inline bool is_kernel_in_hyp_mode(void)
{
+ BUILD_BUG_ON(__is_defined(__KVM_NVHE_HYPERVISOR__) ||
+ __is_defined(__KVM_VHE_HYPERVISOR__));
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
-static inline bool has_vhe(void)
+static __always_inline bool has_vhe(void)
{
- if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ /*
+ * Code only run in VHE/NVHE hyp context can assume VHE is present or
+ * absent. Otherwise fall back to caps.
+ * This allows the compiler to discard VHE-specific code from the
+ * nVHE object, reducing the number of external symbol references
+ * needed to link.
+ */
+ if (is_vhe_hyp_code())
return true;
+ else if (is_nvhe_hyp_code())
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
+}
+
+static __always_inline bool is_protected_kvm_enabled(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+ else
+ return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
+}
- return false;
+static __always_inline bool has_hvhe(void)
+{
+ if (is_vhe_hyp_code())
+ return false;
+
+ return cpus_have_final_cap(ARM64_KVM_HVHE);
+}
+
+static inline bool is_hyp_nvhe(void)
+{
+ return is_hyp_mode_available() && !is_kernel_in_hyp_mode();
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
new file mode 100644
index 000000000000..38fafffe699f
--- /dev/null
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_ARM64_VMALLOC_H
+#define _ASM_ARM64_VMALLOC_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /*
+ * SW table walks can't handle removal of intermediate entries.
+ */
+ return pud_sect_supported() &&
+ !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ /* See arch_vmap_pud_supported() */
+ return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
+
+#endif
+
+#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
+static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
+{
+ return pgprot_tagged(prot);
+}
+
+#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 0a12115d9638..20873099c035 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -7,8 +7,8 @@
#include <linux/gfp.h>
#include <linux/kconfig.h>
#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
#include <asm/memory.h>
-#include <asm/pgtable.h>
#include <asm/thread_info.h>
/*
@@ -17,12 +17,13 @@
*/
static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
{
+ void *p;
+
BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
- return __vmalloc_node_range(stack_size, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP, PAGE_KERNEL, 0, node,
- __builtin_return_address(0));
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ __builtin_return_address(0));
+ return kasan_reset_tag(p);
}
#endif /* __ASM_VMAP_STACK_H */
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
new file mode 100644
index 000000000000..df2c47c55972
--- /dev/null
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System register offsets in the VNCR page
+ * All offsets are *byte* displacements!
+ */
+
+#ifndef __ARM64_VNCR_MAPPING_H__
+#define __ARM64_VNCR_MAPPING_H__
+
+#define VNCR_VTTBR_EL2 0x020
+#define VNCR_VTCR_EL2 0x040
+#define VNCR_VMPIDR_EL2 0x050
+#define VNCR_CNTVOFF_EL2 0x060
+#define VNCR_HCR_EL2 0x078
+#define VNCR_HSTR_EL2 0x080
+#define VNCR_VPIDR_EL2 0x088
+#define VNCR_TPIDR_EL2 0x090
+#define VNCR_HCRX_EL2 0x0A0
+#define VNCR_VNCR_EL2 0x0B0
+#define VNCR_CPACR_EL1 0x100
+#define VNCR_CONTEXTIDR_EL1 0x108
+#define VNCR_SCTLR_EL1 0x110
+#define VNCR_ACTLR_EL1 0x118
+#define VNCR_TCR_EL1 0x120
+#define VNCR_AFSR0_EL1 0x128
+#define VNCR_AFSR1_EL1 0x130
+#define VNCR_ESR_EL1 0x138
+#define VNCR_MAIR_EL1 0x140
+#define VNCR_AMAIR_EL1 0x148
+#define VNCR_MDSCR_EL1 0x158
+#define VNCR_SPSR_EL1 0x160
+#define VNCR_CNTV_CVAL_EL0 0x168
+#define VNCR_CNTV_CTL_EL0 0x170
+#define VNCR_CNTP_CVAL_EL0 0x178
+#define VNCR_CNTP_CTL_EL0 0x180
+#define VNCR_SCXTNUM_EL1 0x188
+#define VNCR_TFSR_EL1 0x190
+#define VNCR_HFGRTR_EL2 0x1B8
+#define VNCR_HFGWTR_EL2 0x1C0
+#define VNCR_HFGITR_EL2 0x1C8
+#define VNCR_HDFGRTR_EL2 0x1D0
+#define VNCR_HDFGWTR_EL2 0x1D8
+#define VNCR_ZCR_EL1 0x1E0
+#define VNCR_HAFGRTR_EL2 0x1E8
+#define VNCR_TTBR0_EL1 0x200
+#define VNCR_TTBR1_EL1 0x210
+#define VNCR_FAR_EL1 0x220
+#define VNCR_ELR_EL1 0x230
+#define VNCR_SP_EL1 0x240
+#define VNCR_VBAR_EL1 0x250
+#define VNCR_TCR2_EL1 0x270
+#define VNCR_PIRE0_EL1 0x290
+#define VNCR_PIRE0_EL2 0x298
+#define VNCR_PIR_EL1 0x2A0
+#define VNCR_ICH_LR0_EL2 0x400
+#define VNCR_ICH_LR1_EL2 0x408
+#define VNCR_ICH_LR2_EL2 0x410
+#define VNCR_ICH_LR3_EL2 0x418
+#define VNCR_ICH_LR4_EL2 0x420
+#define VNCR_ICH_LR5_EL2 0x428
+#define VNCR_ICH_LR6_EL2 0x430
+#define VNCR_ICH_LR7_EL2 0x438
+#define VNCR_ICH_LR8_EL2 0x440
+#define VNCR_ICH_LR9_EL2 0x448
+#define VNCR_ICH_LR10_EL2 0x450
+#define VNCR_ICH_LR11_EL2 0x458
+#define VNCR_ICH_LR12_EL2 0x460
+#define VNCR_ICH_LR13_EL2 0x468
+#define VNCR_ICH_LR14_EL2 0x470
+#define VNCR_ICH_LR15_EL2 0x478
+#define VNCR_ICH_AP0R0_EL2 0x480
+#define VNCR_ICH_AP0R1_EL2 0x488
+#define VNCR_ICH_AP0R2_EL2 0x490
+#define VNCR_ICH_AP0R3_EL2 0x498
+#define VNCR_ICH_AP1R0_EL2 0x4A0
+#define VNCR_ICH_AP1R1_EL2 0x4A8
+#define VNCR_ICH_AP1R2_EL2 0x4B0
+#define VNCR_ICH_AP1R3_EL2 0x4B8
+#define VNCR_ICH_HCR_EL2 0x4C0
+#define VNCR_ICH_VMCR_EL2 0x4C8
+#define VNCR_VDISR_EL2 0x500
+#define VNCR_PMBLIMITR_EL1 0x800
+#define VNCR_PMBPTR_EL1 0x810
+#define VNCR_PMBSR_EL1 0x820
+#define VNCR_PMSCR_EL1 0x828
+#define VNCR_PMSEVFR_EL1 0x830
+#define VNCR_PMSICR_EL1 0x838
+#define VNCR_PMSIRR_EL1 0x840
+#define VNCR_PMSLATFR_EL1 0x848
+#define VNCR_TRFCR_EL1 0x880
+#define VNCR_MPAM1_EL1 0x900
+#define VNCR_MPAMHCR_EL2 0x930
+#define VNCR_MPAMVPMV_EL2 0x938
+#define VNCR_MPAMVPM0_EL2 0x940
+#define VNCR_MPAMVPM1_EL2 0x948
+#define VNCR_MPAMVPM2_EL2 0x950
+#define VNCR_MPAMVPM3_EL2 0x958
+#define VNCR_MPAMVPM4_EL2 0x960
+#define VNCR_MPAMVPM5_EL2 0x968
+#define VNCR_MPAMVPM6_EL2 0x970
+#define VNCR_MPAMVPM7_EL2 0x978
+
+#endif /* __ARM64_VNCR_MAPPING_H__ */
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index 3333950b5909..f3b151ed0d7a 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -53,29 +53,20 @@ static inline unsigned long find_zero(unsigned long mask)
*/
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
- unsigned long ret, offset;
+ unsigned long ret;
+
+ __mte_enable_tco_async();
/* Load word from unaligned pointer addr */
asm(
- "1: ldr %0, %3\n"
+ "1: ldr %0, %2\n"
"2:\n"
- " .pushsection .fixup,\"ax\"\n"
- " .align 2\n"
- "3: and %1, %2, #0x7\n"
- " bic %2, %2, #0x7\n"
- " ldr %0, [%2]\n"
- " lsl %1, %1, #0x3\n"
-#ifndef __AARCH64EB__
- " lsr %0, %0, %1\n"
-#else
- " lsl %0, %0, %1\n"
-#endif
- " b 2b\n"
- " .popsection\n"
- _ASM_EXTABLE(1b, 3b)
- : "=&r" (ret), "=&r" (offset)
+ _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+ : "=&r" (ret)
: "r" (addr), "Q" (*(unsigned long *)addr));
+ __mte_disable_tco_async();
+
return ret;
}
diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h
index 31bbc803cecb..dffdc773221b 100644
--- a/arch/arm64/include/asm/xen/page.h
+++ b/arch/arm64/include/asm/xen/page.h
@@ -1 +1,7 @@
#include <xen/arm/page.h>
+#include <asm/mmu.h>
+
+static inline bool xen_kernel_unmapped_at_usr(void)
+{
+ return arm64_kernel_unmapped_at_el0();
+}
diff --git a/arch/arm64/include/asm/xen/swiotlb-xen.h b/arch/arm64/include/asm/xen/swiotlb-xen.h
new file mode 100644
index 000000000000..455ade5d5320
--- /dev/null
+++ b/arch/arm64/include/asm/xen/swiotlb-xen.h
@@ -0,0 +1 @@
+#include <xen/arm/swiotlb-xen.h>
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/xen-ops.h
index 27e984977402..7ebb7eb0bd93 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/xen-ops.h
@@ -1,2 +1,2 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <xen/arm/page-coherent.h>
+#include <xen/arm/xen-ops.h>
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
index 947f6a4f1aa0..befcd8a7abc9 100644
--- a/arch/arm64/include/asm/xor.h
+++ b/arch/arm64/include/asm/xor.h
@@ -16,7 +16,8 @@
extern struct xor_block_template const xor_block_inner_neon;
static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
kernel_neon_begin();
xor_block_inner_neon.do_2(bytes, p1, p2);
@@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
}
static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
kernel_neon_begin();
xor_block_inner_neon.do_3(bytes, p1, p2, p3);
@@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
kernel_neon_begin();
xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
@@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
kernel_neon_begin();
xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);