Diffstat (limited to 'arch/arm64')
-rw-r--r--  arch/arm64/Kconfig | 7
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt8370.dtsi | 16
-rw-r--r--  arch/arm64/include/asm/assembler.h | 4
-rw-r--r--  arch/arm64/include/asm/barrier.h | 3
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 28
-rw-r--r--  arch/arm64/include/asm/debug-monitors.h | 40
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 116
-rw-r--r--  arch/arm64/include/asm/exception.h | 14
-rw-r--r--  arch/arm64/include/asm/gcs.h | 2
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 2
-rw-r--r--  arch/arm64/include/asm/kgdb.h | 12
-rw-r--r--  arch/arm64/include/asm/kprobes.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 51
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 38
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 18
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 2
-rw-r--r--  arch/arm64/include/asm/memory.h | 6
-rw-r--r--  arch/arm64/include/asm/processor.h | 2
-rw-r--r--  arch/arm64/include/asm/smp.h | 24
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 6
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 87
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 4
-rw-r--r--  arch/arm64/include/asm/thread_info.h | 5
-rw-r--r--  arch/arm64/include/asm/traps.h | 6
-rw-r--r--  arch/arm64/include/asm/uprobes.h | 11
-rw-r--r--  arch/arm64/include/asm/vncr_mapping.h | 2
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h | 2
-rw-r--r--  arch/arm64/kernel/Makefile | 2
-rw-r--r--  arch/arm64/kernel/acpi.c | 10
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 142
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 2
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 263
-rw-r--r--  arch/arm64/kernel/efi.c | 5
-rw-r--r--  arch/arm64/kernel/entry-common.c | 156
-rw-r--r--  arch/arm64/kernel/entry.S | 6
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 60
-rw-r--r--  arch/arm64/kernel/irq.c | 13
-rw-r--r--  arch/arm64/kernel/kgdb.c | 39
-rw-r--r--  arch/arm64/kernel/module.c | 101
-rw-r--r--  arch/arm64/kernel/mte.c | 11
-rw-r--r--  arch/arm64/kernel/pi/Makefile | 2
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 31
-rw-r--r--  arch/arm64/kernel/probes/kprobes_trampoline.S | 2
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 24
-rw-r--r--  arch/arm64/kernel/process.c | 13
-rw-r--r--  arch/arm64/kernel/sdei.c | 8
-rw-r--r--  arch/arm64/kernel/signal.c | 7
-rw-r--r--  arch/arm64/kernel/smp.c | 142
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 59
-rw-r--r--  arch/arm64/kernel/traps.c | 84
-rw-r--r--  arch/arm64/kernel/watchdog_hld.c | 58
-rw-r--r--  arch/arm64/kvm/Makefile | 3
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 2
-rw-r--r--  arch/arm64/kvm/arm.c | 36
-rw-r--r--  arch/arm64/kvm/at.c | 80
-rw-r--r--  arch/arm64/kvm/config.c | 255
-rw-r--r--  arch/arm64/kvm/debug.c | 4
-rw-r--r--  arch/arm64/kvm/emulate-nested.c | 49
-rw-r--r--  arch/arm64/kvm/guest.c | 62
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 24
-rw-r--r--  arch/arm64/kvm/hyp/exception.c | 16
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 53
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 49
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c | 32
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c | 53
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 14
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 6
-rw-r--r--  arch/arm64/kvm/inject_fault.c | 235
-rw-r--r--  arch/arm64/kvm/mmio.c | 12
-rw-r--r--  arch/arm64/kvm/mmu.c | 105
-rw-r--r--  arch/arm64/kvm/nested.c | 109
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 218
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 2
-rw-r--r--  arch/arm64/kvm/trace_handle_exit.h | 2
-rw-r--r--  arch/arm64/kvm/vgic-sys-reg-v3.c | 127
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c | 30
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c | 5
-rw-r--r--  arch/arm64/kvm/vgic/vgic-kvm-device.c | 70
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 33
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3-nested.c | 2
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v4.c | 14
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v5.c | 52
-rw-r--r--  arch/arm64/kvm/vgic/vgic.c | 4
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h | 48
-rw-r--r--  arch/arm64/mm/contpte.c | 213
-rw-r--r--  arch/arm64/mm/fault.c | 83
-rw-r--r--  arch/arm64/mm/gcs.c | 6
-rw-r--r--  arch/arm64/mm/hugetlbpage.c | 2
-rw-r--r--  arch/arm64/mm/proc.S | 2
-rw-r--r--  arch/arm64/net/bpf_jit.h | 5
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 167
-rw-r--r--  arch/arm64/tools/cpucaps | 7
-rw-r--r--  arch/arm64/tools/sysreg | 646
94 files changed, 3530 insertions, 1165 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7c456aa838b9..3c117b1fa198 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -127,6 +127,7 @@ config ARM64
select ARM_GIC_V2M if PCI
select ARM_GIC_V3
select ARM_GIC_V3_ITS if PCI
+ select ARM_GIC_V5
select ARM_PSCI_FW
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
@@ -134,6 +135,7 @@ config ARM64
select CPU_PM if (SUSPEND || CPU_IDLE)
select CPUMASK_OFFSTACK if NR_CPUS > 256
select DCACHE_WORD_ACCESS
+ select HAVE_EXTRA_IPI_TRACEPOINTS
select DYNAMIC_FTRACE if FUNCTION_TRACER
select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
@@ -221,7 +223,6 @@ config ARM64
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC
- select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_FREGS
@@ -232,6 +233,7 @@ config ARM64
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_LIVEPATCH
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_PERF_EVENTS
@@ -240,6 +242,7 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RELIABLE_STACKTRACE
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
@@ -278,6 +281,7 @@ config ARM64
select HAVE_SOFTIRQ_ON_OWN_STACK
select USER_STACKTRACE_SUPPORT
select VDSO_GETRANDOM
+ select VMAP_STACK
help
ARM 64-bit (AArch64) Linux support.
@@ -2498,3 +2502,4 @@ source "drivers/acpi/Kconfig"
source "arch/arm64/kvm/Kconfig"
+source "kernel/livepatch/Kconfig"
diff --git a/arch/arm64/boot/dts/mediatek/mt8370.dtsi b/arch/arm64/boot/dts/mediatek/mt8370.dtsi
index cf1a3759451f..7ac8b8d03494 100644
--- a/arch/arm64/boot/dts/mediatek/mt8370.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8370.dtsi
@@ -59,6 +59,22 @@
<&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
};
+/*
+ * Please note that overriding compatibles is a discouraged practice and is a
+ * clear indication of nodes not being, well, compatible!
+ *
+ * This is a special case, where the GPU is the same as MT8188, but with one
+ * of the cores fused out in this lower-binned SoC.
+ */
+&gpu {
+ compatible = "mediatek,mt8370-mali", "arm,mali-valhall-jm";
+
+ power-domains = <&spm MT8188_POWER_DOMAIN_MFG2>,
+ <&spm MT8188_POWER_DOMAIN_MFG3>;
+
+ power-domain-names = "core0", "core1";
+};
+
&ppi_cluster0 {
affinity = <&cpu0 &cpu1 &cpu2 &cpu3>;
};
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index c56c21bb1eec..23be85d93348 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -58,7 +58,7 @@
.macro disable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
- bic \tmp, \tmp, #DBG_MDSCR_SS
+ bic \tmp, \tmp, #MDSCR_EL1_SS
msr mdscr_el1, \tmp
isb // Take effect before a subsequent clear of DAIF.D
9990:
@@ -68,7 +68,7 @@
.macro enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
mrs \tmp, mdscr_el1
- orr \tmp, \tmp, #DBG_MDSCR_SS
+ orr \tmp, \tmp, #MDSCR_EL1_SS
msr mdscr_el1, \tmp
9990:
.endm
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..f5801b0ba9e9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,6 +44,9 @@
SB_BARRIER_INSN"nop\n", \
ARM64_HAS_SB))
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
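
The two new barriers order GICv5 system-instruction effects. A minimal sketch of their intended use, assuming a GICv5 acknowledge path like the one this series adds elsewhere (the handler shape and ordering comments are illustrative, not a literal excerpt):

/* Hypothetical GICv5 ack fragment: complete the CDIA acknowledge
 * before acting on the returned INTID. */
static void gicv5_ack_barrier_example(void)
{
	u64 ia = read_sysreg_s(GICV5_OP_GICR_CDIA);	/* defined in sysreg.h below */

	gsb_ack();	/* order the acknowledge against what follows */
	/* ... decode and handle 'ia' ... */
}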
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c4326f1cb917..bf13d676aae2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -275,6 +275,14 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5))
/* Panic when a conflict is detected */
#define ARM64_CPUCAP_PANIC_ON_CONFLICT ((u16)BIT(6))
+/*
+ * When paired with SCOPE_LOCAL_CPU, all early CPUs must satisfy the
+ * condition. This is different from SCOPE_SYSTEM where the check is performed
+ * only once at the end of the SMP boot on the sanitised ID registers.
+ * SCOPE_SYSTEM is not suitable for cases where the capability depends on
+ * properties local to a CPU like MIDR_EL1.
+ */
+#define ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS ((u16)BIT(7))
/*
* CPU errata workarounds that need to be enabled at boot time if one or
@@ -304,6 +312,16 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
(ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \
ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time and present on all early CPUs. Late CPUs
+ * are permitted to have the feature even if it hasn't been enabled, although
+ * the feature will not be used by Linux in this case. If all early CPUs have
+ * the feature, then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE \
+ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \
+ ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU | \
+ ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS)
/*
* CPU feature detected at boot time, on one or more CPUs. A late CPU
@@ -391,6 +409,11 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
return cap->type & ARM64_CPUCAP_SCOPE_MASK;
}
+static inline bool cpucap_match_all_early_cpus(const struct arm64_cpu_capabilities *cap)
+{
+ return cap->type & ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS;
+}
+
/*
* Generic helper for handling capabilities with multiple (match,enable) pairs
* of call backs, sharing the same capability bit.
@@ -848,6 +871,11 @@ static inline bool system_supports_pmuv3(void)
return cpus_have_final_cap(ARM64_HAS_PMUV3);
}
+static inline bool system_supports_bbml2_noabort(void)
+{
+ return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
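
system_supports_bbml2_noabort() compiles to a static branch patched once capabilities are finalised, so it is cheap enough for hot page-table paths. A minimal sketch of a caller, assuming a hypothetical helper that decides whether a live block-size change is safe:

/* Hypothetical caller: because ARM64_HAS_BBML2_NOABORT is matched on
 * all early CPUs (see ARM64_CPUCAP_MATCH_ALL_EARLY_CPUS above), a true
 * result means the relaxed break-before-make path is safe system-wide. */
static bool can_repaint_mapping_live(void)
{
	return system_supports_bbml2_noabort();
}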
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 8f6ba31b8658..f5e3ed2420ce 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -13,14 +13,8 @@
#include <asm/ptrace.h>
/* Low-level stepping controls. */
-#define DBG_MDSCR_SS (1 << 0)
#define DBG_SPSR_SS (1 << 21)
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE (1 << 13)
-#define DBG_MDSCR_MDE (1 << 15)
-#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
@@ -62,30 +56,6 @@ struct task_struct;
#define DBG_HOOK_HANDLED 0
#define DBG_HOOK_ERROR 1
-struct step_hook {
- struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned long esr);
-};
-
-void register_user_step_hook(struct step_hook *hook);
-void unregister_user_step_hook(struct step_hook *hook);
-
-void register_kernel_step_hook(struct step_hook *hook);
-void unregister_kernel_step_hook(struct step_hook *hook);
-
-struct break_hook {
- struct list_head node;
- int (*fn)(struct pt_regs *regs, unsigned long esr);
- u16 imm;
- u16 mask; /* These bits are ignored when comparing with imm */
-};
-
-void register_user_break_hook(struct break_hook *hook);
-void unregister_user_break_hook(struct break_hook *hook);
-
-void register_kernel_break_hook(struct break_hook *hook);
-void unregister_kernel_break_hook(struct break_hook *hook);
-
u8 debug_monitors_arch(void);
enum dbg_active_el {
@@ -108,17 +78,15 @@ void kernel_rewind_single_step(struct pt_regs *regs);
void kernel_fastforward_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
-int reinstall_suspended_bps(struct pt_regs *regs);
+bool try_step_suspended_breakpoints(struct pt_regs *regs);
#else
-static inline int reinstall_suspended_bps(struct pt_regs *regs)
+static inline bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
- return -ENODEV;
+ return false;
}
#endif
-int aarch32_break_handler(struct pt_regs *regs);
-
-void debug_traps_init(void);
+bool try_handle_aarch32_break(struct pt_regs *regs);
#endif /* __ASSEMBLY */
#endif /* __ASM_DEBUG_MONITORS_H */
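
With the hook lists gone, callers dispatch directly, and the old 0/-ENODEV contract of reinstall_suspended_bps() becomes a bool. A sketch of the resulting calling convention (the dispatch site is illustrative; the real one lives in entry-common.c):

static void el1_softstep_dispatch_example(struct pt_regs *regs,
					  unsigned long esr)
{
	/* Stepping a suspended hw breakpoint? Then nothing else to do. */
	if (try_step_suspended_breakpoints(regs))
		return;

	do_el1_softstep(esr, regs);	/* kgdb first, else warn and rewind */
}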
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 9f38340d24c2..46033027510c 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -165,6 +165,50 @@
.Lskip_gicv3_\@:
.endm
+/* GICv5 system register access */
+.macro __init_el2_gicv5
+ mrs_s x0, SYS_ID_AA64PFR2_EL1
+ ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4
+ cbz x0, .Lskip_gicv5_\@
+
+ mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \
+ ICH_HFGITR_EL2_GICRCDIA | \
+ ICH_HFGITR_EL2_GICCDDI | \
+ ICH_HFGITR_EL2_GICCDEOI | \
+ ICH_HFGITR_EL2_GICCDHM | \
+ ICH_HFGITR_EL2_GICCDRCFG | \
+ ICH_HFGITR_EL2_GICCDPEND | \
+ ICH_HFGITR_EL2_GICCDAFF | \
+ ICH_HFGITR_EL2_GICCDPRI | \
+ ICH_HFGITR_EL2_GICCDDIS | \
+ ICH_HFGITR_EL2_GICCDEN)
+ msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps
+ mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps
+ mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGWTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+.Lskip_gicv5_\@:
+.endm
+
.macro __init_el2_hstr
msr hstr_el2, xzr // Disable CP15 traps to EL2
.endm
@@ -189,6 +233,28 @@
.Lskip_set_cptr_\@:
.endm
+/*
+ * Configure BRBE to permit recording cycle counts and branch mispredicts.
+ *
+ * At any EL, to record cycle counts BRBE requires that both BRBCR_EL2.CC=1 and
+ * BRBCR_EL1.CC=1.
+ *
+ * At any EL, to record branch mispredicts BRBE requires that both
+ * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1.
+ *
+ * Set {CC,MPRED} in BRBCR_EL2 in case nVHE mode is used and we are
+ * executing in EL1.
+ */
+.macro __init_el2_brbe
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_\@
+
+ mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED
+ msr_s SYS_BRBCR_EL2, x0
+.Lskip_brbe_\@:
+.endm
+
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
@@ -196,20 +262,62 @@
cbz x1, .Lskip_fgt_\@
mov x0, xzr
+ mov x2, xzr
mrs x1, id_aa64dfr0_el1
ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cmp x1, #3
b.lt .Lskip_spe_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
- orr x0, x0, #(1 << 62)
+ orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK
+ orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK
.Lskip_spe_fgt_\@:
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_fgt_\@
+
+ /*
+ * Disable read traps for the following registers
+ *
+ * [BRBSRC|BRBTGT|BRBINF]_EL1
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK
+
+ /*
+ * Disable write traps for the following registers
+ *
+ * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1
+ */
+ orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK
+
+ /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK
+ orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK
+
+ /* Disable read traps for BRBIDR_EL1 */
+ orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK
+
+.Lskip_brbe_fgt_\@:
.Lset_debug_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
- msr_s SYS_HDFGWTR_EL2, x0
+ msr_s SYS_HDFGWTR_EL2, x2
mov x0, xzr
+ mov x2, xzr
+
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4
+ cbz x1, .Lskip_brbe_insn_fgt_\@
+
+ /* Disable traps for BRBIALL instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBIALL_MASK
+
+ /* Disable traps for BRBINJ instruction */
+ orr x2, x2, #HFGITR_EL2_nBRBINJ_MASK
+
+.Lskip_brbe_insn_fgt_\@:
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
cbz x1, .Lskip_sme_fgt_\@
@@ -250,7 +358,7 @@
.Lset_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
- msr_s SYS_HFGITR_EL2, xzr
+ msr_s SYS_HFGITR_EL2, x2
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
@@ -300,9 +408,11 @@
__init_el2_hcrx
__init_el2_timers
__init_el2_debug
+ __init_el2_brbe
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
+ __init_el2_gicv5
__init_el2_hstr
__init_el2_nvhe_idregs
__init_el2_cptr
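
Since the CC and MPRED controls AND together across BRBCR_EL1 and BRBCR_EL2, the EL2 boot code above sets its half once and an EL1 driver only ever toggles the EL1 half. A hedged C sketch of that driver-side counterpart (it assumes the SYS_BRBCR_EL1 definition now generated from tools/sysreg; the function itself is hypothetical):

static void brbe_enable_cc_mpred_example(void)
{
	u64 brbcr = read_sysreg_s(SYS_BRBCR_EL1);

	/* Effective only because __init_el2_brbe already set the EL2 half. */
	brbcr |= BRBCR_ELx_CC | BRBCR_ELx_MPRED;
	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
	isb();
}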
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d48fc16584cd..e3874c4fc399 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -59,8 +59,20 @@ void do_el0_bti(struct pt_regs *regs);
void do_el1_bti(struct pt_regs *regs, unsigned long esr);
void do_el0_gcs(struct pt_regs *regs, unsigned long esr);
void do_el1_gcs(struct pt_regs *regs, unsigned long esr);
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void do_breakpoint(unsigned long esr, struct pt_regs *regs);
+void do_watchpoint(unsigned long addr, unsigned long esr,
struct pt_regs *regs);
+#else
+static inline void do_breakpoint(unsigned long esr, struct pt_regs *regs) {}
+static inline void do_watchpoint(unsigned long addr, unsigned long esr,
+ struct pt_regs *regs) {}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs);
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs);
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs);
+void do_bkpt32(unsigned long esr, struct pt_regs *regs);
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
void do_sve_acc(unsigned long esr, struct pt_regs *regs);
void do_sme_acc(unsigned long esr, struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h
index f50660603ecf..5bc432234d3a 100644
--- a/arch/arm64/include/asm/gcs.h
+++ b/arch/arm64/include/asm/gcs.h
@@ -58,7 +58,7 @@ static inline u64 gcsss2(void)
static inline bool task_gcs_el0_enabled(struct task_struct *task)
{
- return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
+ return task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE;
}
void gcs_set_el0_mode(struct task_struct *task);
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 1c3f9617d54f..13f94c8ddfc0 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -176,6 +176,8 @@
#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128)
+#define KERNEL_HWCAP_MTE_FAR __khwcap3_feature(MTE_FAR)
+#define KERNEL_HWCAP_MTE_STORE_ONLY __khwcap3_feature(MTE_STORE_ONLY)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
index 21fc85e9d2be..3184f5d1e3ae 100644
--- a/arch/arm64/include/asm/kgdb.h
+++ b/arch/arm64/include/asm/kgdb.h
@@ -24,6 +24,18 @@ static inline void arch_kgdb_breakpoint(void)
extern void kgdb_handle_bus_error(void);
extern int kgdb_fault_expected;
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_KGDB
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int kgdb_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index be7a3680dadf..f2782560647b 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -41,4 +41,12 @@ void __kretprobe_trampoline(void);
void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
#endif /* CONFIG_KPROBES */
+
+int __kprobes kprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kprobe_ss_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+int __kprobes kretprobe_brk_handler(struct pt_regs *regs,
+ unsigned long esr);
+
#endif /* _ARM_KPROBES_H */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0720898f563e..fa8a08a1ccd5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,16 +45,39 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
{
@@ -195,6 +218,11 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
}
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
bool e2h, tge;
@@ -224,6 +252,20 @@ static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
}
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -627,6 +669,9 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
if (kvm_has_fpmr(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
}
}
#endif /* __ARM64_KVM_EMULATE_H__ */
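
A sketch of how the two SError entry points divide the work (the helper is hypothetical; only the kvm_inject_serror*() calls are from this patch):

static int serror_injection_example(struct kvm_vcpu *vcpu, bool have_iss,
				    u64 iss)
{
	if (have_iss)	/* architected RAS syndrome available */
		return kvm_inject_serror_esr(vcpu, iss & ESR_ELx_ISS_MASK);

	/* No syndrome: the wrapper sets ESR_ELx.ISV so the guest sees the
	 * ISS field as IMPLEMENTATION DEFINED. */
	return kvm_inject_serror(vcpu);
}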
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3e41a880b062..2f2394cce24e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -523,6 +523,7 @@ enum vcpu_sysreg {
/* Anything from this can be RES0/RES1 sanitised */
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
+ SCTLR2_EL2, /* System Control Register 2 (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
@@ -537,6 +538,7 @@ enum vcpu_sysreg {
VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
VNCR(TCR_EL1), /* Translation Control Register */
VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(SCTLR2_EL1), /* System Control Register 2 */
VNCR(ESR_EL1), /* Exception Syndrome Register */
VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
@@ -565,6 +567,10 @@ enum vcpu_sysreg {
VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+ /* FEAT_RAS registers */
+ VNCR(VDISR_EL2),
+ VNCR(VSESR_EL2),
+
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
@@ -704,6 +710,7 @@ struct kvm_host_data {
#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6
#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7
+#define KVM_HOST_DATA_FLAG_HAS_BRBE 8
unsigned long flags;
struct kvm_cpu_context host_ctxt;
@@ -737,6 +744,7 @@ struct kvm_host_data {
u64 trfcr_el1;
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2;
+ u64 brbcr_el1;
} host_debug_state;
/* Guest trace filter value */
@@ -817,7 +825,7 @@ struct kvm_vcpu_arch {
u8 iflags;
/* State flags for kernel bookkeeping, unused by the hypervisor code */
- u8 sflags;
+ u16 sflags;
/*
* Don't run the guest (internal implementation need).
@@ -953,9 +961,21 @@ struct kvm_vcpu_arch {
__vcpu_flags_preempt_enable(); \
} while (0)
+#define __vcpu_test_and_clear_flag(v, flagset, f, m) \
+ ({ \
+ typeof(v->arch.flagset) set; \
+ \
+ set = __vcpu_get_flag(v, flagset, f, m); \
+ __vcpu_clear_flag(v, flagset, f, m); \
+ \
+ set; \
+ })
+
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...) \
+ __vcpu_test_and_clear_flag((v), __VA_ARGS__)
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
@@ -1015,6 +1035,8 @@ struct kvm_vcpu_arch {
#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
/* KVM is currently emulating a nested ERET */
#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1149,6 +1171,8 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
* System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the guest cannot modify its
* own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
@@ -1186,6 +1210,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1200,6 +1225,8 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
* System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the MPIDR should only be set
* once, before running the VCPU, and never changed later.
@@ -1236,6 +1263,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1387,8 +1415,6 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
return (vcpu_arch->steal.base != INVALID_GPA);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
-
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -1665,6 +1691,12 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+#define kvm_has_ras(k) \
+ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
+
+#define kvm_has_sctlr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP))
+
static inline bool kvm_arch_has_irq_bypass(void)
{
return true;
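
vcpu_test_and_clear_flag() keeps the read and the clear under a single preempt-disabled section, which suits consume-once flags like the new NESTED_SERROR_PENDING. A plausible consumer, shown as a sketch (the function name and call site are illustrative):

static void flush_nested_serror_example(struct kvm_vcpu *vcpu)
{
	if (vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))
		kvm_inject_nested_serror(vcpu,
					 __vcpu_sys_reg(vcpu, VSESR_EL2));
}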
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b98ac6aa631f..ae563ebd6aee 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm)
read_unlock(&kvm->mmu_lock);
}
+/*
+ * ARM64 KVM relies on a simple conversion from physaddr to a kernel
+ * virtual address (KVA) when it does cache maintenance as the CMO
+ * instructions work on virtual addresses. This is incompatible with
+ * VM_PFNMAP VMAs which may not have a kernel direct mapping to a
+ * virtual address.
+ *
+ * With S2FWB and CACHE DIC features, KVM need not do cache flushing
+ * and CMOs are NOP'd. This has the effect of no longer requiring a
+ * KVA for addresses mapped into the S2. The presence of these features
+ * are thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */
+static inline bool kvm_supports_cacheable_pfnmap(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
+ cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
+}
+
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
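
A sketch of the predicate's intended use in a stage-2 fault path (the surrounding locals are illustrative, not the exact mmu.c code):

static int cacheable_pfnmap_check_example(struct vm_area_struct *vma,
					  bool wants_cacheable)
{
	/* Refuse a cacheable mapping of VM_PFNMAP memory unless CMOs are
	 * architecturally unnecessary (S2FWB + CACHE DIC). */
	if ((vma->vm_flags & VM_PFNMAP) && wants_cacheable &&
	    !kvm_supports_cacheable_pfnmap())
		return -EINVAL;

	return 0;
}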
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 0bd07ea068a1..7fd76f41c296 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -80,6 +80,8 @@ extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
struct kvm_s2_trans {
phys_addr_t output;
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 717829df294e..5213248e081b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -118,7 +118,7 @@
* VMAP'd stacks are allocated at page granularity, so we must ensure that such
* stacks are a multiple of page size.
*/
-#if defined(CONFIG_VMAP_STACK) && (MIN_THREAD_SHIFT < PAGE_SHIFT)
+#if (MIN_THREAD_SHIFT < PAGE_SHIFT)
#define THREAD_SHIFT PAGE_SHIFT
#else
#define THREAD_SHIFT MIN_THREAD_SHIFT
@@ -135,11 +135,7 @@
* checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
* assembly.
*/
-#ifdef CONFIG_VMAP_STACK
#define THREAD_ALIGN (2 * THREAD_SIZE)
-#else
-#define THREAD_ALIGN THREAD_SIZE
-#endif
#define IRQ_STACK_SIZE THREAD_SIZE
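
With VMAP_STACK now unconditional the sizing collapses to a single case. A worked example of the arithmetic (values for common configurations, no KASAN padding):

/*
 * 4 KiB pages:  PAGE_SHIFT = 12, MIN_THREAD_SHIFT = 14
 *   -> THREAD_SHIFT = 14, THREAD_SIZE = 16 KiB, THREAD_ALIGN = 32 KiB
 * 64 KiB pages: PAGE_SHIFT = 16 > MIN_THREAD_SHIFT
 *   -> THREAD_SHIFT = 16, so VMAP'd stacks stay page-granular
 * The doubled alignment is what makes the overflow check a single-bit
 * test: sp & (1 << THREAD_SHIFT).
 */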
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 1bf1a3b16e88..61d62bfd5a7b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -23,6 +23,8 @@
#define MTE_CTRL_TCF_ASYNC (1UL << 17)
#define MTE_CTRL_TCF_ASYMM (1UL << 18)
+#define MTE_CTRL_STORE_ONLY (1UL << 19)
+
#ifndef __ASSEMBLY__
#include <linux/build_bug.h>
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2510eec026f7..d48ef6d5abcc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -50,10 +50,32 @@ struct seq_file;
*/
extern void smp_init_cpus(void);
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNC,
+ IPI_CPU_STOP,
+ IPI_CPU_STOP_NMI,
+ IPI_TIMER,
+ IPI_IRQ_WORK,
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
+};
+
/*
* Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus);
+
+static inline void set_smp_ipi_range(int ipi_base, int n)
+{
+ set_smp_ipi_range_percpu(ipi_base, n, 0);
+}
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
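
A sketch of the two registration paths (the base IRQ value is illustrative):

/* An irqchip whose IPI interrupts are banked per CPU passes a count: */
set_smp_ipi_range_percpu(base_irq, MAX_IPI, num_possible_cpus());

/* Existing GICv2/v3 callers are unchanged; ncpus == 0 keeps the
 * traditional one-IRQ-per-IPI layout: */
set_smp_ipi_range(base_irq, MAX_IPI);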
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 66ec8caa6ac0..6d3280932bf5 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -59,7 +59,6 @@ static inline bool on_task_stack(const struct task_struct *tsk,
#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1))
-#ifdef CONFIG_VMAP_STACK
DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
static inline struct stack_info stackinfo_get_overflow(void)
@@ -72,11 +71,8 @@ static inline struct stack_info stackinfo_get_overflow(void)
.high = high,
};
}
-#else
-#define stackinfo_get_overflow() stackinfo_get_unknown()
-#endif
-#if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK)
+#if defined(CONFIG_ARM_SDE_INTERFACE)
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f1bb0d10c39a..d5b5f2ae1afa 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -113,10 +113,14 @@
/* Register-based PAN access, for save/restore purposes */
#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
-#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
- __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
-#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31)
+#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31)
/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
@@ -202,16 +206,8 @@
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
-#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
-#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
-#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
-#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
-
-#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
-#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
-#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
@@ -277,8 +273,6 @@
/* ETM */
#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
-#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
-
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -821,6 +815,12 @@
#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+/*
+ * BRBE Instructions
+ */
+#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f))
+#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f))
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
@@ -1078,6 +1078,67 @@
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
GCS_CAP_VALID_TOKEN)
+/*
+ * Definitions for GICv5 instructions
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r)
+#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
#define ARM64_FEATURE_FIELD_BITS 4
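
The accessor macros pair with the FIELD_GET masks above. A sketch of an acknowledge-and-EOI sequence (hypothetical handler fragment; only the macros are from this patch):

static void gicv5_cdia_example(void)
{
	u64 ia = gicr_insn(CDIA);   /* read_sysreg_s(GICV5_OP_GICR_CDIA) */

	if (!GICV5_GICR_CDIA_VALID(ia))
		return;		/* nothing pending */

	/* ... dispatch FIELD_GET(GICV5_GIC_CDIA_TYPE_MASK, ia) and
	 *     FIELD_GET(GICV5_GIC_CDIA_ID_MASK, ia), then ... */
	gic_insn(0, CDEOI);	/* write_sysreg_s(0, GICV5_OP_GIC_CDEOI) */
}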
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index c34344256762..344b1c1a4bbb 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -25,10 +25,6 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
int signo, int sicode, unsigned long far,
unsigned long err);
-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long,
- struct pt_regs *),
- int sig, int code, const char *name);
-
struct mm_struct;
extern void __show_regs(struct pt_regs *);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1269c2487574..f241b8601ebd 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -70,6 +70,7 @@ void arch_setup_new_exec(void);
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
#define TIF_SECCOMP 11 /* syscall secure computing */
#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -96,6 +97,7 @@ void arch_setup_new_exec(void);
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
@@ -107,7 +109,8 @@ void arch_setup_new_exec(void);
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
- _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING)
+ _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING | \
+ _TIF_PATCH_PENDING)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 82cf1f879c61..e3e8944a71c3 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -29,6 +29,12 @@ void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr);
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr);
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr);
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr);
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr);
+
int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
/*
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index 014b02897f8e..89bfb0213a50 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -28,4 +28,15 @@ struct arch_uprobe {
bool simulate;
};
+int uprobe_brk_handler(struct pt_regs *regs, unsigned long esr);
+#ifdef CONFIG_UPROBES
+int uprobe_single_step_handler(struct pt_regs *regs, unsigned long esr);
+#else
+static inline int uprobe_single_step_handler(struct pt_regs *regs,
+ unsigned long esr)
+{
+ return DBG_HOOK_ERROR;
+}
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index 6f556e993644..f6ec500ad3fa 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -51,6 +51,7 @@
#define VNCR_SP_EL1 0x240
#define VNCR_VBAR_EL1 0x250
#define VNCR_TCR2_EL1 0x270
+#define VNCR_SCTLR2_EL1 0x278
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
@@ -84,6 +85,7 @@
#define VNCR_ICH_HCR_EL2 0x4C0
#define VNCR_ICH_VMCR_EL2 0x4C8
#define VNCR_VDISR_EL2 0x500
+#define VNCR_VSESR_EL2 0x508
#define VNCR_PMBLIMITR_EL1 0x800
#define VNCR_PMBPTR_EL1 0x810
#define VNCR_PMBSR_EL1 0x820
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 705a7afa8e58..72c78468b806 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -143,5 +143,7 @@
/*
* HWCAP3 flags - for AT_HWCAP3
*/
+#define HWCAP3_MTE_FAR (1UL << 0)
+#define HWCAP3_MTE_STORE_ONLY (1UL << 1)
#endif /* _UAPI__ASM_HWCAP_H */
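
Userspace discovers these bits through the AT_HWCAP3 auxv entry. A minimal sketch, assuming a libc and kernel headers that already expose AT_HWCAP3:

#include <sys/auxv.h>
#include <asm/hwcap.h>

int mte_far_supported(void)
{
	unsigned long hwcap3 = getauxval(AT_HWCAP3);

	/* HWCAP3_MTE_FAR: FAR is reported on MTE tag-check faults;
	 * HWCAP3_MTE_STORE_ONLY: store-only tag checking is available. */
	return !!(hwcap3 & HWCAP3_MTE_FAR);
}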
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index a2faf0049dab..76f32e424065 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -80,7 +80,7 @@ obj-y += head.o
always-$(KBUILD_BUILTIN) += vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
-AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
+AFLAGS_head.o += -DVMLINUX_PATH="\"$(abspath vmlinux)\""
endif
# for cleaning
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index b9a66fc146c9..4d529ff7ba51 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -197,6 +197,8 @@ out:
*/
void __init acpi_boot_table_init(void)
{
+ int ret;
+
/*
* Enable ACPI instead of device tree unless
* - ACPI has been disabled explicitly (acpi=off), or
@@ -250,10 +252,12 @@ done:
* behaviour, use acpi=nospcr to disable console in ACPI SPCR
* table as default serial console.
*/
- acpi_parse_spcr(earlycon_acpi_spcr_enable,
+ ret = acpi_parse_spcr(earlycon_acpi_spcr_enable,
!param_acpi_nospcr);
- pr_info("Use ACPI SPCR as default console: %s\n",
- param_acpi_nospcr ? "No" : "Yes");
+ if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE))
+ pr_info("Use ACPI SPCR as default console: No\n");
+ else
+ pr_info("Use ACPI SPCR as default console: Yes\n");
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e151585c6cca..9ad065f15f1d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -303,6 +303,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
@@ -320,6 +321,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTEFAR_SHIFT, 4, ID_AA64PFR2_EL1_MTEFAR_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT, 4, ID_AA64PFR2_EL1_MTESTOREONLY_NI),
ARM64_FTR_END,
};
@@ -500,6 +503,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -2213,6 +2217,38 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
+static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
+{
+ /*
+ * We want to allow usage of BBML2 in as wide a range of kernel contexts
+ * as possible. This list is therefore an allow-list of known-good
+ * implementations that both support BBML2 and additionally, fulfill the
+ * extra constraint of never generating TLB conflict aborts when using
+ * the relaxed BBML2 semantics (such aborts make use of BBML2 in certain
+ * kernel contexts difficult to prove safe against recursive aborts).
+ *
+ * Note that implementations can only be considered "known-good" if their
+ * implementors attest to the fact that the implementation never raises
+ * TLB conflict aborts for BBML2 mapping granularity changes.
+ */
+ static const struct midr_range supports_bbml2_noabort_list[] = {
+ MIDR_REV_RANGE(MIDR_CORTEX_X4, 0, 3, 0xf),
+ MIDR_REV_RANGE(MIDR_NEOVERSE_V3, 0, 2, 0xf),
+ {}
+ };
+
+ /* Does our cpu guarantee to never raise TLB conflict aborts? */
+ if (!is_midr_in_range_list(supports_bbml2_noabort_list))
+ return false;
+
+ /*
+ * We currently ignore the ID_AA64MMFR2_EL1 register, and only care
+ * about whether the MIDR check passes.
+ */
+
+ return true;
+}
+
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@@ -2296,11 +2332,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2496,8 +2532,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -2874,6 +2910,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3)
},
+ {
+ .desc = "FAR on MTE Tag Check Fault",
+ .capability = ARM64_MTE_FAR,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTEFAR, IMP)
+ },
+ {
+ .desc = "Store Only MTE Tag Check",
+ .capability = ARM64_MTE_STORE_ONLY,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, MTESTOREONLY, IMP)
+ },
#endif /* CONFIG_ARM64_MTE */
{
.desc = "RCpc load-acquire (LDAPR)",
@@ -2981,6 +3031,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
{
+ .desc = "BBM Level 2 without TLB conflict abort",
+ .capability = ARM64_HAS_BBML2_NOABORT,
+ .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
+ .matches = has_bbml2_noabort,
+ },
+ {
.desc = "52-bit Virtual Addressing for KVM (LPA2)",
.capability = ARM64_HAS_LPA2,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
@@ -3061,6 +3117,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_pmuv3,
},
#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
{},
};
@@ -3218,6 +3288,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
#ifdef CONFIG_ARM64_MTE
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTEFAR, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_FAR),
+ HWCAP_CAP(ID_AA64PFR2_EL1, MTESTOREONLY, IMP, CAP_HWCAP, KERNEL_HWCAP_MTE_STORE_ONLY),
#endif /* CONFIG_ARM64_MTE */
HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV),
HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP),
@@ -3377,18 +3449,49 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
+ bool match_all = false;
+ bool caps_set = false;
+ bool boot_cpu = false;
+
caps = cpucap_ptrs[i];
- if (!caps || !(caps->type & scope_mask) ||
- cpus_have_cap(caps->capability) ||
- !caps->matches(caps, cpucap_default_scope(caps)))
+ if (!caps || !(caps->type & scope_mask))
continue;
- if (caps->desc && !caps->cpus)
+ match_all = cpucap_match_all_early_cpus(caps);
+ caps_set = cpus_have_cap(caps->capability);
+ boot_cpu = scope_mask & SCOPE_BOOT_CPU;
+
+ /*
+ * Unless it's a match-all CPUs feature, avoid probing if
+ * already detected.
+ */
+ if (!match_all && caps_set)
+ continue;
+
+ /*
+ * A match-all CPUs capability is only set when probing the
+ * boot CPU. It may be cleared subsequently if not detected on
+ * secondary ones.
+ */
+ if (match_all && !caps_set && !boot_cpu)
+ continue;
+
+ if (!caps->matches(caps, cpucap_default_scope(caps))) {
+ if (match_all)
+ __clear_bit(caps->capability, system_cpucaps);
+ continue;
+ }
+
+ /*
+ * Match-all CPUs capabilities are logged later when the
+ * system capabilities are finalised.
+ */
+ if (!match_all && caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
- if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
+ if (boot_cpu && (caps->type & SCOPE_BOOT_CPU))
set_bit(caps->capability, boot_cpucaps);
}
}
@@ -3789,17 +3892,24 @@ static void __init setup_system_capabilities(void)
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
apply_alternatives_all();
- /*
- * Log any cpucaps with a cpumask as these aren't logged by
- * update_cpu_capabilities().
- */
for (int i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
- if (caps && caps->cpus && caps->desc &&
- cpumask_any(caps->cpus) < nr_cpu_ids)
+ if (!caps || !caps->desc)
+ continue;
+
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ if (caps->cpus && cpumask_any(caps->cpus) < nr_cpu_ids)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
+
+ /* Log match-all CPUs capabilities */
+ if (cpucap_match_all_early_cpus(caps) &&
+ cpus_have_cap(caps->capability))
+ pr_info("detected: %s\n", caps->desc);
}
/*
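
Putting the two hunks together, the lifetime of a match-all capability such as ARM64_HAS_BBML2_NOABORT can be summarised as follows (illustrative pseudocode, not a literal excerpt):

/*
 * boot CPU:        matches()   -> __set_bit(cap, system_cpucaps)
 * each early CPU:  !matches()  -> __clear_bit(cap, system_cpucaps)
 * finalisation:    bit still set -> pr_info("detected: ...") and
 *                  alternatives patched system-wide
 * late CPUs:       may only differ as PERMITTED_FOR_LATE_CPU allows
 */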
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index c1f2b6b04b41..ba834909a28b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -160,6 +160,8 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SFEXPA] = "smesfexpa",
[KERNEL_HWCAP_SME_STMOP] = "smestmop",
[KERNEL_HWCAP_SME_SMOP4] = "smesmop4",
+ [KERNEL_HWCAP_MTE_FAR] = "mtefar",
+ [KERNEL_HWCAP_MTE_STORE_ONLY] = "mtestoreonly",
};
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 58f047de3e1c..110d9ff54174 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -21,8 +21,12 @@
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
+#include <asm/kgdb.h>
+#include <asm/kprobes.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
+#include <asm/uprobes.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
@@ -34,7 +38,7 @@ u8 debug_monitors_arch(void)
/*
* MDSCR access routines.
*/
-static void mdscr_write(u32 mdscr)
+static void mdscr_write(u64 mdscr)
{
unsigned long flags;
flags = local_daif_save();
@@ -43,7 +47,7 @@ static void mdscr_write(u32 mdscr)
}
NOKPROBE_SYMBOL(mdscr_write);
-static u32 mdscr_read(void)
+static u64 mdscr_read(void)
{
return read_sysreg(mdscr_el1);
}
@@ -79,16 +83,16 @@ static DEFINE_PER_CPU(int, kde_ref_count);
void enable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, enable = 0;
+ u64 mdscr, enable = 0;
WARN_ON(preemptible());
if (this_cpu_inc_return(mde_ref_count) == 1)
- enable = DBG_MDSCR_MDE;
+ enable = MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_inc_return(kde_ref_count) == 1)
- enable |= DBG_MDSCR_KDE;
+ enable |= MDSCR_EL1_KDE;
if (enable && debug_enabled) {
mdscr = mdscr_read();
@@ -100,16 +104,16 @@ NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
- u32 mdscr, disable = 0;
+ u64 mdscr, disable = 0;
WARN_ON(preemptible());
if (this_cpu_dec_return(mde_ref_count) == 0)
- disable = ~DBG_MDSCR_MDE;
+ disable = ~MDSCR_EL1_MDE;
if (el == DBG_ACTIVE_EL1 &&
this_cpu_dec_return(kde_ref_count) == 0)
- disable &= ~DBG_MDSCR_KDE;
+ disable &= ~MDSCR_EL1_KDE;
if (disable) {
mdscr = mdscr_read();
@@ -156,74 +160,6 @@ NOKPROBE_SYMBOL(clear_user_regs_spsr_ss);
#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs)
#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs)
-static DEFINE_SPINLOCK(debug_hook_lock);
-static LIST_HEAD(user_step_hook);
-static LIST_HEAD(kernel_step_hook);
-
-static void register_debug_hook(struct list_head *node, struct list_head *list)
-{
- spin_lock(&debug_hook_lock);
- list_add_rcu(node, list);
- spin_unlock(&debug_hook_lock);
-
-}
-
-static void unregister_debug_hook(struct list_head *node)
-{
- spin_lock(&debug_hook_lock);
- list_del_rcu(node);
- spin_unlock(&debug_hook_lock);
- synchronize_rcu();
-}
-
-void register_user_step_hook(struct step_hook *hook)
-{
- register_debug_hook(&hook->node, &user_step_hook);
-}
-
-void unregister_user_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
-}
-
-void register_kernel_step_hook(struct step_hook *hook)
-{
- register_debug_hook(&hook->node, &kernel_step_hook);
-}
-
-void unregister_kernel_step_hook(struct step_hook *hook)
-{
- unregister_debug_hook(&hook->node);
-}
-
-/*
- * Call registered single step handlers
- * There is no Syndrome info to check for determining the handler.
- * So we call all the registered handlers, until the right handler is
- * found which returns zero.
- */
-static int call_step_hook(struct pt_regs *regs, unsigned long esr)
-{
- struct step_hook *hook;
- struct list_head *list;
- int retval = DBG_HOOK_ERROR;
-
- list = user_mode(regs) ? &user_step_hook : &kernel_step_hook;
-
- /*
- * Since single-step exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
- */
- list_for_each_entry_rcu(hook, list, node) {
- retval = hook->fn(regs, esr);
- if (retval == DBG_HOOK_HANDLED)
- break;
- }
-
- return retval;
-}
-NOKPROBE_SYMBOL(call_step_hook);
-
static void send_user_sigtrap(int si_code)
{
struct pt_regs *regs = current_pt_regs();
@@ -238,105 +174,110 @@ static void send_user_sigtrap(int si_code)
"User debug trap");
}
-static int single_step_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_softstep() from entry-common.c.
+ */
+void do_el0_softstep(unsigned long esr, struct pt_regs *regs)
{
- bool handler_found = false;
+ if (uprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
+ send_user_sigtrap(TRAP_TRACE);
/*
- * If we are stepping a pending breakpoint, call the hw_breakpoint
- * handler first.
+ * ptrace will disable single step unless explicitly
+ * asked to re-enable it. For other clients, it makes
+ * sense to leave it enabled (i.e. rewind the controls
+ * to the active-not-pending state).
*/
- if (!reinstall_suspended_bps(regs))
- return 0;
-
- if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
-
- if (!handler_found && user_mode(regs)) {
- send_user_sigtrap(TRAP_TRACE);
-
- /*
- * ptrace will disable single step unless explicitly
- * asked to re-enable it. For other clients, it makes
- * sense to leave it enabled (i.e. rewind the controls
- * to the active-not-pending state).
- */
- user_rewind_single_step(current);
- } else if (!handler_found) {
- pr_warn("Unexpected kernel single-step exception at EL1\n");
- /*
- * Re-enable stepping since we know that we will be
- * returning to regs.
- */
- set_regs_spsr_ss(regs);
- }
-
- return 0;
+ user_rewind_single_step(current);
}
-NOKPROBE_SYMBOL(single_step_handler);
-
-static LIST_HEAD(user_break_hook);
-static LIST_HEAD(kernel_break_hook);
-void register_user_break_hook(struct break_hook *hook)
+void do_el1_softstep(unsigned long esr, struct pt_regs *regs)
{
- register_debug_hook(&hook->node, &user_break_hook);
-}
+ if (kgdb_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
-void unregister_user_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
+ /*
+ * Re-enable stepping since we know that we will be
+ * returning to regs.
+ */
+ set_regs_spsr_ss(regs);
}
+NOKPROBE_SYMBOL(do_el1_softstep);
-void register_kernel_break_hook(struct break_hook *hook)
+static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr)
{
- register_debug_hook(&hook->node, &kernel_break_hook);
-}
+ if (esr_brk_comment(esr) == BUG_BRK_IMM)
+ return bug_brk_handler(regs, esr);
-void unregister_kernel_break_hook(struct break_hook *hook)
-{
- unregister_debug_hook(&hook->node);
-}
+ if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr))
+ return cfi_brk_handler(regs, esr);
-static int call_break_hook(struct pt_regs *regs, unsigned long esr)
-{
- struct break_hook *hook;
- struct list_head *list;
+ if (esr_brk_comment(esr) == FAULT_BRK_IMM)
+ return reserved_fault_brk_handler(regs, esr);
- list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
+ if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
+ (esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
+ return kasan_brk_handler(regs, esr);
- /*
- * Since brk exception disables interrupt, this function is
- * entirely not preemptible, and we can use rcu list safely here.
- */
- list_for_each_entry_rcu(hook, list, node) {
- if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm)
- return hook->fn(regs, esr);
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP) && esr_is_ubsan_brk(esr))
+ return ubsan_brk_handler(regs, esr);
+
+ if (IS_ENABLED(CONFIG_KGDB)) {
+ if (esr_brk_comment(esr) == KGDB_DYN_DBG_BRK_IMM)
+ return kgdb_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KGDB_COMPILED_DBG_BRK_IMM)
+ return kgdb_compiled_brk_handler(regs, esr);
+ }
+
+ if (IS_ENABLED(CONFIG_KPROBES)) {
+ if (esr_brk_comment(esr) == KPROBES_BRK_IMM)
+ return kprobe_brk_handler(regs, esr);
+ if (esr_brk_comment(esr) == KPROBES_BRK_SS_IMM)
+ return kprobe_ss_brk_handler(regs, esr);
}
+ if (IS_ENABLED(CONFIG_KRETPROBES) &&
+ esr_brk_comment(esr) == KRETPROBES_BRK_IMM)
+ return kretprobe_brk_handler(regs, esr);
+
return DBG_HOOK_ERROR;
}
-NOKPROBE_SYMBOL(call_break_hook);
+NOKPROBE_SYMBOL(call_el1_break_hook);
-static int brk_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+/*
+ * We have already unmasked interrupts and enabled preemption
+ * when calling do_el0_brk64() from entry-common.c.
+ */
+void do_el0_brk64(unsigned long esr, struct pt_regs *regs)
{
- if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
+ if (IS_ENABLED(CONFIG_UPROBES) &&
+ esr_brk_comment(esr) == UPROBES_BRK_IMM &&
+ uprobe_brk_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return;
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- } else {
- pr_warn("Unexpected kernel BRK exception at EL1\n");
- return -EFAULT;
- }
+ send_user_sigtrap(TRAP_BRKPT);
+}
- return 0;
+void do_el1_brk64(unsigned long esr, struct pt_regs *regs)
+{
+ if (call_el1_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return;
+
+ die("Oops - BRK", regs, esr);
+}
+NOKPROBE_SYMBOL(do_el1_brk64);
+
+#ifdef CONFIG_COMPAT
+void do_bkpt32(unsigned long esr, struct pt_regs *regs)
+{
+ arm64_notify_die("aarch32 BKPT", regs, SIGTRAP, TRAP_BRKPT, regs->pc, esr);
}
-NOKPROBE_SYMBOL(brk_handler);
+#endif /* CONFIG_COMPAT */
-int aarch32_break_handler(struct pt_regs *regs)
+bool try_handle_aarch32_break(struct pt_regs *regs)
{
u32 arm_instr;
u16 thumb_instr;
@@ -344,7 +285,7 @@ int aarch32_break_handler(struct pt_regs *regs)
void __user *pc = (void __user *)instruction_pointer(regs);
if (!compat_user_mode(regs))
- return -EFAULT;
+ return false;
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
@@ -368,20 +309,12 @@ int aarch32_break_handler(struct pt_regs *regs)
}
if (!bp)
- return -EFAULT;
+ return false;
send_user_sigtrap(TRAP_BRKPT);
- return 0;
-}
-NOKPROBE_SYMBOL(aarch32_break_handler);
-
-void __init debug_traps_init(void)
-{
- hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
- TRAP_TRACE, "single-step handler");
- hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
- TRAP_BRKPT, "BRK handler");
+ return true;
}
+NOKPROBE_SYMBOL(try_handle_aarch32_break);
/* Re-enable single step for syscall restarting. */
void user_rewind_single_step(struct task_struct *task)
@@ -415,7 +348,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
{
WARN_ON(!irqs_disabled());
set_regs_spsr_ss(regs);
- mdscr_write(mdscr_read() | DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() | MDSCR_EL1_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_enable_single_step);
@@ -423,7 +356,7 @@ NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
WARN_ON(!irqs_disabled());
- mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
+ mdscr_write(mdscr_read() & ~MDSCR_EL1_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
NOKPROBE_SYMBOL(kernel_disable_single_step);
@@ -431,7 +364,7 @@ NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
- return mdscr_read() & DBG_MDSCR_SS;
+ return mdscr_read() & MDSCR_EL1_SS;
}
NOKPROBE_SYMBOL(kernel_active_single_step);
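
The enable/disable pair above reference-counts MDSCR_EL1.MDE and MDSCR_EL1.KDE per CPU so that independent users of the debug monitors (hw_breakpoint, kgdb, kernel single-step) can nest without clobbering each other. A minimal userspace model of that refcounting, in which plain globals stand in for the per-CPU counters and the system register; the bit positions (MDE bit 15, KDE bit 13) are assumptions taken from the architecture:

#include <assert.h>
#include <stdio.h>

#define MDSCR_MDE (1UL << 15)	/* monitor debug events enable (assumed bit) */
#define MDSCR_KDE (1UL << 13)	/* kernel (EL1) debug enable (assumed bit) */

static unsigned long mdscr;	/* stands in for MDSCR_EL1 */
static int mde_ref, kde_ref;	/* stand in for the per-CPU refcounts */

static void enable_monitors(int el1)
{
	unsigned long enable = 0;

	if (++mde_ref == 1)
		enable = MDSCR_MDE;
	if (el1 && ++kde_ref == 1)
		enable |= MDSCR_KDE;
	if (enable)
		mdscr |= enable;	/* only the first user writes the register */
}

static void disable_monitors(int el1)
{
	unsigned long disable = 0;

	if (--mde_ref == 0)
		disable = ~MDSCR_MDE;
	if (el1 && --kde_ref == 0)
		disable &= ~MDSCR_KDE;
	if (disable)
		mdscr &= disable;	/* only the last user writes the register */
}

int main(void)
{
	enable_monitors(1);		/* first EL1 user: MDE and KDE set */
	enable_monitors(0);		/* nested user: no register write */
	disable_monitors(0);		/* MDE refcount still nonzero */
	assert(mdscr == (MDSCR_MDE | MDSCR_KDE));
	disable_monitors(1);		/* last user: both bits cleared */
	assert(mdscr == 0);
	printf("refcounting ok\n");
	return 0;
}

Only the first enable and the last disable touch the register, keeping redundant MDSCR_EL1 writes (each of which needs interrupts masked, as mdscr_write() shows) off the common path.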
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 62230d6dd919..6c371b158b99 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -215,11 +215,6 @@ static int __init arm64_efi_rt_init(void)
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
- if (!IS_ENABLED(CONFIG_VMAP_STACK)) {
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return -ENOMEM;
- }
-
p = arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE);
if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 7c1970b341b8..2b0c5925502e 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -8,6 +8,7 @@
#include <linux/context_tracking.h>
#include <linux/kasan.h>
#include <linux/linkage.h>
+#include <linux/livepatch.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/resume_user_mode.h>
@@ -144,6 +145,9 @@ static void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
(void __user *)NULL, current);
}
+ if (thread_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
@@ -344,7 +348,7 @@ static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
static void cortex_a76_erratum_1463225_svc_handler(void)
{
- u32 reg, val;
+ u64 reg, val;
if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
return;
@@ -354,7 +358,7 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
reg = read_sysreg(mdscr_el1);
- val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+ val = reg | MDSCR_EL1_SS | MDSCR_EL1_KDE;
write_sysreg(val, mdscr_el1);
asm volatile("msr daifclr, #8");
isb();
@@ -441,6 +445,28 @@ static __always_inline void fpsimd_syscall_exit(void)
__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
}
+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+ preempt_disable();
+
+ /* This code is fragile; assert that exception entry left RCU watching. */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+ preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
UNHANDLED(el1t, 64, sync)
UNHANDLED(el1t, 64, irq)
UNHANDLED(el1t, 64, fiq)
@@ -504,13 +530,51 @@ static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
exit_to_kernel_mode(regs);
}
-static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_breakpt(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs);
+}
+
+static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr)
{
+ arm64_enter_el1_dbg(regs);
+ if (!cortex_a76_erratum_1463225_debug_handler(regs)) {
+ debug_exception_enter(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint, there's nothing more to do:
+ * the single-step is complete.
+ */
+ if (!try_step_suspended_breakpoints(regs))
+ do_el1_softstep(esr, regs);
+ debug_exception_exit(regs);
+ }
+ arm64_exit_el1_dbg(regs);
+}
+
+static void noinstr el1_watchpt(struct pt_regs *regs, unsigned long esr)
+{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
arm64_enter_el1_dbg(regs);
- if (!cortex_a76_erratum_1463225_debug_handler(regs))
- do_debug_exception(far, esr, regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ arm64_exit_el1_dbg(regs);
+}
+
+static void noinstr el1_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ arm64_enter_el1_dbg(regs);
+ debug_exception_enter(regs);
+ do_el1_brk64(esr, regs);
+ debug_exception_exit(regs);
arm64_exit_el1_dbg(regs);
}
@@ -553,10 +617,16 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
el1_mops(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_CUR:
+ el1_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_CUR:
+ el1_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_CUR:
+ el1_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el1_dbg(regs, esr);
+ el1_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el1_fpac(regs, esr);
@@ -747,14 +817,56 @@ static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
exit_to_user_mode(regs);
}
-static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr)
{
- /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ enter_from_user_mode(regs);
+ debug_exception_enter(regs);
+ do_breakpoint(esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr)
+{
+ if (!is_ttbr0_addr(regs->pc))
+ arm64_apply_bp_hardening();
+
+ enter_from_user_mode(regs);
+ /*
+ * After handling a breakpoint, we suspend the breakpoint
+ * and use single-step to move to the next instruction.
+ * If we are stepping a suspended breakpoint, there's nothing more to do:
+ * the single-step is complete.
+ */
+ if (!try_step_suspended_breakpoints(regs)) {
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_softstep(esr, regs);
+ }
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_watchpt(struct pt_regs *regs, unsigned long esr)
+{
+ /* Watchpoints are the only debug exception to write FAR_EL1 */
unsigned long far = read_sysreg(far_el1);
enter_from_user_mode(regs);
- do_debug_exception(far, esr, regs);
+ debug_exception_enter(regs);
+ do_watchpoint(far, esr, regs);
+ debug_exception_exit(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ exit_to_user_mode(regs);
+}
+
+static void noinstr el0_brk64(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
local_daif_restore(DAIF_PROCCTX);
+ do_el0_brk64(esr, regs);
exit_to_user_mode(regs);
}
@@ -826,10 +938,16 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
el0_gcs(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BRK64:
- el0_dbg(regs, esr);
+ el0_brk64(regs, esr);
break;
case ESR_ELx_EC_FPAC:
el0_fpac(regs, esr);
@@ -912,6 +1030,14 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
exit_to_user_mode(regs);
}
+static void noinstr el0_bkpt32(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_bkpt32(esr, regs);
+ exit_to_user_mode(regs);
+}
+
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -946,10 +1072,16 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
el0_cp15(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_LOW:
+ el0_breakpt(regs, esr);
+ break;
case ESR_ELx_EC_SOFTSTP_LOW:
+ el0_softstp(regs, esr);
+ break;
case ESR_ELx_EC_WATCHPT_LOW:
+ el0_watchpt(regs, esr);
+ break;
case ESR_ELx_EC_BKPT32:
- el0_dbg(regs, esr);
+ el0_bkpt32(regs, esr);
break;
default:
el0_inv(regs, esr);
@@ -977,7 +1109,6 @@ UNHANDLED(el0t, 32, fiq)
UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */
-#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
{
unsigned long esr = read_sysreg(esr_el1);
@@ -986,7 +1117,6 @@ asmlinkage void noinstr __noreturn handle_bad_stack(struct pt_regs *regs)
arm64_enter_nmi(regs);
panic_bad_stack(regs, esr, far);
}
-#endif /* CONFIG_VMAP_STACK */
#ifdef CONFIG_ARM_SDE_INTERFACE
asmlinkage noinstr unsigned long
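
The split above replaces the catch-all el1_dbg()/el0_dbg() with one noinstr entry per exception class, keyed on ESR_ELx.EC, which is also why only the watchpoint paths read FAR_EL1. A sketch of that fan-out, assuming the architectural EC encodings for the current-EL debug classes (0x31 breakpoint, 0x33 software step, 0x35 watchpoint, 0x3c BRK64):

#include <stdio.h>

#define ESR_EC_SHIFT	26
#define ESR_EC(esr)	(((esr) >> ESR_EC_SHIFT) & 0x3f)

#define EC_BREAKPT_CUR	0x31	/* EC values assumed from the ARM ARM */
#define EC_SOFTSTP_CUR	0x33
#define EC_WATCHPT_CUR	0x35
#define EC_BRK64	0x3c

static const char *el1_dbg_dispatch(unsigned long esr)
{
	switch (ESR_EC(esr)) {
	case EC_BREAKPT_CUR: return "el1_breakpt";
	case EC_SOFTSTP_CUR: return "el1_softstp";
	case EC_WATCHPT_CUR: return "el1_watchpt";	/* the only reader of FAR_EL1 */
	case EC_BRK64:       return "el1_brk64";
	default:             return "el1_unhandled";
	}
}

int main(void)
{
	unsigned long esr = (unsigned long)EC_BRK64 << ESR_EC_SHIFT;

	printf("%s\n", el1_dbg_dispatch(esr));	/* prints el1_brk64 */
	return 0;
}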
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index fe62218b8e0e..f8018b5c1f9a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -55,7 +55,6 @@
.endif
sub sp, sp, #PT_REGS_SIZE
-#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
@@ -97,7 +96,6 @@
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
-#endif
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
@@ -540,7 +538,6 @@ SYM_CODE_START(vectors)
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)
-#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(__bad_stack)
/*
* We detected an overflow in kernel_ventry, which switched to the
@@ -568,7 +565,6 @@ SYM_CODE_START_LOCAL(__bad_stack)
bl handle_bad_stack
ASM_BUG()
SYM_CODE_END(__bad_stack)
-#endif /* CONFIG_VMAP_STACK */
.macro entry_handler el:req, ht:req, regsize:req, label:req
@@ -1009,7 +1005,6 @@ SYM_CODE_START(__sdei_asm_handler)
1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
2: str x19, [x5]
-#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
@@ -1022,7 +1017,6 @@ SYM_CODE_START(__sdei_asm_handler)
2: mov x6, #SDEI_STACK_SIZE
add x5, x5, x6
mov sp, x5
-#endif
#ifdef CONFIG_SHADOW_CALL_STACK
/* Use a separate shadow call stack for normal and critical events */
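
The overflow test that is now unconditional depends on stack placement: stacks are THREAD_SIZE bytes and aligned to twice that, so bit THREAD_SHIFT is clear for every in-range SP and set the moment the stack is overrun, which is what the tbnz on bit THREAD_SHIFT checks without needing a scratch register. A small model of the invariant (the 16 KiB THREAD_SHIFT here is an assumption):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define THREAD_SHIFT	14UL			/* 16 KiB stacks; an assumption */
#define THREAD_SIZE	(1UL << THREAD_SHIFT)
#define THREAD_ALIGN	(2 * THREAD_SIZE)	/* double-size alignment */

/*
 * With the base aligned to THREAD_ALIGN, every address in
 * [base, base + THREAD_SIZE) has bit THREAD_SHIFT clear, and every
 * address just below base has it set: a one-bit overflow test.
 */
static int sp_overflowed(uintptr_t sp)
{
	return (sp >> THREAD_SHIFT) & 1;
}

int main(void)
{
	uintptr_t base = 0x40000000UL;		/* THREAD_ALIGN aligned */

	assert(base % THREAD_ALIGN == 0);
	assert(!sp_overflowed(base + THREAD_SIZE - 16));	/* in range */
	assert(sp_overflowed(base - 16));			/* ran off the end */
	printf("overflow test ok\n");
	return 0;
}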
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 722ac45f9f7b..ab76b36dce82 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -22,6 +22,7 @@
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/cputype.h>
@@ -618,8 +619,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
*/
-static int breakpoint_handler(unsigned long unused, unsigned long esr,
- struct pt_regs *regs)
+void do_breakpoint(unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step;
u32 ctrl_reg;
@@ -662,7 +662,7 @@ unlock:
}
if (!step)
- return 0;
+ return;
if (user_mode(regs)) {
debug_info->bps_disabled = 1;
@@ -670,7 +670,7 @@ unlock:
/* If we're already stepping a watchpoint, just return. */
if (debug_info->wps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -681,7 +681,7 @@ unlock:
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -690,10 +690,8 @@ unlock:
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(breakpoint_handler);
+NOKPROBE_SYMBOL(do_breakpoint);
/*
* Arm64 hardware does not always report a watchpoint hit address that matches
@@ -752,8 +750,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr,
return step;
}
-static int watchpoint_handler(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
+void do_watchpoint(unsigned long addr, unsigned long esr, struct pt_regs *regs)
{
int i, step = 0, *kernel_step, access, closest_match = 0;
u64 min_dist = -1, dist;
@@ -808,7 +805,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
rcu_read_unlock();
if (!step)
- return 0;
+ return;
/*
* We always disable EL0 watchpoints because the kernel can
@@ -821,7 +818,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
/* If we're already stepping a breakpoint, just return. */
if (debug_info->bps_disabled)
- return 0;
+ return;
if (test_thread_flag(TIF_SINGLESTEP))
debug_info->suspended_step = 1;
@@ -832,7 +829,7 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_step = this_cpu_ptr(&stepping_kernel_bp);
if (*kernel_step != ARM_KERNEL_STEP_NONE)
- return 0;
+ return;
if (kernel_active_single_step()) {
*kernel_step = ARM_KERNEL_STEP_SUSPEND;
@@ -841,44 +838,41 @@ static int watchpoint_handler(unsigned long addr, unsigned long esr,
kernel_enable_single_step(regs);
}
}
-
- return 0;
}
-NOKPROBE_SYMBOL(watchpoint_handler);
+NOKPROBE_SYMBOL(do_watchpoint);
/*
* Handle single-step exception.
*/
-int reinstall_suspended_bps(struct pt_regs *regs)
+bool try_step_suspended_breakpoints(struct pt_regs *regs)
{
struct debug_info *debug_info = &current->thread.debug;
- int handled_exception = 0, *kernel_step;
-
- kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ int *kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+ bool handled_exception = false;
/*
- * Called from single-step exception handler.
- * Return 0 if execution can resume, 1 if a SIGTRAP should be
- * reported.
+ * Called from single-step exception entry.
+ * Return true if we stepped a breakpoint and can resume execution,
+ * false if we need to handle a single-step.
*/
if (user_mode(regs)) {
if (debug_info->bps_disabled) {
debug_info->bps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (debug_info->wps_disabled) {
debug_info->wps_disabled = 0;
toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
- handled_exception = 1;
+ handled_exception = true;
}
if (handled_exception) {
if (debug_info->suspended_step) {
debug_info->suspended_step = 0;
/* Allow exception handling to fall-through. */
- handled_exception = 0;
+ handled_exception = false;
} else {
user_disable_single_step(current);
}
@@ -892,17 +886,17 @@ int reinstall_suspended_bps(struct pt_regs *regs)
if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
kernel_disable_single_step();
- handled_exception = 1;
+ handled_exception = true;
} else {
- handled_exception = 0;
+ handled_exception = false;
}
*kernel_step = ARM_KERNEL_STEP_NONE;
}
- return !handled_exception;
+ return handled_exception;
}
-NOKPROBE_SYMBOL(reinstall_suspended_bps);
+NOKPROBE_SYMBOL(try_step_suspended_breakpoints);
/*
* Context-switcher for restoring suspended breakpoints.
@@ -987,12 +981,6 @@ static int __init arch_hw_breakpoint_init(void)
pr_info("found %d breakpoint and %d watchpoint registers.\n",
core_num_brps, core_num_wrps);
- /* Register debug fault handlers. */
- hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-breakpoint handler");
- hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
- TRAP_HWBKPT, "hw-watchpoint handler");
-
/*
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
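
try_step_suspended_breakpoints() is the second half of the dance that do_breakpoint() and do_watchpoint() start: on a hit they suspend the triggering breakpoint and enable single-step, and on the following step exception the breakpoint is re-armed, with the exception consumed unless someone else was stepping. A toy state machine of that flow; the names are hypothetical, and the real code tracks this in debug_info and a per-CPU kernel_step variable:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct dbg_state {
	bool bp_armed;		/* hardware breakpoint enabled */
	bool stepping;		/* single-step enabled to skip the bp */
};

static void hit_breakpoint(struct dbg_state *s)
{
	s->bp_armed = false;	/* suspend so the retried insn can run */
	s->stepping = true;	/* step over it */
}

static bool step_exception(struct dbg_state *s)
{
	if (!s->bp_armed) {	/* we were stepping a suspended bp */
		s->bp_armed = true;
		s->stepping = false;
		return true;	/* resume: nothing more to handle */
	}
	return false;		/* a real single-step for someone else */
}

int main(void)
{
	struct dbg_state s = { .bp_armed = true };

	hit_breakpoint(&s);
	assert(step_exception(&s));	/* reinstalled, exception consumed */
	assert(s.bp_armed && !s.stepping);
	assert(!step_exception(&s));	/* next step is not ours */
	printf("suspend/step/reinstall ok\n");
	return 0;
}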
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 85087e2df564..c0065a1d77cf 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -51,7 +51,6 @@ static void init_irq_scs(void)
scs_alloc(early_cpu_to_node(cpu));
}
-#ifdef CONFIG_VMAP_STACK
static void __init init_irq_stacks(void)
{
int cpu;
@@ -62,18 +61,6 @@ static void __init init_irq_stacks(void)
per_cpu(irq_stack_ptr, cpu) = p;
}
}
-#else
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
-DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
-
-static void init_irq_stacks(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
-}
-#endif
#ifndef CONFIG_PREEMPT_RT
static void ____do_softirq(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index f3c4d3a8a20f..968324a79a89 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -234,23 +234,23 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
return err;
}
-static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_brk_handler(struct pt_regs *regs, unsigned long esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_brk_fn)
+NOKPROBE_SYMBOL(kgdb_brk_handler)
-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_compiled_brk_handler(struct pt_regs *regs, unsigned long esr)
{
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
+NOKPROBE_SYMBOL(kgdb_compiled_brk_handler);
-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
+int kgdb_single_step_handler(struct pt_regs *regs, unsigned long esr)
{
if (!kgdb_single_step)
return DBG_HOOK_ERROR;
@@ -258,21 +258,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr)
kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
-NOKPROBE_SYMBOL(kgdb_step_brk_fn);
-
-static struct break_hook kgdb_brkpt_hook = {
- .fn = kgdb_brk_fn,
- .imm = KGDB_DYN_DBG_BRK_IMM,
-};
-
-static struct break_hook kgdb_compiled_brkpt_hook = {
- .fn = kgdb_compiled_brk_fn,
- .imm = KGDB_COMPILED_DBG_BRK_IMM,
-};
-
-static struct step_hook kgdb_step_hook = {
- .fn = kgdb_step_brk_fn
-};
+NOKPROBE_SYMBOL(kgdb_single_step_handler);
static int __kgdb_notify(struct die_args *args, unsigned long cmd)
{
@@ -311,15 +297,7 @@ static struct notifier_block kgdb_notifier = {
*/
int kgdb_arch_init(void)
{
- int ret = register_die_notifier(&kgdb_notifier);
-
- if (ret != 0)
- return ret;
-
- register_kernel_break_hook(&kgdb_brkpt_hook);
- register_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- register_kernel_step_hook(&kgdb_step_hook);
- return 0;
+ return register_die_notifier(&kgdb_notifier);
}
/*
@@ -329,9 +307,6 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_kernel_break_hook(&kgdb_brkpt_hook);
- unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook);
- unregister_kernel_step_hook(&kgdb_step_hook);
unregister_die_notifier(&kgdb_notifier);
}
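
With the break_hook list gone, kgdb's handlers are reached via call_el1_break_hook()'s static dispatch on the BRK comment, the 16-bit immediate the CPU copies into ESR_ELx.ISS. A sketch of the decode; the immediates below follow the kernel's brk-imm.h values (0x400 dynamic kgdb, 0x401 compiled-in, 0x004 kprobes), but treat the exact numbers as assumptions:

#include <assert.h>
#include <stdio.h>

#define ESR_COMMENT_MASK	0xffffUL

#define KGDB_DYN_BRK_IMM	0x400	/* assumed from asm/brk-imm.h */
#define KGDB_COMPILED_BRK_IMM	0x401
#define KPROBES_BRK_IMM		0x004

static unsigned long esr_brk_comment(unsigned long esr)
{
	return esr & ESR_COMMENT_MASK;	/* BRK immediate lands in ISS[15:0] */
}

static const char *dispatch(unsigned long esr)
{
	switch (esr_brk_comment(esr)) {
	case KGDB_DYN_BRK_IMM:      return "kgdb_brk_handler";
	case KGDB_COMPILED_BRK_IMM: return "kgdb_compiled_brk_handler";
	case KPROBES_BRK_IMM:       return "kprobe_brk_handler";
	default:                    return "DBG_HOOK_ERROR";
	}
}

int main(void)
{
	/* 0xf2000401: EC=0x3c (BRK64), IL set, comment 0x401 */
	assert(esr_brk_comment(0xf2000401) == KGDB_COMPILED_BRK_IMM);
	printf("%s\n", dispatch(0xf2000401));
	return 0;
}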
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 06bb680bfe97..40148d2725ce 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -23,6 +23,7 @@
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
+#include <asm/text-patching.h>
enum aarch64_reloc_op {
RELOC_OP_NONE,
@@ -48,7 +49,17 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
return 0;
}
-static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
+#define WRITE_PLACE(place, val, mod) do { \
+ __typeof__(val) __val = (val); \
+ \
+ if (mod->state == MODULE_STATE_UNFORMED) \
+ *(place) = __val; \
+ else \
+ aarch64_insn_copy(place, &(__val), sizeof(*place)); \
+} while (0)
+
+static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len,
+ struct module *me)
{
s64 sval = do_reloc(op, place, val);
@@ -66,7 +77,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
switch (len) {
case 16:
- *(s16 *)place = sval;
+ WRITE_PLACE((s16 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U16_MAX)
@@ -82,7 +93,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 32:
- *(s32 *)place = sval;
+ WRITE_PLACE((s32 *)place, sval, me);
switch (op) {
case RELOC_OP_ABS:
if (sval < 0 || sval > U32_MAX)
@@ -98,7 +109,7 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
}
break;
case 64:
- *(s64 *)place = sval;
+ WRITE_PLACE((s64 *)place, sval, me);
break;
default:
pr_err("Invalid length (%d) for data relocation\n", len);
@@ -113,7 +124,8 @@ enum aarch64_insn_movw_imm_type {
};
static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, enum aarch64_insn_movw_imm_type imm_type)
+ int lsb, enum aarch64_insn_movw_imm_type imm_type,
+ struct module *me)
{
u64 imm;
s64 sval;
@@ -145,7 +157,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
if (imm > U16_MAX)
return -ERANGE;
@@ -154,7 +166,8 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
- int lsb, int len, enum aarch64_insn_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type,
+ struct module *me)
{
u64 imm, imm_mask;
s64 sval;
@@ -170,7 +183,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
/*
* Extract the upper value bits (including the sign bit) and
@@ -189,17 +202,17 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
}
static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
- __le32 *place, u64 val)
+ __le32 *place, u64 val, struct module *me)
{
u32 insn;
if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
/* patch ADRP to ADR if it is in range */
if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
- AARCH64_INSN_IMM_ADR)) {
+ AARCH64_INSN_IMM_ADR, me)) {
insn = le32_to_cpu(*place);
insn &= ~BIT(31);
} else {
@@ -211,7 +224,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
AARCH64_INSN_BRANCH_NOLINK);
}
- *place = cpu_to_le32(insn);
+ WRITE_PLACE(place, cpu_to_le32(insn), me);
return 0;
}
@@ -255,23 +268,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Data relocations. */
case R_AARCH64_ABS64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 64, me);
break;
case R_AARCH64_ABS32:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 32, me);
break;
case R_AARCH64_ABS16:
- ovf = reloc_data(RELOC_OP_ABS, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_ABS, loc, val, 16, me);
break;
case R_AARCH64_PREL64:
overflow_check = false;
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 64);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 64, me);
break;
case R_AARCH64_PREL32:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 32);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 32, me);
break;
case R_AARCH64_PREL16:
- ovf = reloc_data(RELOC_OP_PREL, loc, val, 16);
+ ovf = reloc_data(RELOC_OP_PREL, loc, val, 16, me);
break;
/* MOVW instruction relocations. */
@@ -280,88 +293,88 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
fallthrough;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVKZ);
+ AARCH64_INSN_IMM_MOVKZ, me);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- AARCH64_INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ, me);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- AARCH64_INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR, me);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
fallthrough;
case R_AARCH64_ADR_PREL_PG_HI21:
- ovf = reloc_insn_adrp(me, sechdrs, loc, val);
+ ovf = reloc_insn_adrp(me, sechdrs, loc, val, me);
if (ovf && ovf != -ERANGE)
return ovf;
break;
@@ -369,46 +382,46 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- AARCH64_INSN_IMM_12);
+ AARCH64_INSN_IMM_12, me);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- AARCH64_INSN_IMM_14);
+ AARCH64_INSN_IMM_14, me);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- AARCH64_INSN_IMM_19);
+ AARCH64_INSN_IMM_19, me);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- AARCH64_INSN_IMM_26);
+ AARCH64_INSN_IMM_26, me);
if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
- 26, AARCH64_INSN_IMM_26);
+ 26, AARCH64_INSN_IMM_26, me);
}
break;
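
WRITE_PLACE() is the core of the module.c change: while a module is MODULE_STATE_UNFORMED its text is still private and a plain store suffices, but once the module may be live (a livepatch applying relocations later, for instance) the write has to go through the instruction patching API. A self-contained model, with memcpy() standing in for aarch64_insn_copy() and its cache maintenance:

#include <stdio.h>
#include <string.h>

enum module_state { MODULE_STATE_UNFORMED, MODULE_STATE_LIVE };

struct module { enum module_state state; };

/* Stand-in for aarch64_insn_copy(): the real helper writes kernel text
 * through a writable alias and handles cache maintenance. */
static void insn_copy(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
}

/* Mirrors the WRITE_PLACE() macro above: plain store before the module
 * is formed, patching helper once its text may be live. */
#define WRITE_PLACE(place, val, mod) do {			\
	__typeof__(val) __val = (val);				\
								\
	if ((mod)->state == MODULE_STATE_UNFORMED)		\
		*(place) = __val;				\
	else							\
		insn_copy((place), &__val, sizeof(*(place)));	\
} while (0)

int main(void)
{
	struct module m = { MODULE_STATE_UNFORMED };
	unsigned int insn = 0;

	WRITE_PLACE(&insn, 0xd503201fu, &m);	/* NOP, written directly */
	m.state = MODULE_STATE_LIVE;
	WRITE_PLACE(&insn, 0xd65f03c0u, &m);	/* RET, via the patching path */
	printf("insn = 0x%08x\n", insn);
	return 0;
}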
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 2fbfd27ff5f2..e5e773844889 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -200,7 +200,7 @@ static void mte_update_sctlr_user(struct task_struct *task)
* program requested values go with what was requested.
*/
resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
- sctlr &= ~SCTLR_EL1_TCF0_MASK;
+ sctlr &= ~(SCTLR_EL1_TCF0_MASK | SCTLR_EL1_TCSO0_MASK);
/*
* Pick an actual setting. The order in which we check for
* set bits and map into register values determines our
@@ -212,6 +212,10 @@ static void mte_update_sctlr_user(struct task_struct *task)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
+
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ sctlr |= SYS_FIELD_PREP(SCTLR_EL1, TCSO0, 1);
+
task->thread.sctlr_user = sctlr;
}
@@ -371,6 +375,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
(arg & PR_MTE_TCF_SYNC))
mte_ctrl |= MTE_CTRL_TCF_ASYMM;
+ if (arg & PR_MTE_STORE_ONLY)
+ mte_ctrl |= MTE_CTRL_STORE_ONLY;
+
task->thread.mte_ctrl = mte_ctrl;
if (task == current) {
preempt_disable();
@@ -398,6 +405,8 @@ long get_mte_ctrl(struct task_struct *task)
ret |= PR_MTE_TCF_ASYNC;
if (mte_ctrl & MTE_CTRL_TCF_SYNC)
ret |= PR_MTE_TCF_SYNC;
+ if (mte_ctrl & MTE_CTRL_STORE_ONLY)
+ ret |= PR_MTE_STORE_ONLY;
return ret;
}
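
mte_update_sctlr_user() now clears and repopulates the TCSO0 bit alongside the TCF0 field, so a task's PR_MTE_STORE_ONLY setting follows it across context switches exactly like its tag-check-fault mode. A simplified model of that resolution; every bit value below is an illustrative stand-in, and the asymmetric-mode case is omitted:

#include <assert.h>
#include <stdio.h>

#define MTE_CTRL_TCF_SYNC	(1UL << 0)	/* stand-in values only */
#define MTE_CTRL_TCF_ASYNC	(1UL << 1)
#define MTE_CTRL_STORE_ONLY	(1UL << 2)

#define SCTLR_TCF0_SYNC		(1UL << 4)
#define SCTLR_TCF0_ASYNC	(2UL << 4)
#define SCTLR_TCF0_MASK		(3UL << 4)
#define SCTLR_TCSO0		(1UL << 6)

/* Prefer the CPU's preferred TCF mode if the task asked for it, then
 * carry the store-only bit independently of the TCF choice. */
static unsigned long update_sctlr(unsigned long sctlr, unsigned long mte_ctrl,
				  unsigned long pref)
{
	unsigned long tcf = (mte_ctrl & pref) ? (mte_ctrl & pref) : mte_ctrl;

	sctlr &= ~(SCTLR_TCF0_MASK | SCTLR_TCSO0);
	if (tcf & MTE_CTRL_TCF_ASYNC)
		sctlr |= SCTLR_TCF0_ASYNC;
	else if (tcf & MTE_CTRL_TCF_SYNC)
		sctlr |= SCTLR_TCF0_SYNC;
	if (mte_ctrl & MTE_CTRL_STORE_ONLY)
		sctlr |= SCTLR_TCSO0;
	return sctlr;
}

int main(void)
{
	unsigned long s = update_sctlr(0, MTE_CTRL_TCF_SYNC | MTE_CTRL_STORE_ONLY,
				       MTE_CTRL_TCF_ASYNC);

	assert((s & SCTLR_TCF0_MASK) == SCTLR_TCF0_SYNC);	/* pref not requested */
	assert(s & SCTLR_TCSO0);				/* store-only carried */
	printf("sctlr = %#lx\n", s);
	return 0;
}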
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index f440bf57b1a5..be92d73c25b2 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -41,4 +41,4 @@ obj-y := idreg-override.pi.o \
obj-$(CONFIG_RELOCATABLE) += relocate.pi.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_early.pi.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.pi.o
-extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+targets := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d9e462eafb95..0c5d408afd95 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -292,8 +292,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
return 0;
}
-static int __kprobes
-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe *p, *cur_kprobe;
struct kprobe_ctlblk *kcb;
@@ -336,13 +336,8 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kprobes_break_hook = {
- .imm = KPROBES_BRK_IMM,
- .fn = kprobe_breakpoint_handler,
-};
-
-static int __kprobes
-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kprobe_ss_brk_handler(struct pt_regs *regs, unsigned long esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long addr = instruction_pointer(regs);
@@ -360,13 +355,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook kprobes_break_ss_hook = {
- .imm = KPROBES_BRK_SS_IMM,
- .fn = kprobe_breakpoint_ss_handler,
-};
-
-static int __kprobes
-kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
+int __kprobes
+kretprobe_brk_handler(struct pt_regs *regs, unsigned long esr)
{
if (regs->pc != (unsigned long)__kretprobe_trampoline)
return DBG_HOOK_ERROR;
@@ -375,11 +365,6 @@ kretprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook kretprobes_break_hook = {
- .imm = KRETPROBES_BRK_IMM,
- .fn = kretprobe_breakpoint_handler,
-};
-
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@@ -422,9 +407,5 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
- register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_break_hook(&kprobes_break_ss_hook);
- register_kernel_break_hook(&kretprobes_break_hook);
-
return 0;
}
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index a362f3dbb3d1..b60739d3983f 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -12,7 +12,7 @@
SYM_CODE_START(__kretprobe_trampoline)
/*
* Trigger a breakpoint exception. The PC will be adjusted by
- * kretprobe_breakpoint_handler(), and no subsequent instructions will
+ * kretprobe_brk_handler(), and no subsequent instructions will
* be executed from the trampoline.
*/
brk #KRETPROBES_BRK_IMM
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index cb3d05af36e3..1f91fd2a8187 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -173,7 +173,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
return NOTIFY_DONE;
}
-static int uprobe_breakpoint_handler(struct pt_regs *regs,
+int uprobe_brk_handler(struct pt_regs *regs,
unsigned long esr)
{
if (uprobe_pre_sstep_notifier(regs))
@@ -182,7 +182,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-static int uprobe_single_step_handler(struct pt_regs *regs,
+int uprobe_single_step_handler(struct pt_regs *regs,
unsigned long esr)
{
struct uprobe_task *utask = current->utask;
@@ -194,23 +194,3 @@ static int uprobe_single_step_handler(struct pt_regs *regs,
return DBG_HOOK_ERROR;
}
-/* uprobe breakpoint handler hook */
-static struct break_hook uprobes_break_hook = {
- .imm = UPROBES_BRK_IMM,
- .fn = uprobe_breakpoint_handler,
-};
-
-/* uprobe single step handler hook */
-static struct step_hook uprobes_step_hook = {
- .fn = uprobe_single_step_handler,
-};
-
-static int __init arch_init_uprobes(void)
-{
- register_user_break_hook(&uprobes_break_hook);
- register_user_step_hook(&uprobes_step_hook);
-
- return 0;
-}
-
-device_initcall(arch_init_uprobes);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 08b7042a2e2d..96482a1412c6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -307,13 +307,13 @@ static int copy_thread_gcs(struct task_struct *p,
p->thread.gcs_base = 0;
p->thread.gcs_size = 0;
+ p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
+ p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
+
gcs = gcs_alloc_thread_stack(p, args);
if (IS_ERR_VALUE(gcs))
return PTR_ERR((void *)gcs);
- p->thread.gcs_el0_mode = current->thread.gcs_el0_mode;
- p->thread.gcs_el0_locked = current->thread.gcs_el0_locked;
-
return 0;
}
@@ -341,7 +341,6 @@ void flush_thread(void)
void arch_release_task_struct(struct task_struct *tsk)
{
fpsimd_release_task(tsk);
- gcs_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -856,10 +855,14 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
if (is_compat_thread(ti))
return -EINVAL;
- if (system_supports_mte())
+ if (system_supports_mte()) {
valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
| PR_MTE_TAG_MASK;
+ if (cpus_have_cap(ARM64_MTE_STORE_ONLY))
+ valid_mask |= PR_MTE_STORE_ONLY;
+ }
+
if (arg & ~valid_mask)
return -EINVAL;
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 255d12f881c2..6f24a0251e18 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -34,10 +34,8 @@ unsigned long sdei_exit_mode;
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#ifdef CONFIG_VMAP_STACK
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
-#endif
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
DECLARE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
@@ -65,8 +63,7 @@ static void free_sdei_stacks(void)
{
int cpu;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return;
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
for_each_possible_cpu(cpu) {
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
@@ -91,8 +88,7 @@ static int init_sdei_stacks(void)
int cpu;
int err = 0;
- if (!IS_ENABLED(CONFIG_VMAP_STACK))
- return 0;
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
for_each_possible_cpu(cpu) {
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 417140cd399b..db3f972f8cd9 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -95,8 +95,11 @@ static void save_reset_user_access_state(struct user_access_state *ua_state)
ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
write_sysreg_s(por_enable_all, SYS_POR_EL0);
- /* Ensure that any subsequent uaccess observes the updated value */
- isb();
+ /*
+ * No ISB required as we can tolerate spurious Overlay faults -
+ * the fault handler will check again based on the new value
+ * of POR_EL0.
+ */
}
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 21a795303568..68cea3a4a35c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -64,26 +64,18 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_STOP_NMI,
- IPI_TIMER,
- IPI_IRQ_WORK,
- NR_IPI,
- /*
- * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
- * with trace_ipi_*
- */
- IPI_CPU_BACKTRACE = NR_IPI,
- IPI_KGDB_ROUNDUP,
- MAX_IPI
-};
-
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
-static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
static bool crash_stop;
@@ -844,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -917,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
static void arm64_backtrace_ipi(cpumask_t *mask)
{
- __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
@@ -944,7 +947,7 @@ void kgdb_roundup_cpus(void)
if (cpu == this_cpu)
continue;
- __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
}
}
#endif
@@ -1013,14 +1016,16 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
@@ -1046,11 +1051,15 @@ static void ipi_setup(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- prepare_percpu_nmi(ipi_irq_base + i);
- enable_percpu_nmi(ipi_irq_base + i, 0);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
} else {
- enable_percpu_irq(ipi_irq_base + i, 0);
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
@@ -1064,44 +1073,77 @@ static void ipi_teardown(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- disable_percpu_nmi(ipi_irq_base + i);
- teardown_percpu_nmi(ipi_irq_base + i);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
} else {
- disable_percpu_irq(ipi_irq_base + i);
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < MAX_IPI);
- nr_ipi = min(n, MAX_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
- if (ipi_should_be_nmi(i)) {
- err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as NMI, err=%d\n",
- i, err);
- } else {
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
- i, err);
- }
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
+
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
+
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
+
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
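
When IPIs are backed by LPIs instead of SGIs, every (cpu, ipi) pair gets its own Linux interrupt: ipi_setup_lpi() lays them out as base + cpu * nr_ipi + ipi, and ipi_handler() recovers the IPI number with the modulo seen above. A quick check of that round-trip, with an example base and IPI count:

#include <assert.h>
#include <stdio.h>

#define NR_IPI	8	/* an assumption for the model */

static int ipi_irq_base = 96;	/* arbitrary example base */

/* LPI layout used by ipi_setup_lpi(): one interrupt per (cpu, ipi) pair. */
static int ipi_to_irq(int cpu, int ipi)
{
	return ipi_irq_base + cpu * NR_IPI + ipi;
}

/* Reverse mapping used by ipi_handler(): the modulo recovers the IPI
 * number regardless of which CPU's interrupt fired. */
static int irq_to_ipi(int irq)
{
	return (irq - ipi_irq_base) % NR_IPI;
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)
		for (int ipi = 0; ipi < NR_IPI; ipi++)
			assert(irq_to_ipi(ipi_to_irq(cpu, ipi)) == ipi);
	printf("irq %d -> ipi %d\n", ipi_to_irq(2, 3), irq_to_ipi(ipi_to_irq(2, 3)));
	return 0;
}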
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1d9d51d7627f..3ebcf8c53fb0 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -152,6 +152,8 @@ kunwind_recover_return_address(struct kunwind_state *state)
orig_pc = kretprobe_find_ret_addr(state->task,
(void *)state->common.fp,
&state->kr_cur);
+ if (!orig_pc)
+ return -EINVAL;
state->common.pc = orig_pc;
state->flags.kretprobe = 1;
}
@@ -277,21 +279,24 @@ kunwind_next(struct kunwind_state *state)
typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
-static __always_inline void
+static __always_inline int
do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
void *cookie)
{
- if (kunwind_recover_return_address(state))
- return;
+ int ret;
- while (1) {
- int ret;
+ ret = kunwind_recover_return_address(state);
+ if (ret)
+ return ret;
+ while (1) {
if (!consume_state(state, cookie))
- break;
+ return -EINVAL;
ret = kunwind_next(state);
+ if (ret == -ENOENT)
+ return 0;
if (ret < 0)
- break;
+ return ret;
}
}
@@ -324,7 +329,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
: stackinfo_get_unknown(); \
})
-static __always_inline void
+static __always_inline int
kunwind_stack_walk(kunwind_consume_fn consume_state,
void *cookie, struct task_struct *task,
struct pt_regs *regs)
@@ -332,10 +337,8 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
struct stack_info stacks[] = {
stackinfo_get_task(task),
STACKINFO_CPU(irq),
-#if defined(CONFIG_VMAP_STACK)
STACKINFO_CPU(overflow),
-#endif
-#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+#if defined(CONFIG_ARM_SDE_INTERFACE)
STACKINFO_SDEI(normal),
STACKINFO_SDEI(critical),
#endif
@@ -352,7 +355,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
if (regs) {
if (task != current)
- return;
+ return -EINVAL;
kunwind_init_from_regs(&state, regs);
} else if (task == current) {
kunwind_init_from_caller(&state);
@@ -360,7 +363,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
kunwind_init_from_task(&state, task);
}
- do_kunwind(&state, consume_state, cookie);
+ return do_kunwind(&state, consume_state, cookie);
}
struct kunwind_consume_entry_data {
@@ -387,6 +390,36 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
+static __always_inline bool
+arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ /*
+ * At an exception boundary we can reliably consume the saved PC. We do
+ * not know whether the LR was live when the exception was taken, and
+ * so we cannot perform the next unwind step reliably.
+ *
+ * All that matters is whether the *entire* unwind is reliable, so give
+ * up as soon as we hit an exception boundary.
+ */
+ if (state->source == KUNWIND_SOURCE_REGS_PC)
+ return false;
+
+ return arch_kunwind_consume_entry(state, cookie);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie,
+ struct task_struct *task)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ return kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
+ task, NULL);
+}
+
struct bpf_unwind_consume_entry_data {
bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp);
void *cookie;
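
arch_stack_walk_reliable() reuses the ordinary unwinder but swaps in a stricter consumer: any frame whose PC was recovered from saved exception registers aborts the walk, since the LR at such a boundary may not have been live, and only a walk that reaches the final frame reports success. A hypothetical miniature of that predicate:

#include <stdbool.h>
#include <stdio.h>

enum source { SRC_FP, SRC_REGS_PC };

struct frame { unsigned long pc; enum source source; };

static bool reliable_consume(const struct frame *f)
{
	return f->source != SRC_REGS_PC;	/* give up at exception boundary */
}

static int walk(const struct frame *frames, int n)
{
	for (int i = 0; i < n; i++)
		if (!reliable_consume(&frames[i]))
			return -1;		/* stands in for -EINVAL */
	return 0;				/* reached the final frame cleanly */
}

int main(void)
{
	struct frame ok[]  = { { 0x100, SRC_FP }, { 0x200, SRC_FP } };
	struct frame bad[] = { { 0x100, SRC_FP }, { 0x200, SRC_REGS_PC } };

	printf("clean walk: %d, exception boundary: %d\n",
	       walk(ok, 2), walk(bad, 2));
	return 0;
}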
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9bfa5c944379..f528b6041f6a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -454,7 +454,7 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr)
u32 insn;
/* check for AArch32 breakpoint instructions */
- if (!aarch32_break_handler(regs))
+ if (try_handle_aarch32_break(regs))
return;
if (user_insn_read(regs, &insn))
@@ -894,8 +894,6 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr)
"Bad EL0 synchronous exception");
}
-#ifdef CONFIG_VMAP_STACK
-
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
__aligned(16);
@@ -927,10 +925,10 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne
nmi_panic(NULL, "kernel stack overflow");
cpu_park_loop();
}
-#endif
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
@@ -987,7 +985,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
int is_valid_bugaddr(unsigned long addr)
{
/*
- * bug_handler() only called for BRK #BUG_BRK_IMM.
+ * bug_brk_handler() is only called for BRK #BUG_BRK_IMM.
* So the answer is trivial -- any spurious instances with no
* bug table entry will be rejected by report_bug() and passed
* back to the debug-monitors code and handled as a fatal
@@ -997,7 +995,7 @@ int is_valid_bugaddr(unsigned long addr)
}
#endif
-static int bug_handler(struct pt_regs *regs, unsigned long esr)
+int bug_brk_handler(struct pt_regs *regs, unsigned long esr)
{
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
@@ -1017,13 +1015,8 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_HANDLED;
}
-static struct break_hook bug_break_hook = {
- .fn = bug_handler,
- .imm = BUG_BRK_IMM,
-};
-
#ifdef CONFIG_CFI_CLANG
-static int cfi_handler(struct pt_regs *regs, unsigned long esr)
+int cfi_brk_handler(struct pt_regs *regs, unsigned long esr)
{
unsigned long target;
u32 type;
@@ -1046,15 +1039,9 @@ static int cfi_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook cfi_break_hook = {
- .fn = cfi_handler,
- .imm = CFI_BRK_IMM_BASE,
- .mask = CFI_BRK_IMM_MASK,
-};
#endif /* CONFIG_CFI_CLANG */
-static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
+int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr)
{
pr_err("%s generated an invalid instruction at %pS!\n",
"Kernel text patching",
@@ -1064,11 +1051,6 @@ static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr)
return DBG_HOOK_ERROR;
}
-static struct break_hook fault_break_hook = {
- .fn = reserved_fault_handler,
- .imm = FAULT_BRK_IMM,
-};
-
#ifdef CONFIG_KASAN_SW_TAGS
#define KASAN_ESR_RECOVER 0x20
@@ -1076,7 +1058,7 @@ static struct break_hook fault_break_hook = {
#define KASAN_ESR_SIZE_MASK 0x0f
#define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK))
-static int kasan_handler(struct pt_regs *regs, unsigned long esr)
+int kasan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
@@ -1107,62 +1089,12 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook kasan_break_hook = {
- .fn = kasan_handler,
- .imm = KASAN_BRK_IMM,
- .mask = KASAN_BRK_MASK,
-};
#endif
#ifdef CONFIG_UBSAN_TRAP
-static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr)
{
die(report_ubsan_failure(esr & UBSAN_BRK_MASK), regs, esr);
return DBG_HOOK_HANDLED;
}
-
-static struct break_hook ubsan_break_hook = {
- .fn = ubsan_handler,
- .imm = UBSAN_BRK_IMM,
- .mask = UBSAN_BRK_MASK,
-};
-#endif
-
-/*
- * Initial handler for AArch64 BRK exceptions
- * This handler only used until debug_traps_init().
- */
-int __init early_brk64(unsigned long addr, unsigned long esr,
- struct pt_regs *regs)
-{
-#ifdef CONFIG_CFI_CLANG
- if (esr_is_cfi_brk(esr))
- return cfi_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
- return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- if (esr_is_ubsan_brk(esr))
- return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
-#endif
- return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
-}
-
-void __init trap_init(void)
-{
- register_kernel_break_hook(&bug_break_hook);
-#ifdef CONFIG_CFI_CLANG
- register_kernel_break_hook(&cfi_break_hook);
-#endif
- register_kernel_break_hook(&fault_break_hook);
-#ifdef CONFIG_KASAN_SW_TAGS
- register_kernel_break_hook(&kasan_break_hook);
-#endif
-#ifdef CONFIG_UBSAN_TRAP
- register_kernel_break_hook(&ubsan_break_hook);
#endif
- debug_traps_init();
-}
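The traps.c hunks above drop the per-handler break_hook registrations; the handlers are now exported and invoked directly by BRK immediate. A minimal sketch of such a dispatcher, built only from the handlers and masks visible in this diff (the real dispatch moves to debug-monitors.c, outside this excerpt, and may differ in detail):

static int dispatch_brk64(struct pt_regs *regs, unsigned long esr)
{
	unsigned long comment = esr_brk_comment(esr);

#ifdef CONFIG_CFI_CLANG
	if (esr_is_cfi_brk(esr))
		return cfi_brk_handler(regs, esr);
#endif
#ifdef CONFIG_KASAN_SW_TAGS
	if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
		return kasan_brk_handler(regs, esr);
#endif
#ifdef CONFIG_UBSAN_TRAP
	if (esr_is_ubsan_brk(esr))
		return ubsan_brk_handler(regs, esr);
#endif
	if (comment == FAULT_BRK_IMM)
		return reserved_fault_brk_handler(regs, esr);

	return bug_brk_handler(regs, esr);
}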
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
index dcd25322127c..3093037dcb7b 100644
--- a/arch/arm64/kernel/watchdog_hld.c
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -34,3 +34,61 @@ bool __init arch_perf_nmi_is_available(void)
*/
return arm_pmu_irq_is_nmi();
}
+
+static int watchdog_perf_update_period(void *data)
+{
+ int cpu = smp_processor_id();
+ u64 max_cpu_freq, new_period;
+
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ return 0;
+
+ new_period = watchdog_thresh * max_cpu_freq;
+ hardlockup_detector_perf_adjust_period(new_period);
+
+ return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (val != CPUFREQ_CREATE_POLICY)
+ return NOTIFY_DONE;
+
+ /*
+	 * Let each online CPU covered by the policy update the period on its
+	 * own. This serializes with the framework starting/stopping the lockup
+	 * detector (softlockup_{start,stop}_all) and avoids a potential race.
+	 * Otherwise we could hit the following theoretical race:
+ * (core 0/1 share the same policy)
+ * [core 0] [core 1]
+ * hardlockup_detector_event_create()
+ * hw_nmi_get_sample_period()
+ * (cpufreq registered, notifier callback invoked)
+ * watchdog_freq_notifier_callback()
+ * watchdog_perf_update_period()
+ * (since core 1's event is not yet created,
+ * the period is not set)
+ * perf_event_create_kernel_counter()
+ * (event's period is SAFE_MAX_CPU_FREQ)
+ */
+ for_each_cpu(cpu, policy->cpus)
+ smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+ .notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+ return cpufreq_register_notifier(&watchdog_freq_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);
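For reference, the arithmetic in watchdog_perf_update_period() works out as follows for a hypothetical 2 GHz CPU with the default 10 s hard-lockup threshold (numbers illustrative, not from this patch):

	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL	/* kHz -> Hz */
	             = 2,000,000 * 1000 = 2e9 Hz
	new_period   = watchdog_thresh * max_cpu_freq
	             = 10 * 2e9 = 2e10 CPU cycles

so the cycles event overflows exactly one lockup window after running flat out at the CPU's true maximum frequency, instead of at the conservative SAFE_MAX_CPU_FREQ-based period used before the notifier fires.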
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7c329e01c557..3ebc0570345c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,7 +23,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
- vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o
+ vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \
+ vgic/vgic-v5.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 701ea10a63f1..dbd74e4885e2 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -830,7 +830,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
* by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
* not both). This simplifies the handling of the EL1NV* bits.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
/* Use the VHE format for mental sanity */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 23dd3f3fc3eb..888f7c7abf54 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
r = BIT(0);
break;
+ case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
+ if (!kvm)
+ r = -EINVAL;
+ else
+ r = kvm_supports_cacheable_pfnmap();
+ break;
+
default:
r = 0;
}
@@ -521,7 +528,7 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
* Either we're running an L2 guest, and the API/APK bits come
* from L1's HCR_EL2, or API/APK are both set.
*/
- if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ if (unlikely(is_nested_ctxt(vcpu))) {
u64 val;
val = __vcpu_sys_reg(vcpu, HCR_EL2);
@@ -740,7 +747,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+ bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE);
+
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
@@ -1183,6 +1191,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
+ kvm_nested_flush_hwstate(vcpu);
+
if (kvm_vcpu_has_pmu(vcpu))
kvm_pmu_flush_hwstate(vcpu);
@@ -1282,6 +1292,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, ret);
+ kvm_nested_sync_hwstate(vcpu);
+
preempt_enable();
/*
@@ -2765,19 +2777,15 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
}
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
- struct kvm_kernel_irq_routing_entry *new)
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new)
{
- if (old->type != KVM_IRQ_ROUTING_MSI ||
- new->type != KVM_IRQ_ROUTING_MSI)
- return true;
-
- return memcmp(&old->msi, &new->msi, sizeof(new->msi));
-}
+ if (old->type == KVM_IRQ_ROUTING_MSI &&
+ new->type == KVM_IRQ_ROUTING_MSI &&
+ !memcmp(&old->msi, &new->msi, sizeof(new->msi)))
+ return;
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
/*
* Remapping the vLPI requires taking the its_lock mutex to resolve
* the new translation. We're in spinlock land at this point, so no
@@ -2785,7 +2793,7 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
*
* Unmap the vLPI and fall back to software LPI injection.
*/
- return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
+ return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq);
}
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index a25be111cd8f..0e5610533949 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1047,34 +1047,51 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
- switch (wi->regime) {
- case TR_EL10:
- pov_perms = perm_idx(vcpu, POR_EL1, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL20:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL2:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = 0;
- break;
- }
+ if (wr->pov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ pov_perms = perm_idx(vcpu, POR_EL1, idx);
+ break;
+ case TR_EL20:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ case TR_EL2:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ }
+
+ if (pov_perms & ~POE_RWX)
+ pov_perms = POE_NONE;
- if (pov_perms & ~POE_RWX)
- pov_perms = POE_NONE;
+ /* R_QXXPC, S1PrivOverlay enabled */
+ if (wr->pwxn && (pov_perms & POE_X))
+ pov_perms &= ~POE_W;
- if (wi->poe && wr->pov) {
wr->pr &= pov_perms & POE_R;
wr->pw &= pov_perms & POE_W;
wr->px &= pov_perms & POE_X;
}
- if (uov_perms & ~POE_RWX)
- uov_perms = POE_NONE;
+ if (wr->uov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL20:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL2:
+ uov_perms = 0;
+ break;
+ }
+
+ if (uov_perms & ~POE_RWX)
+ uov_perms = POE_NONE;
+
+ /* R_NPBXC, S1UnprivOverlay enabled */
+ if (wr->uwxn && (uov_perms & POE_X))
+ uov_perms &= ~POE_W;
- if (wi->e0poe && wr->uov) {
wr->ur &= uov_perms & POE_R;
wr->uw &= uov_perms & POE_W;
wr->ux &= uov_perms & POE_X;
@@ -1095,24 +1112,15 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
if (!wi->hpd)
compute_s1_hierarchical_permissions(vcpu, wi, wr);
- if (wi->poe || wi->e0poe)
- compute_s1_overlay_permissions(vcpu, wi, wr);
+ compute_s1_overlay_permissions(vcpu, wi, wr);
- /* R_QXXPC */
- if (wr->pwxn) {
- if (!wr->pov && wr->pw)
- wr->px = false;
- if (wr->pov && wr->px)
- wr->pw = false;
- }
+ /* R_QXXPC, S1PrivOverlay disabled */
+ if (!wr->pov)
+ wr->px &= !(wr->pwxn && wr->pw);
- /* R_NPBXC */
- if (wr->uwxn) {
- if (!wr->uov && wr->uw)
- wr->ux = false;
- if (wr->uov && wr->ux)
- wr->uw = false;
- }
+ /* R_NPBXC, S1UnprivOverlay disabled */
+ if (!wr->uov)
+ wr->ux &= !(wr->uwxn && wr->uw);
pan = wi->pan && (wr->ur || wr->uw ||
(pan3_enabled(vcpu, wi->regime) && wr->ux));
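The bitwise rewrite of the overlay-disabled WXN rules can be checked in isolation. A stand-alone model of the R_QXXPC case (function name hypothetical, not part of the patch):

#include <stdbool.h>

/* With the privileged overlay disabled, WXN strips execute from any
 * privileged-writable mapping: px &= !(pwxn && pw). */
static bool effective_px(bool px, bool pwxn, bool pw)
{
	return px && !(pwxn && pw);
}

This reproduces the old two-branch form: when !wr->pov, only the pwxn && pw combination can clear px, and the pov-enabled half of the rule now lives next to the overlay lookup in compute_s1_overlay_permissions().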
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 54911a93b001..da66c4a14775 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -66,7 +66,6 @@ struct reg_bits_to_feat_map {
#define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP
#define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP
#define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP
-#define FEAT_PMUv3p9 ID_AA64DFR0_EL1, PMUVer, V3P9
#define FEAT_TRBE ID_AA64DFR0_EL1, TraceBuffer, IMP
#define FEAT_TRBEv1p1 ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1
#define FEAT_DoubleLock ID_AA64DFR0_EL1, DoubleLock, IMP
@@ -89,6 +88,7 @@ struct reg_bits_to_feat_map {
#define FEAT_RASv2 ID_AA64PFR0_EL1, RAS, V2
#define FEAT_GICv3 ID_AA64PFR0_EL1, GIC, IMP
#define FEAT_LOR ID_AA64MMFR1_EL1, LO, IMP
+#define FEAT_SPEv1p2 ID_AA64DFR0_EL1, PMSVer, V1P2
#define FEAT_SPEv1p4 ID_AA64DFR0_EL1, PMSVer, V1P4
#define FEAT_SPEv1p5 ID_AA64DFR0_EL1, PMSVer, V1P5
#define FEAT_ATS1A ID_AA64ISAR2_EL1, ATS1A, IMP
@@ -131,6 +131,27 @@ struct reg_bits_to_feat_map {
#define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP
#define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP
#define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP
+#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP
+#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2
+#define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP
+#define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP
+#define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT
+#define FEAT_BTI ID_AA64PFR1_EL1, BT, IMP
+#define FEAT_ExS ID_AA64MMFR0_EL1, EXS, IMP
+#define FEAT_IESB ID_AA64MMFR2_EL1, IESB, IMP
+#define FEAT_LSE2 ID_AA64MMFR2_EL1, AT, IMP
+#define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP
+#define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP
+#define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP
+#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2
+#define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC
+#define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP
+#define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP
+#define FEAT_PAN3 ID_AA64MMFR1_EL1, PAN, PAN3
+#define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP
+#define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP
+#define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP
+#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP
static bool not_feat_aa64el3(struct kvm *kvm)
{
@@ -218,11 +239,62 @@ static bool feat_trbe_mpam(struct kvm *kvm)
(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM));
}
+static bool feat_asid2_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_ASID2) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
+static bool feat_d128_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_D128) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
+static bool feat_mec_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MEC) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
static bool feat_ebep_pmuv3_ss(struct kvm *kvm)
{
return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, FEAT_PMUv3_SS);
}
+static bool feat_mixedendel0(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MixedEnd) || kvm_has_feat(kvm, FEAT_MixedEndEL0);
+}
+
+static bool feat_mte_async(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MTE2) && kvm_has_feat_enum(kvm, FEAT_MTE_ASYNC);
+}
+
+#define check_pmu_revision(k, r) \
+ ({ \
+ (kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, r) && \
+ !kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, IMP_DEF)); \
+ })
+
+static bool feat_pmuv3p1(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P1);
+}
+
+static bool feat_pmuv3p5(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P5);
+}
+
+static bool feat_pmuv3p7(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P7);
+}
+
+static bool feat_pmuv3p9(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P9);
+}
+
static bool compute_hcr_rw(struct kvm *kvm, u64 *bits)
{
/* This is purely academic: AArch32 and NV are mutually exclusive */
@@ -681,7 +753,7 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = {
NEEDS_FEAT(HDFGRTR2_EL2_nPMICFILTR_EL0 |
HDFGRTR2_EL2_nPMICNTR_EL0,
FEAT_PMUv3_ICNTR),
- NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, FEAT_PMUv3p9),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, feat_pmuv3p9),
NEEDS_FEAT(HDFGRTR2_EL2_nPMSSCR_EL1 |
HDFGRTR2_EL2_nPMSSDATA,
FEAT_PMUv3_SS),
@@ -713,7 +785,7 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = {
FEAT_PMUv3_ICNTR),
NEEDS_FEAT(HDFGWTR2_EL2_nPMUACR_EL1 |
HDFGWTR2_EL2_nPMZR_EL0,
- FEAT_PMUv3p9),
+ feat_pmuv3p9),
NEEDS_FEAT(HDFGWTR2_EL2_nPMSSCR_EL1, FEAT_PMUv3_SS),
NEEDS_FEAT(HDFGWTR2_EL2_nPMIAR_EL1, FEAT_SEBEP),
NEEDS_FEAT(HDFGWTR2_EL2_nPMSDSFR_EL1, feat_spe_fds),
@@ -832,6 +904,150 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
};
+static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
+ NEEDS_FEAT(SCTLR2_EL1_NMEA |
+ SCTLR2_EL1_EASE,
+ FEAT_DoubleFault2),
+ NEEDS_FEAT(SCTLR2_EL1_EnADERR, feat_aderr),
+ NEEDS_FEAT(SCTLR2_EL1_EnANERR, feat_anerr),
+ NEEDS_FEAT(SCTLR2_EL1_EnIDCP128, FEAT_SYSREG128),
+ NEEDS_FEAT(SCTLR2_EL1_EnPACM |
+ SCTLR2_EL1_EnPACM0,
+ feat_pauth_lr),
+ NEEDS_FEAT(SCTLR2_EL1_CPTA |
+ SCTLR2_EL1_CPTA0 |
+ SCTLR2_EL1_CPTM |
+ SCTLR2_EL1_CPTM0,
+ FEAT_CPA2),
+};
+
+static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
+ NEEDS_FEAT(TCR2_EL2_FNG1 |
+ TCR2_EL2_FNG0 |
+ TCR2_EL2_A2,
+ feat_asid2_e2h1),
+ NEEDS_FEAT(TCR2_EL2_DisCH1 |
+ TCR2_EL2_DisCH0 |
+ TCR2_EL2_D128,
+ feat_d128_e2h1),
+ NEEDS_FEAT(TCR2_EL2_AMEC1, feat_mec_e2h1),
+ NEEDS_FEAT(TCR2_EL2_AMEC0, FEAT_MEC),
+ NEEDS_FEAT(TCR2_EL2_HAFT, FEAT_HAFT),
+ NEEDS_FEAT(TCR2_EL2_PTTWI |
+ TCR2_EL2_PnCH,
+ FEAT_THE),
+ NEEDS_FEAT(TCR2_EL2_AIE, FEAT_AIE),
+ NEEDS_FEAT(TCR2_EL2_POE |
+ TCR2_EL2_E0POE,
+ FEAT_S1POE),
+ NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE),
+};
+
+static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
+ NEEDS_FEAT(SCTLR_EL1_CP15BEN |
+ SCTLR_EL1_ITD |
+ SCTLR_EL1_SED,
+ FEAT_AA32EL0),
+ NEEDS_FEAT(SCTLR_EL1_BT0 |
+ SCTLR_EL1_BT1,
+ FEAT_BTI),
+ NEEDS_FEAT(SCTLR_EL1_CMOW, FEAT_CMOW),
+ NEEDS_FEAT(SCTLR_EL1_TSCXT, feat_csv2_2_csv2_1p2),
+ NEEDS_FEAT(SCTLR_EL1_EIS |
+ SCTLR_EL1_EOS,
+ FEAT_ExS),
+ NEEDS_FEAT(SCTLR_EL1_EnFPM, FEAT_FPMR),
+ NEEDS_FEAT(SCTLR_EL1_IESB, FEAT_IESB),
+ NEEDS_FEAT(SCTLR_EL1_EnALS, FEAT_LS64),
+ NEEDS_FEAT(SCTLR_EL1_EnAS0, FEAT_LS64_ACCDATA),
+ NEEDS_FEAT(SCTLR_EL1_EnASR, FEAT_LS64_V),
+ NEEDS_FEAT(SCTLR_EL1_nAA, FEAT_LSE2),
+ NEEDS_FEAT(SCTLR_EL1_LSMAOE |
+ SCTLR_EL1_nTLSMD,
+ FEAT_LSMAOC),
+ NEEDS_FEAT(SCTLR_EL1_EE, FEAT_MixedEnd),
+ NEEDS_FEAT(SCTLR_EL1_E0E, feat_mixedendel0),
+ NEEDS_FEAT(SCTLR_EL1_MSCEn, FEAT_MOPS),
+ NEEDS_FEAT(SCTLR_EL1_ATA0 |
+ SCTLR_EL1_ATA |
+ SCTLR_EL1_TCF0 |
+ SCTLR_EL1_TCF,
+ FEAT_MTE2),
+ NEEDS_FEAT(SCTLR_EL1_ITFSB, feat_mte_async),
+ NEEDS_FEAT(SCTLR_EL1_TCSO0 |
+ SCTLR_EL1_TCSO,
+ FEAT_MTE_STORE_ONLY),
+ NEEDS_FEAT(SCTLR_EL1_NMI |
+ SCTLR_EL1_SPINTMASK,
+ FEAT_NMI),
+ NEEDS_FEAT(SCTLR_EL1_SPAN, FEAT_PAN),
+ NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3),
+ NEEDS_FEAT(SCTLR_EL1_EnDA |
+ SCTLR_EL1_EnDB |
+ SCTLR_EL1_EnIA |
+ SCTLR_EL1_EnIB,
+ feat_pauth),
+ NEEDS_FEAT(SCTLR_EL1_EnTP2, FEAT_SME),
+ NEEDS_FEAT(SCTLR_EL1_EnRCTX, FEAT_SPECRES),
+ NEEDS_FEAT(SCTLR_EL1_DSSBS, FEAT_SSBS),
+ NEEDS_FEAT(SCTLR_EL1_TIDCP, FEAT_TIDCP1),
+ NEEDS_FEAT(SCTLR_EL1_TME0 |
+ SCTLR_EL1_TME |
+ SCTLR_EL1_TMT0 |
+ SCTLR_EL1_TMT,
+ FEAT_TME),
+ NEEDS_FEAT(SCTLR_EL1_TWEDEL |
+ SCTLR_EL1_TWEDEn,
+ FEAT_TWED),
+ NEEDS_FEAT(SCTLR_EL1_UCI |
+ SCTLR_EL1_EE |
+ SCTLR_EL1_E0E |
+ SCTLR_EL1_WXN |
+ SCTLR_EL1_nTWE |
+ SCTLR_EL1_nTWI |
+ SCTLR_EL1_UCT |
+ SCTLR_EL1_DZE |
+ SCTLR_EL1_I |
+ SCTLR_EL1_UMA |
+ SCTLR_EL1_SA0 |
+ SCTLR_EL1_SA |
+ SCTLR_EL1_C |
+ SCTLR_EL1_A |
+ SCTLR_EL1_M,
+ FEAT_AA64EL1),
+};
+
+static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
+ NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9),
+ NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock),
+ NEEDS_FEAT(MDCR_EL2_PMEE, FEAT_EBEP),
+ NEEDS_FEAT(MDCR_EL2_TDCC, FEAT_FGT),
+ NEEDS_FEAT(MDCR_EL2_MTPME, FEAT_MTPMU),
+ NEEDS_FEAT(MDCR_EL2_HPME |
+ MDCR_EL2_HPMN |
+ MDCR_EL2_TPMCR |
+ MDCR_EL2_TPM,
+ FEAT_PMUv3),
+ NEEDS_FEAT(MDCR_EL2_HPMD, feat_pmuv3p1),
+ NEEDS_FEAT(MDCR_EL2_HCCD |
+ MDCR_EL2_HLP,
+ feat_pmuv3p5),
+ NEEDS_FEAT(MDCR_EL2_HPMFZO, feat_pmuv3p7),
+ NEEDS_FEAT(MDCR_EL2_PMSSE, FEAT_PMUv3_SS),
+ NEEDS_FEAT(MDCR_EL2_E2PB |
+ MDCR_EL2_TPMS,
+ FEAT_SPE),
+ NEEDS_FEAT(MDCR_EL2_HPMFZS, FEAT_SPEv1p2),
+ NEEDS_FEAT(MDCR_EL2_EnSPM, FEAT_SPMU),
+ NEEDS_FEAT(MDCR_EL2_EnSTEPOP, FEAT_STEP2),
+ NEEDS_FEAT(MDCR_EL2_E2TB, FEAT_TRBE),
+ NEEDS_FEAT(MDCR_EL2_TTRF, FEAT_TRF),
+ NEEDS_FEAT(MDCR_EL2_TDA |
+ MDCR_EL2_TDE |
+ MDCR_EL2_TDRA,
+ FEAT_AA64EL1),
+};
+
static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
int map_size, u64 res0, const char *str)
{
@@ -863,6 +1079,14 @@ void __init check_feature_map(void)
__HCRX_EL2_RES0, "HCRX_EL2");
check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map),
HCR_EL2_RES0, "HCR_EL2");
+ check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map),
+ SCTLR2_EL1_RES0, "SCTLR2_EL1");
+ check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map),
+ TCR2_EL2_RES0, "TCR2_EL2");
+ check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map),
+ SCTLR_EL1_RES0, "SCTLR_EL1");
+ check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map),
+ MDCR_EL2_RES0, "MDCR_EL2");
}
static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
@@ -1077,6 +1301,31 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r
*res0 |= HCR_EL2_RES0 | (mask & ~fixed);
*res1 = HCR_EL2_RES1 | (mask & fixed);
break;
+ case SCTLR2_EL1:
+ case SCTLR2_EL2:
+ *res0 = compute_res0_bits(kvm, sctlr2_feat_map,
+ ARRAY_SIZE(sctlr2_feat_map), 0, 0);
+ *res0 |= SCTLR2_EL1_RES0;
+ *res1 = SCTLR2_EL1_RES1;
+ break;
+ case TCR2_EL2:
+ *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map,
+ ARRAY_SIZE(tcr2_el2_feat_map), 0, 0);
+ *res0 |= TCR2_EL2_RES0;
+ *res1 = TCR2_EL2_RES1;
+ break;
+ case SCTLR_EL1:
+ *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map,
+ ARRAY_SIZE(sctlr_el1_feat_map), 0, 0);
+ *res0 |= SCTLR_EL1_RES0;
+ *res1 = SCTLR_EL1_RES1;
+ break;
+ case MDCR_EL2:
+ *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map,
+ ARRAY_SIZE(mdcr_el2_feat_map), 0, 0);
+ *res0 |= MDCR_EL2_RES0;
+ *res1 = MDCR_EL2_RES1;
+ break;
default:
WARN_ON_ONCE(1);
*res0 = *res1 = 0;
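A hypothetical caller (not part of this patch) showing how the new SCTLR_EL1 entry in get_reg_fixed_bits() would be consumed; bits with no backing feature on this VM land in res0 and are squashed, while architecturally RES1 bits are forced on:

	u64 res0, res1;
	u64 val = __vcpu_sys_reg(vcpu, SCTLR_EL1);

	get_reg_fixed_bits(vcpu->kvm, SCTLR_EL1, &res0, &res1);
	val &= ~res0;	/* feature absent from this VM: reads as 0 */
	val |= res1;	/* architecturally RES1: reads as 1 */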
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 1a7dab333f55..381382c19fe4 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -81,6 +81,10 @@ void kvm_init_host_debug_data(void)
!(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
host_data_set_flag(HAS_SPE);
+ /* Check if we have BRBE implemented and available at the host */
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT))
+ host_data_set_flag(HAS_BRBE);
+
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT)) {
/* Force disable trace in protected mode in case of no TRBE */
if (is_protected_kvm_enabled())
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 3a384e9660b8..90cb4b7ae0ff 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -88,6 +88,7 @@ enum cgt_group_id {
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_HCRX_SCTLR2En,
CGT_CNTHCTL_EL1TVT,
CGT_CNTHCTL_EL1TVCT,
@@ -108,6 +109,7 @@ enum cgt_group_id {
CGT_HCR_TTLB_TTLBOS,
CGT_HCR_TVM_TRVM,
CGT_HCR_TVM_TRVM_HCRX_TCR2En,
+ CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
CGT_HCR_TPU_TICAB,
CGT_HCR_TPU_TOCU,
CGT_HCR_NV1_nNV2_ENSCXT,
@@ -398,6 +400,12 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_RW,
},
+ [CGT_HCRX_SCTLR2En] = {
+ .index = HCRX_EL2,
+ .value = 0,
+ .mask = HCRX_EL2_SCTLR2En,
+ .behaviour = BEHAVE_FORWARD_RW,
+ },
[CGT_CNTHCTL_EL1TVT] = {
.index = CNTHCTL_EL2,
.value = CNTHCTL_EL1TVT,
@@ -449,6 +457,8 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM),
MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En,
CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En),
+ MCB(CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
+ CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_SCTLR2En),
MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB),
MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU),
MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
@@ -782,6 +792,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_SCTLR2_EL1, CGT_HCR_TVM_TRVM_HCRX_SCTLR2En),
SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM),
@@ -1354,6 +1365,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1),
SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1),
SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1),
+ SR_FGT(SYS_SCTLR2_EL1, HFGRTR, SCTLR_EL1, 1),
SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1),
SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1),
SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1),
@@ -2592,13 +2604,8 @@ inject:
static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit)
{
- bool control_bit_set;
-
- if (!vcpu_has_nv(vcpu))
- return false;
-
- control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit;
- if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ if (is_nested_ctxt(vcpu) &&
+ (__vcpu_sys_reg(vcpu, reg) & control_bit)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
}
@@ -2719,6 +2726,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
case except_type_irq:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
break;
+ case except_type_serror:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+ break;
default:
WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
}
@@ -2816,3 +2826,28 @@ int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
/* esr_el2 value doesn't matter for exits due to irqs. */
return kvm_inject_nested(vcpu, 0, except_type_irq);
}
+
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK,
+ iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW);
+ esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL;
+
+ vcpu_write_sys_reg(vcpu, FAR_EL2, addr);
+
+ if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE)
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+
+ return kvm_inject_nested_sync(vcpu, esr);
+}
+
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+ /*
+ * Hardware sets up the EC field when propagating ESR as a result of
+ * vSError injection. Manually populate EC for an emulated SError
+ * exception.
+ */
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+}
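Working the FIELD_PREP() in kvm_inject_nested_serror() through with an all-zero ISS from userspace: ESR_ELx_EC_SERROR is 0x2f and ESR_ELx_EC_MASK covers bits [31:26], so

	esr  = 0;
	esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
	/* esr == 0x2f << 26 == 0xbc000000, a well-formed SError syndrome */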
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2196979a24a3..16ba5e9ac86c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
+ events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) ||
+ vcpu_get_flag(vcpu, NESTED_SERROR_PENDING);
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@@ -833,29 +834,62 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
return 0;
}
+static void commit_pending_events(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
+ return;
+
+ /*
+ * Reset the MMIO emulation state to avoid stepping PC after emulating
+ * the exception entry.
+ */
+ vcpu->mmio_needed = false;
+ kvm_call_hyp(__kvm_adjust_pc, vcpu);
+}
+
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
bool ext_dabt_pending = events->exception.ext_dabt_pending;
+ u64 esr = events->exception.serror_esr;
+ int ret = 0;
- if (serror_pending && has_esr) {
- if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- return -EINVAL;
-
- if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
- kvm_set_sei_esr(vcpu, events->exception.serror_esr);
- else
- return -EINVAL;
- } else if (serror_pending) {
- kvm_inject_vabt(vcpu);
+ /*
+ * Immediately commit the pending SEA to the vCPU's architectural
+ * state, which is necessary since we do not return a pending SEA
+ * to userspace via KVM_GET_VCPU_EVENTS.
+ */
+ if (ext_dabt_pending) {
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ commit_pending_events(vcpu);
}
- if (ext_dabt_pending)
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ if (ret < 0)
+ return ret;
- return 0;
+ if (!serror_pending)
+ return 0;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr)
+ return -EINVAL;
+
+ if (has_esr && (esr & ~ESR_ELx_ISS_MASK))
+ return -EINVAL;
+
+ if (has_esr)
+ ret = kvm_inject_serror_esr(vcpu, esr);
+ else
+ ret = kvm_inject_serror(vcpu);
+
+ /*
+ * We could've decided that the SError is due for immediate software
+ * injection; commit the exception in case userspace decides it wants
+ * to inject more exceptions for some strange reason.
+ */
+ commit_pending_events(vcpu);
+ return (ret < 0) ? ret : 0;
}
u32 __attribute_const__ kvm_target_cpu(void)
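The reworked __kvm_arm_vcpu_set_events() is driven from userspace via KVM_SET_VCPU_EVENTS. A minimal caller-side sketch (error handling elided; vcpu_fd is assumed to be an already-initialized vCPU file descriptor):

#include <linux/kvm.h>
#include <sys/ioctl.h>

void pend_serror(int vcpu_fd)
{
	struct kvm_vcpu_events events = {0};

	events.exception.serror_pending = 1;
	events.exception.serror_has_esr = 1;	/* needs ARM64_HAS_RAS_EXTN */
	events.exception.serror_esr = 0x1;	/* ISS bits only, per the check above */

	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}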
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 453266c96481..a598072f36d2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
static int handle_hvc(struct kvm_vcpu *vcpu)
@@ -252,7 +252,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
return 1;
}
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
@@ -311,12 +311,11 @@ static int kvm_handle_gcs(struct kvm_vcpu *vcpu)
static int handle_other(struct kvm_vcpu *vcpu)
{
- bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+ bool allowed, fwd = is_nested_ctxt(vcpu);
u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2);
u64 esr = kvm_vcpu_get_esr(vcpu);
u64 iss = ESR_ELx_ISS(esr);
struct kvm *kvm = vcpu->kvm;
- bool allowed, fwd = false;
/*
* We only trap for two reasons:
@@ -335,28 +334,23 @@ static int handle_other(struct kvm_vcpu *vcpu)
switch (iss) {
case ESR_ELx_ISS_OTHER_ST64BV:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnASR);
+ fwd &= !(hcrx & HCRX_EL2_EnASR);
break;
case ESR_ELx_ISS_OTHER_ST64BV0:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnAS0);
+ fwd &= !(hcrx & HCRX_EL2_EnAS0);
break;
case ESR_ELx_ISS_OTHER_LDST64B:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnALS);
+ fwd &= !(hcrx & HCRX_EL2_EnALS);
break;
case ESR_ELx_ISS_OTHER_TSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
break;
case ESR_ELx_ISS_OTHER_PSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
break;
default:
/* Clearly, we're missing something. */
@@ -496,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
} else {
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
return;
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 6a2a899a344e..95d186e0bf54 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -26,7 +26,8 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
if (unlikely(vcpu_has_nv(vcpu)))
return vcpu_read_sys_reg(vcpu, reg);
- else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
+ __vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
@@ -36,7 +37,8 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
- else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
+ else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) ||
+ !__vcpu_write_sys_reg_to_cpu(val, reg))
__vcpu_assign_sys_reg(vcpu, reg, val);
}
@@ -339,6 +341,10 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_serror);
+ break;
+
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
break;
@@ -347,9 +353,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror);
+ break;
+
default:
/*
- * Only EL1_SYNC and EL2_{SYNC,IRQ} makes
+ * Only EL1_{SYNC,SERR} and EL2_{SYNC,IRQ,SERR} makes
* sense so far. Everything else gets silently
* ignored.
*/
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 2ad57b117385..84ec4e100fbb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -298,7 +298,7 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
u64 val; \
\
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \
+ if (is_nested_ctxt(vcpu)) \
compute_clr_set(vcpu, reg, c, s); \
\
compute_undef_clr_set(vcpu, kvm, reg, c, s); \
@@ -436,7 +436,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = vcpu->arch.hcrx_el2;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2);
hcrx |= val & __HCRX_EL2_MASK;
hcrx &= ~(~val & __HCRX_EL2_nMASK);
@@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
write_sysreg_hcr(hcr);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) {
+ u64 vsesr;
+
+ /*
+ * When HCR_EL2.AMO is set, physical SErrors are taken to EL2
+ * and vSError injection is enabled for EL1. Conveniently, for
+ * NV this means that it is never the case where a 'physical'
+ * SError (injected by KVM or userspace) and vSError are
+ * deliverable to the same context.
+ *
+ * As such, we can trivially select between the host or guest's
+ * VSESR_EL2. The exception is when FEAT_RAS hasn't been
+ * exposed to the guest, in which case ESR propagation in
+ * hardware occurs unconditionally.
+ *
+ * Paper over the architectural wart and use an IMPLEMENTATION
+ * DEFINED ESR value in case FEAT_RAS is hidden from the guest.
+ */
+ if (!vserror_state_is_nested(vcpu))
+ vsesr = vcpu->arch.vsesr_el2;
+ else if (kvm_has_ras(kern_hyp_va(vcpu->kvm)))
+ vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2);
+ else
+ vsesr = ESR_ELx_ISV;
+
+ write_sysreg_s(vsesr, SYS_VSESR_EL2);
+ }
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
+ u64 *hcr;
+
+ if (vserror_state_is_nested(vcpu))
+ hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2);
+ else
+ hcr = &vcpu->arch.hcr_el2;
+
/*
* If we pended a virtual abort, preserve it until it gets
* cleared. See D1.14.3 (Virtual Interrupts) for details, but
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
+ *
+ * Additionally, when in a nested context we need to propagate the
+ * updated state to the guest hypervisor's HCR_EL2.
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ if (*hcr & HCR_VSE) {
+ *hcr &= ~HCR_VSE;
+ *hcr |= read_sysreg(hcr_el2) & HCR_VSE;
}
}
@@ -531,7 +566,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
* nested guest, as the guest hypervisor could select a smaller VL. Slap
* that into hardware before wrapping up.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2);
write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR);
@@ -557,7 +592,7 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
if (vcpu_has_sve(vcpu)) {
/* A guest hypervisor may restrict the effective max VL. */
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
else
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
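Condensing the VSESR_EL2 selection added to ___activate_traps() into a table:

	vserror_state_is_nested()  guest has FEAT_RAS   VSESR_EL2 written with
	-------------------------  -------------------  ---------------------------------
	false                      n/a                  vcpu->arch.vsesr_el2 (host-owned)
	true                       yes                  __vcpu_sys_reg(vcpu, VSESR_EL2)
	true                       no                   ESR_ELx_ISV (imp-def placeholder)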
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4d0dbea4c56f..a17cbe7582de 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -109,6 +109,28 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}
+static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_ras(kern_hyp_va(vcpu->kvm));
+}
+
+static inline bool ctxt_has_sctlr2(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_SCTLR2))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_sctlr2(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -147,6 +169,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ ctxt_sys_reg(ctxt, SCTLR2_EL1) = read_sysreg_el1(SYS_SCTLR2);
}
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
@@ -159,8 +184,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
if (!has_vhe() && ctxt->__hyp_running_vcpu)
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+ else if (ctxt_has_ras(ctxt))
+ ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2);
}
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
@@ -252,6 +282,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR2_EL1), SYS_SCTLR2);
}
/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
@@ -275,6 +308,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
{
u64 pstate = to_hw_pstate(ctxt);
u64 mode = pstate & PSR_AA32_MODE_MASK;
+ u64 vdisr;
/*
* Safety check to ensure we're setting the CPU up to enter the guest
@@ -293,8 +327,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
write_sysreg_el2(pstate, SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
+ vdisr = ctxt_sys_reg(ctxt, DISR_EL1);
+ else if (ctxt_has_ras(ctxt))
+ vdisr = ctxt_sys_reg(ctxt, VDISR_EL2);
+ else
+ vdisr = 0;
+
+ write_sysreg_s(vdisr, SYS_VDISR_EL2);
}
static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
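The DISR/VDISR handling in __sysreg_save_el2_return_state() and __sysreg_restore_el2_return_state() follows the same split, summarized:

	vserror_state_is_nested()  guest has FEAT_RAS   VDISR_EL2 backed by
	-------------------------  -------------------  -----------------------
	false                      n/a                  DISR_EL1
	true                       yes                  VDISR_EL2
	true                       no                   nothing (restored as 0)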
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 2f4a4f5036bb..2a1c0f49792b 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -92,12 +92,42 @@ static void __trace_switch_to_host(void)
*host_data_ptr(host_debug_state.trfcr_el1));
}
+static void __debug_save_brbe(u64 *brbcr_el1)
+{
+ *brbcr_el1 = 0;
+
+ /* Check if the BRBE is enabled */
+ if (!(read_sysreg_el1(SYS_BRBCR) & (BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE)))
+ return;
+
+ /*
+ * Prohibit branch record generation while we are in the guest.
+ * Since access to BRBCR_EL1 is trapped, the guest can't
+ * modify the filtering set by the host.
+ */
+ *brbcr_el1 = read_sysreg_el1(SYS_BRBCR);
+ write_sysreg_el1(0, SYS_BRBCR);
+}
+
+static void __debug_restore_brbe(u64 brbcr_el1)
+{
+ if (!brbcr_el1)
+ return;
+
+ /* Restore BRBE controls */
+ write_sysreg_el1(brbcr_el1, SYS_BRBCR);
+}
+
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (host_data_test_flag(HAS_SPE))
__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
+ /* Disable BRBE branch records */
+ if (host_data_test_flag(HAS_BRBE))
+ __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1));
+
if (__trace_needs_switch())
__trace_switch_to_guest();
}
@@ -111,6 +141,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (host_data_test_flag(HAS_SPE))
__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
+ if (host_data_test_flag(HAS_BRBE))
+ __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1));
if (__trace_needs_switch())
__trace_switch_to_host();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 0e752b515d0f..ccd575d5f6de 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -272,7 +272,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
* We're about to restore some new MMU state. Make sure
* ongoing page-table walks that have started before we
* trapped to EL2 have completed. This also synchronises the
- * above disabling of SPE and TRBE.
+ * above disabling of BRBE, SPE and TRBE.
*
* See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
* rule R_LFHQG and subsequent information statements.
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index f162b0df5cae..d81275790e69 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -296,12 +296,19 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the ICC_SRE_EL1 system
- * register. Note that this may not have any effect, as
- * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due
+ * to be relaxed in a future spec release, at which point this
+ * condition can be dropped.
*/
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ /*
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
+ }
/*
* If we need to trap system registers, we must write
@@ -322,8 +329,14 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ /*
+ * Can be dropped in the future when GICv5 spec is relaxed. See comment
+ * above.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ }
if (!cpu_if->vgic_sre) {
/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
@@ -423,10 +436,20 @@ void __vgic_v3_init_lrs(void)
*/
u64 __vgic_v3_get_gic_config(void)
{
- u64 val, sre = read_gicreg(ICC_SRE_EL1);
+ u64 val, sre;
unsigned long flags = 0;
/*
+ * In compat mode, we cannot access ICC_SRE_EL1 at any EL
+ * other than EL1 itself; just return ICH_VTR_EL2.
+ * ICC_IDR0_EL1 is only implemented on a GICv5
+ * system, so we first check if we have GICv5 support.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return read_gicreg(ICH_VTR_EL2);
+
+ sre = read_gicreg(ICC_SRE_EL1);
+ /*
* To check whether we have a MMIO-based (GICv2 compatible)
* CPU interface, we need to disable the system register
* view.
@@ -471,6 +494,16 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
+static void __vgic_v3_compat_mode_enable(void)
+{
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return;
+
+ sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, 0, ICH_VCTLR_EL2_V3);
+ /* Wait for V3 to become enabled */
+ isb();
+}
+
static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
@@ -490,6 +523,8 @@ void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
+ __vgic_v3_compat_mode_enable();
+
/*
* If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
* is dependent on ICC_SRE_EL1.SRE, and we have to perform the
@@ -1050,7 +1085,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
{
u64 ich_hcr;
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ if (!is_nested_ctxt(vcpu))
return false;
ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 477f1580ffea..e482181c6632 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -48,8 +48,7 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static u64 __compute_hcr(struct kvm_vcpu *vcpu)
{
- u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
- u64 hcr = vcpu->arch.hcr_el2;
+ u64 guest_hcr, hcr = vcpu->arch.hcr_el2;
if (!vcpu_has_nv(vcpu))
return hcr;
@@ -68,10 +67,21 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
if (!vcpu_el2_e2h_is_set(vcpu))
hcr |= HCR_NV1;
+ /*
+ * Nothing in HCR_EL2 should impact running in hypervisor
+ * context, apart from bits we have defined as RESx (E2H,
+ * HCD and co), or that cannot be set directly (the EXCLUDE
+ * bits). Given that we OR the guest's view with the host's,
+ * we can use the 0 value as the starting point, and only
+ * use the config-driven RES1 bits.
+ */
+ guest_hcr = kvm_vcpu_apply_reg_masks(vcpu, HCR_EL2, 0);
+
write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
} else {
host_data_clear_flag(VCPU_IN_HYP_CONTEXT);
+ guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
if (guest_hcr & HCR_NV) {
u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id()));
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 73e4bc7fde9e..f28c6cf4fe1b 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -77,6 +77,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
__vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1));
__vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR));
__vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR));
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ __vcpu_assign_sys_reg(vcpu, SCTLR2_EL2, read_sysreg_el1(SYS_SCTLR2));
}
static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
@@ -139,6 +142,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR2_EL2), SYS_SCTLR2);
}
/*
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a640e839848e..6745f38b64f9 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -15,13 +15,11 @@
#include <asm/kvm_nested.h>
#include <asm/esr.h>
-static void pend_sync_exception(struct kvm_vcpu *vcpu)
+static unsigned int exception_target_el(struct kvm_vcpu *vcpu)
{
/* If not nesting, EL1 is the only possible exception target */
- if (likely(!vcpu_has_nv(vcpu))) {
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- return;
- }
+ if (likely(!vcpu_has_nv(vcpu)))
+ return PSR_MODE_EL1h;
/*
* With NV, we need to pick between EL1 and EL2. Note that we
@@ -32,26 +30,76 @@ static void pend_sync_exception(struct kvm_vcpu *vcpu)
switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- break;
+ return PSR_MODE_EL2h;
case PSR_MODE_EL1h:
case PSR_MODE_EL1t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return PSR_MODE_EL1h;
case PSR_MODE_EL0t:
- if (vcpu_el2_tge_is_set(vcpu))
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- else
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return vcpu_el2_tge_is_set(vcpu) ? PSR_MODE_EL2h : PSR_MODE_EL1h;
default:
BUG();
}
}
-static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
+static enum vcpu_sysreg exception_esr_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return ESR_EL2;
+
+ return ESR_EL1;
+}
+
+static enum vcpu_sysreg exception_far_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return FAR_EL2;
+
+ return FAR_EL1;
+}
+
+static void pend_sync_exception(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+}
+
+static void pend_serror_exception(struct kvm_vcpu *vcpu)
{
- return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SERR);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+}
+
+static bool __effective_sctlr2_bit(struct kvm_vcpu *vcpu, unsigned int idx)
+{
+ u64 sctlr2;
+
+ if (!kvm_has_sctlr2(vcpu->kvm))
+ return false;
+
+ if (is_nested_ctxt(vcpu) &&
+ !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_SCTLR2En))
+ return false;
+
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL1);
+ else
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL2);
+
+ return sctlr2 & BIT(idx);
+}
+
+static bool effective_sctlr2_ease(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_EASE_SHIFT);
+}
+
+static bool effective_sctlr2_nmea(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_NMEA_SHIFT);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -60,7 +108,11 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
- pend_sync_exception(vcpu);
+ /* This delight is brought to you by FEAT_DoubleFault2. */
+ if (effective_sctlr2_ease(vcpu))
+ pend_serror_exception(vcpu);
+ else
+ pend_sync_exception(vcpu);
/*
* Build an {i,d}abort, depending on the level and the
@@ -83,13 +135,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
esr |= ESR_ELx_FSC_EXTABT;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- } else {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
- }
+ vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -105,10 +152,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- else
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
#define DFSR_FSC_EXTABT_LPAE 0x10
@@ -155,36 +199,35 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
vcpu_write_sys_reg(vcpu, far, FAR_EL1);
}
-/**
- * kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the data abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static void __kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
{
if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, false, addr);
+ inject_abt32(vcpu, iabt, addr);
else
- inject_abt64(vcpu, false, addr);
+ inject_abt64(vcpu, iabt, addr);
}
-/**
- * kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the prefetch abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu)
{
- if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, true, addr);
- else
- inject_abt64(vcpu, true, addr);
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_TGE | HCR_TEA))
+ return true;
+
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) &&
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu))
+ return kvm_inject_nested_sea(vcpu, iabt, addr);
+
+ __kvm_inject_sea(vcpu, iabt, addr);
+ return 1;
}
void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
@@ -194,10 +237,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
addr = kvm_vcpu_get_fault_ipa(vcpu);
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (kvm_vcpu_trap_is_iabt(vcpu))
- kvm_inject_pabt(vcpu, addr);
- else
- kvm_inject_dabt(vcpu, addr);
+ __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr);
/*
* If AArch64 or LPAE, set FSC to 0 to indicate an Address
@@ -210,9 +250,9 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
return;
- esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+ esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu));
esr &= ~GENMASK_ULL(5, 0);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
/**
@@ -230,25 +270,70 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
+static bool serror_is_masked(struct kvm_vcpu *vcpu)
{
- vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
- *vcpu_hcr(vcpu) |= HCR_VSE;
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && !effective_sctlr2_nmea(vcpu);
}
-/**
- * kvm_inject_vabt - inject an async abort / SError into the guest
- * @vcpu: The VCPU to receive the exception
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- *
- * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
- * the remaining ISS all-zeros so that this error is not interpreted as an
- * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
- * value, so the CPU generates an imp-def value.
- */
-void kvm_inject_vabt(struct kvm_vcpu *vcpu)
+static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu)
+{
+ if (is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu))
+ return true;
+
+ if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA))
+ return false;
+
+ /*
+ * In another example where FEAT_DoubleFault2 is entirely backwards,
+ * "masked" as it relates to the routing effects of HCRX_EL2.TMEA
+ * doesn't consider SCTLR2_EL1.NMEA. That is to say, even if EL1 asked
+ * for non-maskable SErrors, the EL2 bit takes priority if A is set.
+ */
+ if (vcpu_mode_priv(vcpu))
+ return *vcpu_cpsr(vcpu) & PSR_A_BIT;
+
+ /*
+ * Otherwise SErrors are considered unmasked when taken from EL0 and
+ * NMEA is set.
+ */
+ return serror_is_masked(vcpu);
+}
+
+static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu));
+}
+
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr)
{
- kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu))
+ return kvm_inject_nested_serror(vcpu, esr);
+
+ if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) {
+ vcpu_set_vsesr(vcpu, esr);
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+ return 1;
+ }
+
+ /*
+ * Emulate the exception entry if SErrors are unmasked. This is useful
+ * when the vCPU is in a nested context w/ vSErrors enabled, as we've
+ * already delegated the hardware vSError context (i.e. HCR_EL2.VSE,
+ * VSESR_EL2, VDISR_EL2) to the guest hypervisor.
+ *
+ * As we're emulating the SError injection we need to explicitly populate
+ * ESR_ELx.EC because hardware will not do it on our behalf.
+ */
+ if (!serror_is_masked(vcpu)) {
+ pend_serror_exception(vcpu);
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+ return 1;
+ }
+
+ vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
+ return 1;
}
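Paraphrasing the decision ladder kvm_inject_serror_esr() now implements:

	1. nested context, SError routed to EL2      -> kvm_inject_nested_serror()
	2. vEL2 with neither TGE nor AMO set         -> latch NESTED_SERROR_PENDING
	3. SErrors unmasked (PSTATE.A clear or NMEA) -> emulate the exception entry
	4. otherwise                                 -> pend via HCR_EL2.VSE/VSESR_EL2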
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index ab365e839874..54f9358c9e0e 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -72,7 +72,7 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
return data;
}
-static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
+static bool kvm_pending_external_abort(struct kvm_vcpu *vcpu)
{
if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
return false;
@@ -90,6 +90,8 @@ static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
return true;
default:
return false;
@@ -113,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
* Detect if the MMIO return was already handled or if userspace aborted
* the MMIO access.
*/
- if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu)))
+ if (unlikely(!vcpu->mmio_needed || kvm_pending_external_abort(vcpu)))
return 1;
vcpu->mmio_needed = 0;
@@ -169,10 +171,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
- if (vcpu_is_protected(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
+ if (vcpu_is_protected(vcpu))
+ return kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2942ec92c5a4..1c78864767c5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
return 0;
}
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
- return !pfn_is_map_memory(pfn);
-}
-
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
@@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}
+static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
+{
+ switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
+ case MT_NORMAL_NC:
+ case MT_DEVICE_nGnRnE:
+ case MT_DEVICE_nGnRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
int ret = 0;
bool write_fault, writable, force_pte = false;
- bool exec_fault, mte_allowed;
- bool device = false, vfio_allow_any_uc = false;
+ bool exec_fault, mte_allowed, is_vma_cacheable;
+ bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
phys_addr_t ipa = fault_ipa;
struct kvm *kvm = vcpu->kvm;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
+ vm_flags_t vm_flags;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
if (fault_is_perm)
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+ vm_flags = vma->vm_flags;
+
+ is_vma_cacheable = kvm_vma_is_cacheable(vma);
+
/* Don't use the VMA after the unlock -- it may have vanished */
vma = NULL;
@@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (is_error_noslot_pfn(pfn))
return -EFAULT;
- if (kvm_is_device_pfn(pfn)) {
- /*
- * If the page was identified as device early by looking at
- * the VMA flags, vma_pagesize is already representing the
- * largest quantity we can map. If instead it was mapped
- * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
- * and must not be upgraded.
- *
- * In both cases, we don't let transparent_hugepage_adjust()
- * change things at the last minute.
- */
- device = true;
+ /*
+	 * Check if this PFN maps non-struct page memory, which cannot support
+	 * CMOs. It could potentially be unsafe to access as cacheable.
+ */
+ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
+ if (is_vma_cacheable) {
+ /*
+ * Whilst the VMA owner expects cacheable mapping to this
+ * PFN, hardware also has to support the FWB and CACHE DIC
+ * features.
+ *
+			 * ARM64 KVM relies on a kernel VA mapping of the PFN to
+			 * perform cache maintenance, as the CMO instructions work
+			 * on virtual addresses. VM_PFNMAP regions are not
+			 * necessarily mapped to a KVA, hence the presence of the
+			 * hardware features S2FWB and CACHE DIC is mandatory to
+			 * avoid the need for cache maintenance.
+ */
+ if (!kvm_supports_cacheable_pfnmap())
+ return -EFAULT;
+ } else {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
+ s2_force_noncacheable = true;
+ }
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
- if (exec_fault && device)
+ if (exec_fault && s2_force_noncacheable)
return -ENOEXEC;
/*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
if (fault_is_perm && fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
}
- if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
+ if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- if (device) {
+ if (s2_force_noncacheable) {
if (vfio_allow_any_uc)
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
@@ -1808,7 +1841,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* There is no need to pass the error into the guest.
*/
if (kvm_handle_guest_sea())
- kvm_inject_vabt(vcpu);
+ return kvm_inject_serror(vcpu);
return 1;
}
@@ -1836,11 +1869,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (is_iabt)
- kvm_inject_pabt(vcpu, fault_ipa);
- else
- kvm_inject_dabt(vcpu, fault_ipa);
- return 1;
+ return kvm_inject_sea(vcpu, is_iabt, fault_ipa);
}
}
@@ -1912,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
if (kvm_vcpu_abt_iss1tw(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
goto out_unlock;
}
@@ -1958,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (ret == 0)
ret = 1;
out:
- if (ret == -ENOEXEC) {
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- }
+ if (ret == -ENOEXEC)
+ ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu));
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2221,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
ret = -EINVAL;
break;
}
+
+ /*
+ * Cacheable PFNMAP is allowed only if the hardware
+ * supports it.
+ */
+ if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) {
+ ret = -EINVAL;
+ break;
+ }
}
hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
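
Summarising the user_mem_abort() changes above: a PFNMAP/MIXEDMAP fault on memory without a struct page may only be mapped cacheable at stage-2 when the host can do without CMOs entirely (the kvm_supports_cacheable_pfnmap() check, i.e. S2FWB plus CACHE DIC); otherwise the mapping is forced non-cacheable, and a cacheable VMA over such memory is rejected outright. A minimal sketch of that decision, with booleans standing in for the kernel helpers:

#include <stdbool.h>

enum s2_map { S2_CACHEABLE, S2_NONCACHEABLE, S2_REJECT };

static enum s2_map map_pfnmap_fault(bool pfn_has_struct_page,
				    bool vma_cacheable,
				    bool cmos_not_needed /* FWB + DIC */)
{
	/* struct-page backed memory takes the normal cacheable path */
	if (pfn_has_struct_page)
		return S2_CACHEABLE;

	/*
	 * A cacheable VMA over non-struct-page memory: CMOs would need a
	 * kernel alias that may not exist, so FWB + DIC must make them moot.
	 */
	if (vma_cacheable)
		return cmos_not_needed ? S2_CACHEABLE : S2_REJECT;

	/* Non-cacheable VMA: force a non-cacheable stage-2 mapping */
	return S2_NONCACHEABLE;
}
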
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index dc1d26559bfa..153b3e11b115 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1441,12 +1441,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
break;
case SYS_ID_AA64PFR0_EL1:
- /* No RME, AMU, MPAM, S-EL2, or RAS */
+ /* No RME, AMU, MPAM, or S-EL2 */
val &= ~(ID_AA64PFR0_EL1_RME |
ID_AA64PFR0_EL1_AMU |
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SEL2 |
- ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_EL3 |
ID_AA64PFR0_EL1_EL2 |
ID_AA64PFR0_EL1_EL1 |
@@ -1683,69 +1682,21 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1);
/* TCR2_EL2 */
- res0 = TCR2_EL2_RES0;
- res1 = TCR2_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
- res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP))
- res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT))
- res0 |= TCR2_EL2_HAFT;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
- res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
- res0 |= TCR2_EL2_AIE;
- if (!kvm_has_s1poe(kvm))
- res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE;
- if (!kvm_has_s1pie(kvm))
- res0 |= TCR2_EL2_PIE;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
- res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 |
- TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1);
+ get_reg_fixed_bits(kvm, TCR2_EL2, &res0, &res1);
set_sysreg_masks(kvm, TCR2_EL2, res0, res1);
/* SCTLR_EL1 */
- res0 = SCTLR_EL1_RES0;
- res1 = SCTLR_EL1_RES1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
- res0 |= SCTLR_EL1_EPAN;
+ get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1);
set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+ /* SCTLR2_ELx */
+ get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1);
+ get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1);
+
/* MDCR_EL2 */
- res0 = MDCR_EL2_RES0;
- res1 = MDCR_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
- res0 |= (MDCR_EL2_HPMN | MDCR_EL2_TPMCR |
- MDCR_EL2_TPM | MDCR_EL2_HPME);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
- res0 |= MDCR_EL2_E2PB | MDCR_EL2_TPMS;
- if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, SPMU, IMP))
- res0 |= MDCR_EL2_EnSPM;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P1))
- res0 |= MDCR_EL2_HPMD;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
- res0 |= MDCR_EL2_TTRF;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
- res0 |= MDCR_EL2_HCCD | MDCR_EL2_HLP;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
- res0 |= MDCR_EL2_E2TB;
- if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
- res0 |= MDCR_EL2_TDCC;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, MTPMU, IMP) ||
- kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
- res0 |= MDCR_EL2_MTPME;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P7))
- res0 |= MDCR_EL2_HPMFZO;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSS, IMP))
- res0 |= MDCR_EL2_PMSSE;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
- res0 |= MDCR_EL2_HPMFZS;
- if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, EBEP, IMP))
- res0 |= MDCR_EL2_PMEE;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, V8P9))
- res0 |= MDCR_EL2_EBWE;
- if (!kvm_has_feat(kvm, ID_AA64DFR2_EL1, STEP, IMP))
- res0 |= MDCR_EL2_EnSTEPOP;
+ get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1);
set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
/* CNTHCTL_EL2 */
@@ -1802,3 +1753,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
kvm_inject_nested_irq(vcpu);
}
+
+/*
+ * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor
+ * can write to HCR_EL2 behind our back, potentially changing the exception
+ * routing / masking for even the host context.
+ *
+ * What follows is some slop to (1) react to exception routing / masking and (2)
+ * preserve the pending SError state across translation regimes.
+ */
+void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
+
+void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long *hcr = vcpu_hcr(vcpu);
+
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ /*
+ * We previously decided that an SError was deliverable to the guest.
+ * Reap the pending state from HCR_EL2 and...
+ */
+ if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr)))
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+
+ /*
+ * Re-attempt SError injection in case the deliverability has changed,
+	 * which is necessary to faithfully emulate WFI in the case of a pending
+ * SError being a wakeup condition.
+ */
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
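
To see where the two hooks sit, here is a rough, illustrative ordering relative to guest entry/exit; this is not the actual vcpu run loop, and enter_guest() is a hypothetical stand-in:

/* Illustrative only: flush before entry, sync after exit. */
static void run_vcpu_once(struct kvm_vcpu *vcpu)
{
	/* Re-inject a stashed vSError if it has become deliverable */
	kvm_nested_flush_hwstate(vcpu);

	enter_guest(vcpu);		/* hypothetical guest entry */

	/* Reap any HCR_EL2.VSE the guest hypervisor may have touched */
	kvm_nested_sync_hwstate(vcpu);
}
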
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c20bd6f21e60..82ffb3b3b3cf 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -108,7 +108,6 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
PURE_EL2_SYSREG( HACR_EL2 );
PURE_EL2_SYSREG( VTTBR_EL2 );
PURE_EL2_SYSREG( VTCR_EL2 );
- PURE_EL2_SYSREG( RVBAR_EL2 );
PURE_EL2_SYSREG( TPIDR_EL2 );
PURE_EL2_SYSREG( HPFAR_EL2 );
PURE_EL2_SYSREG( HCRX_EL2 );
@@ -144,6 +143,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
default:
return false;
}
@@ -533,8 +533,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return ignore_write(vcpu, p);
if (p->Op1 == 4) { /* ICC_SRE_EL2 */
- p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
- ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
+ p->regval = KVM_ICC_SRE_EL2;
} else { /* ICC_SRE_EL1 */
p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
}
@@ -773,6 +772,12 @@ static u64 reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return mpidr;
}
+static unsigned int hidden_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return REG_HIDDEN;
+}
+
static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
@@ -1612,13 +1617,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
break;
case SYS_ID_AA64PFR2_EL1:
- /* We only expose FPMR */
- val &= ID_AA64PFR2_EL1_FPMR;
+ val &= ID_AA64PFR2_EL1_FPMR |
+ (kvm_has_mte(vcpu->kvm) ?
+ ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY :
+ 0);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1643,8 +1649,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64MMFR2_EL1_NV;
break;
case SYS_ID_AA64MMFR3_EL1:
- val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
- ID_AA64MMFR3_EL1_S1PIE;
+ val &= ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
+ ID_AA64MMFR3_EL1_S1POE |
+ ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -1811,7 +1819,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
}
- if (kvm_vgic_global_state.type == VGIC_V3) {
+ if (vgic_is_v3(vcpu->kvm)) {
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
}
@@ -1953,6 +1961,14 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
(vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
return -EINVAL;
+ /*
+ * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then
+ * we support GICv3. Fail attempts to do anything but set that to IMP.
+ */
+ if (vgic_is_v3_compat(vcpu->kvm) &&
+ FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP)
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, user_val);
}
@@ -2325,6 +2341,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
EL2_REG_FILTERED(name, acc, rst, v, el2_visibility)
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
+#define EL2_REG_VNCR_FILT(name, vis) \
+ EL2_REG_FILTERED(name, bad_vncr_trap, reset_val, 0, vis)
+#define EL2_REG_VNCR_GICv3(name) \
+ EL2_REG_VNCR_FILT(name, hidden_visibility)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
/*
@@ -2483,6 +2503,21 @@ static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_sctlr2(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, sctlr2_visibility);
+}
+
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -2513,11 +2548,7 @@ static bool access_gic_vtr(struct kvm_vcpu *vcpu,
if (p->is_write)
return write_to_read_only(vcpu, p, r);
- p->regval = kvm_vgic_global_state.ich_vtr_el2;
- p->regval &= ~(ICH_VTR_EL2_DVIM |
- ICH_VTR_EL2_A3V |
- ICH_VTR_EL2_IDbits);
- p->regval |= ICH_VTR_EL2_nV4;
+ p->regval = kvm_get_guest_vtr_el2();
return true;
}
@@ -2588,6 +2619,26 @@ static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu,
return __el2_visibility(vcpu, rd, tcr2_visibility);
}
+static unsigned int fgt2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (el2_visibility(vcpu, rd) == 0 &&
+ kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, FGT2))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int fgt_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (el2_visibility(vcpu, rd) == 0 &&
+ kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
@@ -2639,6 +2690,23 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_ras(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+	switch (reg_to_encoding(r)) {
+ default:
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+ }
+
+ return trap_raz_wi(vcpu, p, r);
+}
+
/*
* For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and
* AIDR_EL1 as "invariant" registers, meaning userspace cannot change them.
@@ -2866,7 +2934,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_FP)),
ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1,
~(ID_AA64PFR1_EL1_PFAR |
- ID_AA64PFR1_EL1_DF2 |
ID_AA64PFR1_EL1_MTEX |
ID_AA64PFR1_EL1_THE |
ID_AA64PFR1_EL1_GCS |
@@ -2878,7 +2945,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR1_EL1_MPAM_frac |
ID_AA64PFR1_EL1_RAS_frac |
ID_AA64PFR1_EL1_MTE)),
- ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
+ ID_WRITABLE(ID_AA64PFR2_EL1,
+ ID_AA64PFR2_EL1_FPMR |
+ ID_AA64PFR2_EL1_MTEFAR |
+ ID_AA64PFR2_EL1_MTESTOREONLY),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
@@ -2945,6 +3015,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac),
@@ -2955,6 +3026,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+ { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0,
+ .visibility = sctlr2_visibility },
MTE_REG(RGSR_EL1),
MTE_REG(GCR_EL1),
@@ -2984,14 +3057,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
- { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERRIDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERRSELR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXFR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXCTLR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras },
+ { SYS_DESC(SYS_ERXADDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC0_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC1_EL1), access_ras },
MTE_REG(TFSR_EL1),
MTE_REG(TFSRE0_EL1),
@@ -3302,12 +3375,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
+ EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0,
+ sctlr2_el2_visibility),
EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR_FILT(HFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HFGWTR_EL2, fgt_visibility),
EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
EL2_REG_VNCR(HACR_EL2, reset_val, 0),
@@ -3327,9 +3402,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
vncr_el2_visibility),
{ SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
- EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR_FILT(HDFGRTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HDFGWTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HFGRTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HFGWTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HDFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HDFGWTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HAFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HFGITR2_EL2, fgt2_visibility),
EL2_REG_REDIR(SPSR_EL2, reset_val, 0),
EL2_REG_REDIR(ELR_EL2, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3344,6 +3424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
+ EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
@@ -3370,43 +3451,44 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MPAMVPM7_EL2), undef_access },
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
- EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_RVBAR_EL2), undef_access },
{ SYS_DESC(SYS_RMR_EL2), undef_access },
+ EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0),
- EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R3_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R3_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_AP0R0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R3_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R3_EL2),
{ SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
- EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_HCR_EL2),
{ SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
{ SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
{ SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
{ SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
- EL2_REG_VNCR(ICH_VMCR_EL2, reset_val, 0),
-
- EL2_REG_VNCR(ICH_LR0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR3_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR4_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR5_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR6_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR7_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR8_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR9_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR10_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR11_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR12_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR13_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR14_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR15_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_VMCR_EL2),
+
+ EL2_REG_VNCR_GICv3(ICH_LR0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR3_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR4_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR5_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR6_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR7_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR8_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR9_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR10_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR11_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR12_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR13_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR14_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR15_EL2),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
@@ -4275,12 +4357,12 @@ static const struct sys_reg_desc cp15_64_regs[] = {
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
- bool is_32)
+ bool reset_check)
{
unsigned int i;
for (i = 0; i < n; i++) {
- if (!is_32 && table[i].reg && !table[i].reset) {
+ if (reset_check && table[i].reg && !table[i].reset) {
kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n",
&table[i], i, table[i].name);
return false;
@@ -4475,7 +4557,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
return true;
kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
- params->is_write ? "write" : "read", reg_id);
+ str_write_read(params->is_write), reg_id);
return false;
}
@@ -5269,18 +5351,22 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
int __init kvm_sys_reg_table_init(void)
{
+ const struct sys_reg_desc *gicv3_regs;
bool valid = true;
- unsigned int i;
+ unsigned int i, sz;
int ret = 0;
/* Make sure tables are unique and in order. */
- valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
- valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
- valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
- valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
- valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
+ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), true);
+ valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), false);
+ valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), false);
+ valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), false);
+ valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), false);
valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
+ gicv3_regs = vgic_v3_get_sysreg_table(&sz);
+ valid &= check_sysreg_table(gicv3_regs, sz, false);
+
if (!valid)
return -EINVAL;
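
For reference, the EL2_REG_VNCR_FILT()/EL2_REG_VNCR_GICv3() wrappers introduced in this file are pure shorthand; per their definitions above, an entry such as EL2_REG_VNCR_GICv3(ICH_HCR_EL2) expands to:

EL2_REG_FILTERED(ICH_HCR_EL2, bad_vncr_trap, reset_val, 0,
		 hidden_visibility)

i.e. a VNCR-mapped EL2 register that is now hidden from the main sysreg table and exposed to userspace through the dedicated GICv3 table in vgic-sys-reg-v3.c instead.
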
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index ef97d9fc67cc..317abc490368 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -108,7 +108,7 @@ inline void print_sys_reg_msg(const struct sys_reg_params *p,
/* Look, we even formatted it for you to paste into the table! */
kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
&(struct va_format){ fmt, &va },
- p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, str_write_read(p->is_write));
va_end(va);
}
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index f85415db7713..a7ab9a3bbed0 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -113,7 +113,7 @@ TRACE_EVENT(kvm_sys_access,
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
- __entry->is_write ? "write" : "read")
+ str_write_read(__entry->is_write))
);
TRACE_EVENT(kvm_set_guest_debug,
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 5eacb4b3250a..bdc2d57370b2 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -297,6 +297,91 @@ static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
return 0;
}
+static int set_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
+ return 0;
+}
+
+static int get_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = __vcpu_sys_reg(vcpu, r->reg);
+ return 0;
+}
+
+static int set_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ return set_gic_ich_reg(vcpu, r, val);
+}
+
+static int get_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ return get_gic_ich_reg(vcpu, r, val);
+}
+
+static int set_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ if (val != KVM_ICC_SRE_EL2)
+ return -EINVAL;
+ return 0;
+}
+
+static int get_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = KVM_ICC_SRE_EL2;
+ return 0;
+}
+
+static int set_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ if (val != kvm_get_guest_vtr_el2())
+ return -EINVAL;
+ return 0;
+}
+
+static int get_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = kvm_get_guest_vtr_el2();
+ return 0;
+}
+
+static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return vcpu_has_nv(vcpu) ? 0 : REG_HIDDEN;
+}
+
+#define __EL2_REG(r, acc, i) \
+ { \
+ SYS_DESC(SYS_ ## r), \
+ .get_user = get_gic_ ## acc, \
+ .set_user = set_gic_ ## acc, \
+ .reg = i, \
+ .visibility = el2_visibility, \
+ }
+
+#define EL2_REG(r, acc) __EL2_REG(r, acc, r)
+
+#define EL2_REG_RO(r, acc) __EL2_REG(r, acc, 0)
+
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
{ SYS_DESC(SYS_ICC_PMR_EL1),
.set_user = set_gic_pmr, .get_user = get_gic_pmr, },
@@ -328,8 +413,42 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
.set_user = set_gic_grpen0, .get_user = get_gic_grpen0, },
{ SYS_DESC(SYS_ICC_IGRPEN1_EL1),
.set_user = set_gic_grpen1, .get_user = get_gic_grpen1, },
+ EL2_REG(ICH_AP0R0_EL2, ich_apr),
+ EL2_REG(ICH_AP0R1_EL2, ich_apr),
+ EL2_REG(ICH_AP0R2_EL2, ich_apr),
+ EL2_REG(ICH_AP0R3_EL2, ich_apr),
+ EL2_REG(ICH_AP1R0_EL2, ich_apr),
+ EL2_REG(ICH_AP1R1_EL2, ich_apr),
+ EL2_REG(ICH_AP1R2_EL2, ich_apr),
+ EL2_REG(ICH_AP1R3_EL2, ich_apr),
+ EL2_REG_RO(ICC_SRE_EL2, icc_sre),
+ EL2_REG(ICH_HCR_EL2, ich_reg),
+ EL2_REG_RO(ICH_VTR_EL2, ich_vtr),
+ EL2_REG(ICH_VMCR_EL2, ich_reg),
+ EL2_REG(ICH_LR0_EL2, ich_reg),
+ EL2_REG(ICH_LR1_EL2, ich_reg),
+ EL2_REG(ICH_LR2_EL2, ich_reg),
+ EL2_REG(ICH_LR3_EL2, ich_reg),
+ EL2_REG(ICH_LR4_EL2, ich_reg),
+ EL2_REG(ICH_LR5_EL2, ich_reg),
+ EL2_REG(ICH_LR6_EL2, ich_reg),
+ EL2_REG(ICH_LR7_EL2, ich_reg),
+ EL2_REG(ICH_LR8_EL2, ich_reg),
+ EL2_REG(ICH_LR9_EL2, ich_reg),
+ EL2_REG(ICH_LR10_EL2, ich_reg),
+ EL2_REG(ICH_LR11_EL2, ich_reg),
+ EL2_REG(ICH_LR12_EL2, ich_reg),
+ EL2_REG(ICH_LR13_EL2, ich_reg),
+ EL2_REG(ICH_LR14_EL2, ich_reg),
+ EL2_REG(ICH_LR15_EL2, ich_reg),
};
+const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz)
+{
+ *sz = ARRAY_SIZE(gic_v3_icc_reg_descs);
+ return gic_v3_icc_reg_descs;
+}
+
static u64 attr_to_id(u64 attr)
{
return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr),
@@ -341,8 +460,12 @@ static u64 attr_to_id(u64 attr)
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
- if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
- ARRAY_SIZE(gic_v3_icc_reg_descs)))
+ const struct sys_reg_desc *r;
+
+ r = get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
+
+ if (r && !sysreg_hidden(vcpu, r))
return 0;
return -ENXIO;
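
Likewise, the table entries generated by the helper macros above are plain descriptor initialisers; EL2_REG(ICH_VMCR_EL2, ich_reg) expands to:

{
	SYS_DESC(SYS_ICH_VMCR_EL2),
	.get_user   = get_gic_ich_reg,
	.set_user   = set_gic_ich_reg,
	.reg        = ICH_VMCR_EL2,
	.visibility = el2_visibility,
}

while EL2_REG_RO() leaves .reg as 0, since ICC_SRE_EL2 and ICH_VTR_EL2 are synthesised on access rather than backed by vcpu storage.
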
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index eb1205654ac8..1e680ad6e863 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -157,6 +157,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
+ kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
@@ -165,6 +166,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
else
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
+ kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
+
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
kvm_unlock_all_vcpus(kvm);
@@ -391,11 +395,10 @@ int vgic_init(struct kvm *kvm)
goto out;
/*
- * If we have GICv4.1 enabled, unconditionally request enable the
- * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
- * enable it if we present a virtual ITS to the guest.
+ * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
+ * vLPIs) is supported.
*/
- if (vgic_supports_direct_msis(kvm)) {
+ if (vgic_supports_direct_irqs(kvm)) {
ret = vgic_v4_init(kvm);
if (ret)
goto out;
@@ -409,15 +412,7 @@ int vgic_init(struct kvm *kvm)
goto out;
vgic_debug_init(kvm);
-
- /*
- * If userspace didn't set the GIC implementation revision,
- * default to the latest and greatest. You know want it.
- */
- if (!dist->implementation_rev)
- dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
-
out:
return ret;
}
@@ -443,7 +438,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_supports_direct_msis(kvm))
+ if (vgic_supports_direct_irqs(kvm))
vgic_v4_teardown(kvm);
xa_destroy(&dist->lpi_xa);
@@ -674,10 +669,12 @@ void kvm_vgic_init_cpu_hardware(void)
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (kvm_vgic_global_state.type == VGIC_V2) {
vgic_v2_init_lrs();
- else
+ } else if (kvm_vgic_global_state.type == VGIC_V3 ||
+ kvm_vgic_global_state.has_gcie_v3_compat) {
kvm_call_hyp(__vgic_v3_init_lrs);
+ }
}
/**
@@ -722,6 +719,9 @@ int kvm_vgic_hyp_init(void)
kvm_info("GIC system register CPU interface enabled\n");
}
break;
+ case GIC_V5:
+ ret = vgic_v5_probe(gic_kvm_info);
+ break;
default:
ret = -ENODEV;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 534049c7c94b..7368c13f16b7 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -758,7 +758,7 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
if (irq) {
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
if (irq->hw)
- WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+ its_unmap_vlpi(ite->irq->host_irq);
irq->hw = false;
}
@@ -2694,6 +2694,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
break;
+ default:
+ ret = -ENXIO;
+ break;
}
mutex_unlock(&its->its_lock);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index f9ae790163fb..3d1a776b716d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -5,6 +5,7 @@
* Copyright (C) 2015 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
#include <linux/uaccess.h>
@@ -303,12 +304,6 @@ static int vgic_get_common_attr(struct kvm_device *dev,
VGIC_NR_PRIVATE_IRQS, uaddr);
break;
}
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-
- r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
- break;
- }
}
return r;
@@ -510,6 +505,24 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
}
/*
+ * Allow access to certain ID-like registers prior to VGIC initialization,
+ * so that the VMM can provision the features / sizing of the VGIC.
+ */
+static bool reg_allowed_pre_init(struct kvm_device_attr *attr)
+{
+ if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS)
+ return false;
+
+ switch (attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK) {
+ case GICD_IIDR:
+ case GICD_TYPER2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
* vgic_v3_attr_regs_access - allows user space to access VGIC v3 state
*
* @dev: kvm device handle
@@ -523,7 +536,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
struct vgic_reg_attr reg_attr;
gpa_t addr;
struct kvm_vcpu *vcpu;
- bool uaccess, post_init = true;
+ bool uaccess;
u32 val;
int ret;
@@ -539,9 +552,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
/* Sysregs uaccess is performed by the sysreg handling code */
uaccess = false;
break;
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
- post_init = false;
- fallthrough;
default:
uaccess = true;
}
@@ -561,7 +571,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->arch.config_lock);
- if (post_init != vgic_initialized(dev->kvm)) {
+ if (!(vgic_initialized(dev->kvm) || reg_allowed_pre_init(attr))) {
ret = -EBUSY;
goto out;
}
@@ -591,19 +601,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
}
break;
}
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
- if (!is_write) {
- val = dev->kvm->arch.vgic.mi_intid;
- ret = 0;
- break;
- }
-
- ret = -EINVAL;
- if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) {
- dev->kvm->arch.vgic.mi_intid = val;
- ret = 0;
- }
- break;
default:
ret = -EINVAL;
break;
@@ -630,8 +627,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, true);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)attr->addr;
+ u32 val;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ if (vgic_initialized(dev->kvm))
+ return -EBUSY;
+
+ if (!irq_is_ppi(val))
+ return -EINVAL;
+
+ dev->kvm->arch.vgic.mi_intid = val;
+ return 0;
+ }
default:
return vgic_set_common_attr(dev, attr);
}
@@ -645,8 +658,13 @@ static int vgic_v3_get_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, false);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ return put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
+ }
default:
return vgic_get_common_attr(dev, attr);
}
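
On the userspace side, the relocated maintenance-IRQ attribute is driven as below. This is a hedged sketch: the device fd and the chosen PPI are assumptions, while the group and the u32 payload follow the hunks above:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* vgic_fd: hypothetical fd from KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_VGIC_V3) */
static int set_maint_irq(int vgic_fd, __u32 intid)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
		.addr  = (__u64)(unsigned long)&intid,
	};

	/*
	 * Per the new handler: fails with -EINVAL unless intid is a PPI,
	 * and with -EBUSY once the VGIC has been initialized.
	 */
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
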
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index ae4c0593d114..a3ef185209e9 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -50,8 +50,17 @@ bool vgic_has_its(struct kvm *kvm)
bool vgic_supports_direct_msis(struct kvm *kvm)
{
- return (kvm_vgic_global_state.has_gicv4_1 ||
- (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
+ return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+}
+
+bool system_supports_direct_sgis(void)
+{
+ return kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi();
+}
+
+bool vgic_supports_direct_sgis(struct kvm *kvm)
+{
+ return kvm->arch.vgic.nassgicap;
}
/*
@@ -86,7 +95,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
}
break;
case GICD_TYPER2:
- if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi())
+ if (vgic_supports_direct_sgis(vcpu->kvm))
value = GICD_TYPER2_nASSGIcap;
break;
case GICD_IIDR:
@@ -119,7 +128,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi())
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
/* Dist stays enabled? nASSGIreq is RO */
@@ -133,7 +142,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
if (is_hwsgi != dist->nassgireq)
vgic_v4_configure_vsgis(vcpu->kvm);
- if (kvm_vgic_global_state.has_gicv4_1 &&
+ if (vgic_supports_direct_sgis(vcpu->kvm) &&
was_enabled != dist->enabled)
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4);
else if (!was_enabled && dist->enabled)
@@ -159,8 +168,18 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
switch (addr & 0x0c) {
case GICD_TYPER2:
- if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+ reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
+
+ if (reg == val)
+ return 0;
+ if (vgic_initialized(vcpu->kvm))
+ return -EBUSY;
+ if ((reg ^ val) & ~GICD_TYPER2_nASSGIcap)
return -EINVAL;
+ if (!system_supports_direct_sgis() && val)
+ return -EINVAL;
+
+ dist->nassgicap = val & GICD_TYPER2_nASSGIcap;
return 0;
case GICD_IIDR:
reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
@@ -178,7 +197,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
}
case GICD_CTLR:
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1)
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index 679aafe77de2..7f1259b49c50 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -116,7 +116,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
{
u64 xmo;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
"Separate virtual IRQ/FIQ settings not supported\n");
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 193946108192..4d9343d2b0b1 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -356,7 +356,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident)
return 0;
return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
@@ -367,7 +367,7 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident)
return 0;
if (vcpu_get_flag(vcpu, IN_WFI))
@@ -527,28 +527,26 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
return NULL;
}
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
+void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
{
struct vgic_irq *irq;
unsigned long flags;
- int ret = 0;
if (!vgic_supports_direct_msis(kvm))
- return 0;
+ return;
irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
if (!irq)
- return 0;
+ return;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
WARN_ON(irq->hw && irq->host_irq != host_irq);
if (irq->hw) {
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
- ret = its_unmap_vlpi(host_irq);
+ its_unmap_vlpi(host_irq);
}
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(kvm, irq);
- return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
new file mode 100644
index 000000000000..6bdbb221bcde
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <kvm/arm_vgic.h>
+#include <linux/irqchip/arm-vgic-info.h>
+
+#include "vgic.h"
+
+/*
+ * Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
+ * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
+ * registers a VGIC_V3 device.
+ */
+int vgic_v5_probe(const struct gic_kvm_info *info)
+{
+ u64 ich_vtr_el2;
+ int ret;
+
+ if (!info->has_gcie_v3_compat)
+ return -ENODEV;
+
+ kvm_vgic_global_state.type = VGIC_V5;
+ kvm_vgic_global_state.has_gcie_v3_compat = true;
+
+ /* We only support v3 compat mode - use vGICv3 limits */
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+ kvm_vgic_global_state.vcpu_base = 0;
+ kvm_vgic_global_state.vctrl_base = NULL;
+ kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.has_gicv4 = false;
+ kvm_vgic_global_state.has_gicv4_1 = false;
+
+ ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+ kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (ret) {
+ kvm_err("Cannot register GICv3-legacy KVM device.\n");
+ return ret;
+ }
+
+ static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
+ kvm_info("GCIE legacy system register CPU interface\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 8f8096d48925..f5148b38120a 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -951,7 +951,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* can be directly injected (GICv4).
*/
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
- !vgic_supports_direct_msis(vcpu->kvm))
+ !vgic_supports_direct_irqs(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -965,7 +965,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);
- if (vgic_supports_direct_msis(vcpu->kvm))
+ if (vgic_supports_direct_irqs(vcpu->kvm))
vgic_v4_commit(vcpu);
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 4349084cb9a6..1384a04c0784 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -64,6 +64,24 @@
KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+#define KVM_ICC_SRE_EL2 (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | \
+ ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB)
+#define KVM_ICH_VTR_EL2_RES0 (ICH_VTR_EL2_DVIM | \
+ ICH_VTR_EL2_A3V | \
+ ICH_VTR_EL2_IDbits)
+#define KVM_ICH_VTR_EL2_RES1 ICH_VTR_EL2_nV4
+
+static inline u64 kvm_get_guest_vtr_el2(void)
+{
+ u64 vtr;
+
+ vtr = kvm_vgic_global_state.ich_vtr_el2;
+ vtr &= ~KVM_ICH_VTR_EL2_RES0;
+ vtr |= KVM_ICH_VTR_EL2_RES1;
+
+ return vtr;
+}
+
/*
* As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
* below macros are defined for ITS table entry encoding.
@@ -297,6 +315,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr, bool is_write);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type);
@@ -308,6 +327,8 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
+int vgic_v5_probe(const struct gic_kvm_info *info);
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
@@ -369,7 +390,23 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm);
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
int vgic_its_invall(struct kvm_vcpu *vcpu);
+bool system_supports_direct_sgis(void);
bool vgic_supports_direct_msis(struct kvm *kvm);
+bool vgic_supports_direct_sgis(struct kvm *kvm);
+
+static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
+{
+ /*
+ * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware,
+ * indirectly allowing userspace to control whether or not vPEs are
+ * allocated for the VM.
+ */
+ if (system_supports_direct_sgis())
+ return vgic_supports_direct_sgis(kvm);
+
+ return vgic_supports_direct_msis(kvm);
+}
+
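
Read as a decision table, the policy is: on vSGI-capable (GICv4.1) hosts the per-VM nASSGIcap bit is the sole switch, otherwise legacy vLPI support (GICv4 plus an ITS) decides. A standalone restatement with hypothetical booleans in place of the global/per-VM state:

#include <stdbool.h>

static bool direct_irqs(bool sys_direct_sgis, bool vm_nassgicap,
			bool host_gicv4, bool vm_has_its)
{
	/* GICv4.1 + vSGI cpuif: the userspace-writable nASSGIcap decides */
	if (sys_direct_sgis)
		return vm_nassgicap;

	/* Legacy GICv4: direct vLPIs need an ITS in the VM */
	return host_gicv4 && vm_has_its;
}
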
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
void vgic_v4_configure_vsgis(struct kvm *kvm);
@@ -389,6 +426,17 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
+static inline bool vgic_is_v3_compat(struct kvm *kvm)
+{
+ return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
+ kvm_vgic_global_state.has_gcie_v3_compat;
+}
+
+static inline bool vgic_is_v3(struct kvm *kvm)
+{
+ return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
+}
+
int vgic_its_debug_init(struct kvm_device *dev);
void vgic_its_debug_destroy(struct kvm_device *dev);
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index bcac4f55f9c1..c0557945939c 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -68,7 +68,144 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
pte = pte_mkyoung(pte);
}
- __flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3);
+ /*
+ * On eliding the __tlb_flush_range() under BBML2+noabort:
+ *
+ * NOTE: Instead of using N=16 as the contiguous block length, we use
+ * N=4 for clarity.
+ *
+ * NOTE: 'n' and 'c' are used to denote the "contiguous bit" being
+ * unset and set, respectively.
+ *
+ * We worry about two cases where contiguous bit is used:
+ * - When folding N smaller non-contiguous ptes as 1 contiguous block.
+ * - When unfolding a contiguous block into N smaller non-contiguous ptes.
+ *
+ * Currently, the BBML0 folding case looks as follows:
+ *
+ * 0) Initial page-table layout:
+ *
+ * +----+----+----+----+
+ * |RO,n|RO,n|RO,n|RW,n| <--- last page being set as RO
+ * +----+----+----+----+
+ *
+ * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
+ *
+ * +----+----+----+----+
+ * | 0 | 0 | 0 | 0 |
+ * +----+----+----+----+
+ *
+ * 2) __flush_tlb_range():
+ *
+ * |____ tlbi + dsb ____|
+ *
+ * 3) __set_ptes() to repaint contiguous block:
+ *
+ * +----+----+----+----+
+ * |RO,c|RO,c|RO,c|RO,c|
+ * +----+----+----+----+
+ *
+ * 4) The kernel will eventually __flush_tlb() for changed page:
+ *
+ * |____| <--- tlbi + dsb
+ *
+ * As expected, the intermediate tlbi+dsb ensures that other PEs
+ * only ever see an invalid (0) entry, or the new contiguous TLB entry.
+ * The final tlbi+dsb will always throw away the newly installed
+ * contiguous TLB entry, which is a micro-optimisation opportunity,
+ * but does not affect correctness.
+ *
+ * In the BBML2 case, the change is avoiding the intermediate tlbi+dsb.
+ * This means a few things, but notably other PEs will still "see" any
+ * stale cached TLB entries. This could lead to a "contiguous bit
+ * misprogramming" issue until the final tlbi+dsb of the changed page,
+ * which would clear out both the stale (RW,n) entry and the new (RO,c)
+ * contiguous entry installed in its place.
+ *
+	 * What this is saying is the following:
+ *
+ * +----+----+----+----+
+ * |RO,n|RO,n|RO,n|RW,n| <--- old page tables, all non-contiguous
+ * +----+----+----+----+
+ *
+ * +----+----+----+----+
+ * |RO,c|RO,c|RO,c|RO,c| <--- new page tables, all contiguous
+ * +----+----+----+----+
+ * /\
+ * ||
+ *
+ * If both the old single (RW,n) and new contiguous (RO,c) TLB entries
+ * are present, and a write is made to this address, do we fault or
+ * is the write permitted (via amalgamation)?
+ *
+ * The relevant Arm ARM DDI 0487L.a requirements are RNGLXZ and RJQQTC,
+ * and together state that when BBML1 or BBML2 are implemented, either
+ * a TLB conflict abort is raised (which we expressly forbid), or will
+ * "produce an OA, access permissions, and memory attributes that are
+ * consistent with any of the programmed translation table values".
+ *
+ * That is to say, will either raise a TLB conflict, or produce one of
+ * the cached TLB entries, but never amalgamate.
+ *
+ * Thus, as the page tables are only considered "consistent" after
+ * the final tlbi+dsb (which evicts both the single stale (RW,n) TLB
+ * entry as well as the new contiguous (RO,c) TLB entry), omitting the
+ * initial tlbi+dsb is correct.
+ *
+ * It is also important to note that at the end of the BBML2 folding
+ * case, we are still left with potentially all N TLB entries still
+ * cached (the N-1 non-contiguous ptes, and the single contiguous
+ * block). However, over time, natural TLB pressure will cause the
+ * non-contiguous pte TLB entries to be flushed, leaving only the
+ * contiguous block TLB entry. This means that omitting the tlbi+dsb is
+ * not only correct, but also keeps our eventual performance benefits.
+ *
+ * For the unfolding case, BBML0 looks as follows:
+ *
+ * 0) Initial page-table layout:
+ *
+ * +----+----+----+----+
+ * |RW,c|RW,c|RW,c|RW,c| <--- last page being set as RO
+ * +----+----+----+----+
+ *
+ * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
+ *
+ * +----+----+----+----+
+ * | 0 | 0 | 0 | 0 |
+ * +----+----+----+----+
+ *
+ * 2) __flush_tlb_range():
+ *
+ * |____ tlbi + dsb ____|
+ *
+ * 3) __set_ptes() to repaint as non-contiguous:
+ *
+ * +----+----+----+----+
+ * |RW,n|RW,n|RW,n|RW,n|
+ * +----+----+----+----+
+ *
+ * 4) Update changed page permissions:
+ *
+ * +----+----+----+----+
+ * |RW,n|RW,n|RW,n|RO,n| <--- last page permissions set
+ * +----+----+----+----+
+ *
+ * 5) The kernel will eventually __flush_tlb() for changed page:
+ *
+ * |____| <--- tlbi + dsb
+ *
+ * For BBML2, we again remove the intermediate tlbi+dsb. Here, there
+ * are no issues, as the final tlbi+dsb covering the changed page is
+ * guaranteed to remove the original large contiguous (RW,c) TLB entry,
+ * as well as the intermediate (RW,n) TLB entry; the next access will
+ * install the new (RO,n) TLB entry and the page tables are only
+ * considered "consistent" after the final tlbi+dsb, so software must
+ * be prepared for this inconsistency prior to finishing the mm dance
+ * regardless.
+ */
+
+ if (!system_supports_bbml2_noabort())
+ __flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3);
__set_ptes(mm, start_addr, start_ptep, pte, CONT_PTES);
}
@@ -169,17 +306,46 @@ pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte)
for (i = 0; i < CONT_PTES; i++, ptep++) {
pte = __ptep_get(ptep);
- if (pte_dirty(pte))
+ if (pte_dirty(pte)) {
orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
+ for (; i < CONT_PTES; i++, ptep++) {
+ pte = __ptep_get(ptep);
+ if (pte_young(pte)) {
+ orig_pte = pte_mkyoung(orig_pte);
+ break;
+ }
+ }
+ break;
+ }
+
+ if (pte_young(pte)) {
orig_pte = pte_mkyoung(orig_pte);
+ i++;
+ ptep++;
+ for (; i < CONT_PTES; i++, ptep++) {
+ pte = __ptep_get(ptep);
+ if (pte_dirty(pte)) {
+ orig_pte = pte_mkdirty(orig_pte);
+ break;
+ }
+ }
+ break;
+ }
}
return orig_pte;
}
EXPORT_SYMBOL_GPL(contpte_ptep_get);
+static inline bool contpte_is_consistent(pte_t pte, unsigned long pfn,
+ pgprot_t orig_prot)
+{
+ pgprot_t prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
+
+ return pte_valid_cont(pte) && pte_pfn(pte) == pfn &&
+ pgprot_val(prot) == pgprot_val(orig_prot);
+}
+
pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
{
/*
@@ -202,7 +368,6 @@ pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
pgprot_t orig_prot;
unsigned long pfn;
pte_t orig_pte;
- pgprot_t prot;
pte_t *ptep;
pte_t pte;
int i;
@@ -219,18 +384,44 @@ retry:
for (i = 0; i < CONT_PTES; i++, ptep++, pfn++) {
pte = __ptep_get(ptep);
- prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
- if (!pte_valid_cont(pte) ||
- pte_pfn(pte) != pfn ||
- pgprot_val(prot) != pgprot_val(orig_prot))
+ if (!contpte_is_consistent(pte, pfn, orig_prot))
goto retry;
- if (pte_dirty(pte))
+ if (pte_dirty(pte)) {
orig_pte = pte_mkdirty(orig_pte);
+ for (; i < CONT_PTES; i++, ptep++, pfn++) {
+ pte = __ptep_get(ptep);
+
+ if (!contpte_is_consistent(pte, pfn, orig_prot))
+ goto retry;
+
+ if (pte_young(pte)) {
+ orig_pte = pte_mkyoung(orig_pte);
+ break;
+ }
+ }
+ break;
+ }
- if (pte_young(pte))
+ if (pte_young(pte)) {
orig_pte = pte_mkyoung(orig_pte);
+ i++;
+ ptep++;
+ pfn++;
+ for (; i < CONT_PTES; i++, ptep++, pfn++) {
+ pte = __ptep_get(ptep);
+
+ if (!contpte_is_consistent(pte, pfn, orig_prot))
+ goto retry;
+
+ if (pte_dirty(pte)) {
+ orig_pte = pte_mkdirty(orig_pte);
+ break;
+ }
+ }
+ break;
+ }
}
return orig_pte;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 11eb8d1adc84..fcc783e8e9bb 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -53,18 +53,12 @@ struct fault_info {
};
static const struct fault_info fault_info[];
-static struct fault_info debug_fault_info[];
static inline const struct fault_info *esr_to_fault_info(unsigned long esr)
{
return fault_info + (esr & ESR_ELx_FSC);
}
-static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr)
-{
- return debug_fault_info + DBG_ESR_EVT(esr);
-}
-
static void data_abort_decode(unsigned long esr)
{
unsigned long iss2 = ESR_ELx_ISS2(esr);
@@ -838,6 +832,7 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
*/
siaddr = untagged_addr(far);
}
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
return 0;
@@ -849,9 +844,12 @@ static int do_tag_check_fault(unsigned long far, unsigned long esr,
/*
* The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN
* for tag check faults. Set them to corresponding bits in the untagged
- * address.
+ * address when ARM64_MTE_FAR is not supported. With ARM64_MTE_FAR,
+ * bits 63:60 of FAR_EL1 are valid and need no fixup.
*/
- far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+ if (!cpus_have_cap(ARM64_MTE_FAR))
+ far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+
do_bad_area(far, esr, regs);
return 0;
}
@@ -951,75 +949,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
NOKPROBE_SYMBOL(do_sp_pc_abort);
/*
- * __refdata because early_brk64 is __init, but the reference to it is
- * clobbered at arch_initcall time.
- * See traps.c and debug-monitors.c:debug_traps_init().
- */
-static struct fault_info __refdata debug_fault_info[] = {
- { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
- { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
- { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
- { do_bad, SIGKILL, SI_KERNEL, "unknown 3" },
- { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
- { do_bad, SIGKILL, SI_KERNEL, "aarch32 vector catch" },
- { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
- { do_bad, SIGKILL, SI_KERNEL, "unknown 7" },
-};
-
-void __init hook_debug_fault_code(int nr,
- int (*fn)(unsigned long, unsigned long, struct pt_regs *),
- int sig, int code, const char *name)
-{
- BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info));
-
- debug_fault_info[nr].fn = fn;
- debug_fault_info[nr].sig = sig;
- debug_fault_info[nr].code = code;
- debug_fault_info[nr].name = name;
-}
-
-/*
- * In debug exception context, we explicitly disable preemption despite
- * having interrupts disabled.
- * This serves two purposes: it makes it much less likely that we would
- * accidentally schedule in exception context and it will force a warning
- * if we somehow manage to schedule by accident.
- */
-static void debug_exception_enter(struct pt_regs *regs)
-{
- preempt_disable();
-
- /* This code is a bit fragile. Test it. */
- RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
-}
-NOKPROBE_SYMBOL(debug_exception_enter);
-
-static void debug_exception_exit(struct pt_regs *regs)
-{
- preempt_enable_no_resched();
-}
-NOKPROBE_SYMBOL(debug_exception_exit);
-
-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
- struct pt_regs *regs)
-{
- const struct fault_info *inf = esr_to_debug_fault_info(esr);
- unsigned long pc = instruction_pointer(regs);
-
- debug_exception_enter(regs);
-
- if (user_mode(regs) && !is_ttbr0_addr(pc))
- arm64_apply_bp_hardening();
-
- if (inf->fn(addr_if_watchpoint, esr, regs)) {
- arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
- }
-
- debug_exception_exit(regs);
-}
-NOKPROBE_SYMBOL(do_debug_exception);
-
-/*
* Used during anonymous page fault handling.
*/
struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c
index 5c46ec527b1c..6e93f78de79b 100644
--- a/arch/arm64/mm/gcs.c
+++ b/arch/arm64/mm/gcs.c
@@ -157,12 +157,6 @@ void gcs_free(struct task_struct *task)
if (!system_supports_gcs())
return;
- /*
- * When fork() with CLONE_VM fails, the child (tsk) already
- * has a GCS allocated, and exit_thread() calls this function
- * to free it. In this case the parent (current) and the
- * child share the same mm struct.
- */
if (!task->mm || task->mm != current->mm)
return;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0c8737f4f2ce..1d90a7e75333 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -225,7 +225,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
ncontig = num_contig_ptes(sz, &pgsize);
if (!pte_present(pte)) {
- for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
+ for (i = 0; i < ncontig; i++, ptep++)
__set_ptes_anysz(mm, ptep, pte, 1, pgsize);
return;
}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 54dccfd6aa11..8c75965afc9e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -454,7 +454,7 @@ SYM_FUNC_START(__cpu_setup)
dsb nsh
msr cpacr_el1, xzr // Reset cpacr_el1
- mov x1, #1 << 12 // Reset mdscr_el1 and disable
+ mov x1, MDSCR_EL1_TDCC // Reset mdscr_el1 and disable
msr mdscr_el1, x1 // access to the DCC from EL0
reset_pmuserenr_el0 x1 // Disable PMU access from EL0
reset_amuserenr_el0 x1 // Disable AMU access from EL0
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index a3b0e693a125..bbea4f36f9f2 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -325,4 +325,9 @@
#define A64_MRS_SP_EL0(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)
+/* Barriers */
+#define A64_SB aarch64_insn_get_sb_value()
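+/* DSB <option> encodes the option in CRm (bits [11:8]); 0x7 selects NSH */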
+#define A64_DSB_NSH (aarch64_insn_get_dsb_base_value() | 0x7 << 8)
+#define A64_ISB aarch64_insn_get_isb_value()
+
#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index da8b89dd2910..97dfd5432809 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
[TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
+ /* callee saved register for private stack pointer */
+ [PRIVATE_SP] = A64_R(27),
/* callee saved register for kern_vm_start address */
[ARENA_VM_START] = A64_R(28),
};
@@ -86,6 +89,7 @@ struct jit_ctx {
u64 user_vm_start;
u64 arena_vm_start;
bool fp_used;
+ bool priv_sp_used;
bool write;
};
@@ -98,6 +102,10 @@ struct bpf_plt {
#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
+/* Guard size and fill value to detect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ 16
+#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
if (reg_used & 8)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
- if (reg_used & 16)
+ if (reg_used & 16) {
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+ if (ctx->priv_sp_used)
+ ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+ }
if (ctx->arena_vm_start)
ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
+ ctx->fp_used = true;
} else {
find_used_callee_regs(ctx);
for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
}
}
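+/*
+ * At runtime: dst_reg = this_cpu_ptr(ptr). Materialize the per-CPU
+ * symbol address, then add the per-CPU offset, which the kernel keeps
+ * in TPIDR_EL1 (or TPIDR_EL2 when running at EL2 with VHE).
+ */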
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+ struct jit_ctx *ctx)
+{
+ const u8 tmp = bpf2a64[TMP_REG_1];
+
+ emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+ else
+ emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+ emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
+}
+
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
const bool is_main_prog = !bpf_is_subprog(prog);
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
+ void __percpu *priv_stack_ptr;
const int idx0 = ctx->idx;
int cur_offset;
@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
}
- if (ctx->fp_used)
- /* Set up BPF prog stack base register */
- emit(A64_MOV(1, fp, A64_SP), ctx);
-
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
+ if (ctx->fp_used) {
+ if (ctx->priv_sp_used) {
+ /* Set up private stack pointer */
+ priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+ emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+ emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+ } else {
+ /* Set up BPF prog stack base register */
+ emit(A64_MOV(1, fp, A64_SP), ctx);
+ }
+ }
+
/* Set up function call stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->arena_vm_start)
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_STR64I(tcc, ptr, 0), ctx);
/* restore SP */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
const u8 ptr = bpf2a64[TCCNT_PTR];
/* We're done with BPF stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ emit_cond_jmp:
src = tmp2;
}
if (src == fp) {
- src_adj = A64_SP;
+ src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
src_adj = src;
@@ -1630,17 +1666,14 @@ emit_cond_jmp:
return ret;
break;
- /* speculation barrier */
+ /* speculation barrier against v1 and v4 */
case BPF_ST | BPF_NOSPEC:
- /*
- * Nothing required here.
- *
- * In case of arm64, we rely on the firmware mitigation of
- * Speculative Store Bypass as controlled via the ssbd kernel
- * parameter. Whenever the mitigation is enabled, it works
- * for all of the kernel code with no need to provide any
- * additional instructions.
- */
+ if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+ emit(A64_SB, ctx);
+ } else {
+ emit(A64_DSB_NSH, ctx);
+ emit(A64_ISB, ctx);
+ }
break;
/* ST: *(size *)(dst + off) = imm */
@@ -1657,7 +1690,7 @@ emit_cond_jmp:
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1719,7 +1752,7 @@ emit_cond_jmp:
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
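+/*
+ * Per-CPU private stack layout, as allocated in bpf_int_jit_compile()
+ * and wired up in build_prologue() (BPF FP = base + guard + stack size):
+ *
+ *   base + alloc_size ->  +--------------------------+
+ *                         |  16B guard (underflow)   |
+ *   BPF FP           ->   +--------------------------+
+ *                         |  BPF stack, grows down   |
+ *   base + 16        ->   +--------------------------+
+ *                         |  16B guard (overflow)    |
+ *   base             ->   +--------------------------+
+ */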
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+ }
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+ struct bpf_prog *prog)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+ pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
+ bpf_jit_get_prog_name(prog));
+ break;
+ }
+ }
+}
+
struct arm64_jit_data {
struct bpf_binary_header *header;
u8 *ro_image;
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
int image_size, prog_size, extable_size, extable_align, extable_offset;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
- struct bpf_binary_header *ro_header;
+ struct bpf_binary_header *ro_header = NULL;
struct arm64_jit_data *jit_data;
+ void __percpu *priv_stack_ptr = NULL;
bool was_classic = bpf_prog_was_classic(prog);
+ int priv_stack_alloc_sz;
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
prog->aux->jit_data = jit_data;
}
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /* Allocate the actual private stack: the verifier-calculated
+ * stack size plus two guard regions to catch overflow and
+ * underflow.
+ */
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+ if (!priv_stack_ptr) {
+ prog = orig_prog;
+ goto out_priv_stack;
+ }
+
+ priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+ prog->aux->priv_stack_ptr = priv_stack_ptr;
+ }
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
ro_image_ptr = jit_data->ro_image;
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
+ if (priv_stack_ptr)
+ ctx.priv_sp_used = true;
+
/* Pass 1: Estimate the maximum image size.
*
* BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ skip_init_ctx:
ctx.offset[i] *= AARCH64_INSN_SIZE;
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
+ if (!ro_header && priv_stack_ptr) {
+ free_percpu(priv_stack_ptr);
+ prog->aux->priv_stack_ptr = NULL;
+ }
kvfree(ctx.offset);
+out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
@@ -2089,6 +2182,11 @@ out_free_hdr:
goto out_off;
}
+bool bpf_jit_supports_private_stack(void)
+{
+ return true;
+}
+
bool bpf_jit_supports_kfunc_call(void)
{
return true;
@@ -2243,11 +2341,6 @@ static int calc_arg_aux(const struct btf_func_model *m,
/* the rest arguments are passed through stack */
for (; i < m->nr_args; i++) {
- /* We can not know for sure about exact alignment needs for
- * struct passed on stack, so deny those
- */
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- return -ENOTSUPP;
stack_slots = (m->arg_size[i] + 7) / 8;
a->bstack_for_args += stack_slots * 8;
a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
@@ -2911,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
return true;
}
+bool bpf_jit_bypass_spec_v4(void)
+{
+ /* On arm64, we rely on the firmware mitigation of Speculative
+ * Store Bypass as controlled via the ssbd kernel parameter.
+ * Whenever the mitigation is enabled, it covers all of the kernel
+ * code, with no need for additional instructions. Therefore, skip
+ * inserting nospec insns against Spectre v4.
+ */
+ return true;
+}
+
bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
@@ -2928,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
if (prog->jited) {
struct arm64_jit_data *jit_data = prog->aux->jit_data;
struct bpf_binary_header *hdr;
+ void __percpu *priv_stack_ptr;
+ int priv_stack_alloc_sz;
/*
* If we fail the final pass of JIT (from jit_subprogs),
@@ -2941,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
}
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+ free_percpu(prog->aux->priv_stack_ptr);
+ }
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 10effd4cff6b..ef0b7946f5a4 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,7 +35,8 @@ HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
-HAS_GIC_CPUIF_SYSREGS
+HAS_GICV3_CPUIF
+HAS_GICV5_CPUIF
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCR_NV1
@@ -45,10 +46,12 @@ HAS_LPA2
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
+HAS_BBML2_NOABORT
HAS_PAN
HAS_PMUV3
HAS_S1PIE
HAS_S1POE
+HAS_SCTLR2
HAS_RAS_EXTN
HAS_RNG
HAS_SB
@@ -68,6 +71,8 @@ MPAM
MPAM_HCR
MTE
MTE_ASYMM
+MTE_FAR
+MTE_STORE_ONLY
SME
SME_FA64
SME2
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 8a8cf6874298..696ab1f32a67 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1314,7 +1314,10 @@ UnsignedEnum 19:16 UINJ
0b0000 NI
0b0001 IMP
EndEnum
-Res0 15:12
+UnsignedEnum 15:12 GCIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 11:8 MTEFAR
0b0000 NI
0b0001 IMP
@@ -1329,6 +1332,138 @@ UnsignedEnum 3:0 MTEPERM
EndEnum
EndSysreg
+
+SysregFields BRBINFx_EL1
+Res0 63:47
+Field 46 CCU
+Field 45:40 CC_EXP
+Field 39:32 CC_MANT
+Res0 31:18
+Field 17 LASTFAILED
+Field 16 T
+Res0 15:14
+Enum 13:8 TYPE
+ 0b000000 DIRECT_UNCOND
+ 0b000001 INDIRECT
+ 0b000010 DIRECT_LINK
+ 0b000011 INDIRECT_LINK
+ 0b000101 RET
+ 0b000111 ERET
+ 0b001000 DIRECT_COND
+ 0b100001 DEBUG_HALT
+ 0b100010 CALL
+ 0b100011 TRAP
+ 0b100100 SERROR
+ 0b100110 INSN_DEBUG
+ 0b100111 DATA_DEBUG
+ 0b101010 ALIGN_FAULT
+ 0b101011 INSN_FAULT
+ 0b101100 DATA_FAULT
+ 0b101110 IRQ
+ 0b101111 FIQ
+ 0b110000 IMPDEF_TRAP_EL3
+ 0b111001 DEBUG_EXIT
+EndEnum
+Enum 7:6 EL
+ 0b00 EL0
+ 0b01 EL1
+ 0b10 EL2
+ 0b11 EL3
+EndEnum
+Field 5 MPRED
+Res0 4:2
+Enum 1:0 VALID
+ 0b00 NONE
+ 0b01 TARGET
+ 0b10 SOURCE
+ 0b11 FULL
+EndEnum
+EndSysregFields
+
+SysregFields BRBCR_ELx
+Res0 63:24
+Field 23 EXCEPTION
+Field 22 ERTN
+Res0 21:10
+Field 9 FZPSS
+Field 8 FZP
+Res0 7
+Enum 6:5 TS
+ 0b01 VIRTUAL
+ 0b10 GUEST_PHYSICAL
+ 0b11 PHYSICAL
+EndEnum
+Field 4 MPRED
+Field 3 CC
+Res0 2
+Field 1 ExBRE
+Field 0 E0BRE
+EndSysregFields
+
+Sysreg BRBCR_EL1 2 1 9 0 0
+Fields BRBCR_ELx
+EndSysreg
+
+Sysreg BRBFCR_EL1 2 1 9 0 1
+Res0 63:30
+Enum 29:28 BANK
+ 0b00 BANK_0
+ 0b01 BANK_1
+EndEnum
+Res0 27:23
+Field 22 CONDDIR
+Field 21 DIRCALL
+Field 20 INDCALL
+Field 19 RTN
+Field 18 INDIRECT
+Field 17 DIRECT
+Field 16 EnI
+Res0 15:8
+Field 7 PAUSED
+Field 6 LASTFAILED
+Res0 5:0
+EndSysreg
+
+Sysreg BRBTS_EL1 2 1 9 0 2
+Field 63:0 TS
+EndSysreg
+
+Sysreg BRBINFINJ_EL1 2 1 9 1 0
+Fields BRBINFx_EL1
+EndSysreg
+
+Sysreg BRBSRCINJ_EL1 2 1 9 1 1
+Field 63:0 ADDRESS
+EndSysreg
+
+Sysreg BRBTGTINJ_EL1 2 1 9 1 2
+Field 63:0 ADDRESS
+EndSysreg
+
+Sysreg BRBIDR0_EL1 2 1 9 2 0
+Res0 63:16
+Enum 15:12 CC
+ 0b0101 20_BIT
+EndEnum
+Enum 11:8 FORMAT
+ 0b0000 FORMAT_0
+EndEnum
+Enum 7:0 NUMREC
+ 0b00001000 8
+ 0b00010000 16
+ 0b00100000 32
+ 0b01000000 64
+EndEnum
+EndSysreg
+
+Sysreg BRBCR_EL2 2 4 9 0 0
+Fields BRBCR_ELx
+EndSysreg
+
+Sysreg BRBCR_EL12 2 5 9 0 0
+Fields BRBCR_ELx
+EndSysreg
+
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
UnsignedEnum 59:56 F64MM
@@ -3021,6 +3156,435 @@ Sysreg PMIAR_EL1 3 0 9 14 7
Field 63:0 ADDRESS
EndSysreg
+SysregFields ICC_PPI_HMRx_EL1
+Field 63 HM63
+Field 62 HM62
+Field 61 HM61
+Field 60 HM60
+Field 59 HM59
+Field 58 HM58
+Field 57 HM57
+Field 56 HM56
+Field 55 HM55
+Field 54 HM54
+Field 53 HM53
+Field 52 HM52
+Field 51 HM51
+Field 50 HM50
+Field 49 HM49
+Field 48 HM48
+Field 47 HM47
+Field 46 HM46
+Field 45 HM45
+Field 44 HM44
+Field 43 HM43
+Field 42 HM42
+Field 41 HM41
+Field 40 HM40
+Field 39 HM39
+Field 38 HM38
+Field 37 HM37
+Field 36 HM36
+Field 35 HM35
+Field 34 HM34
+Field 33 HM33
+Field 32 HM32
+Field 31 HM31
+Field 30 HM30
+Field 29 HM29
+Field 28 HM28
+Field 27 HM27
+Field 26 HM26
+Field 25 HM25
+Field 24 HM24
+Field 23 HM23
+Field 22 HM22
+Field 21 HM21
+Field 20 HM20
+Field 19 HM19
+Field 18 HM18
+Field 17 HM17
+Field 16 HM16
+Field 15 HM15
+Field 14 HM14
+Field 13 HM13
+Field 12 HM12
+Field 11 HM11
+Field 10 HM10
+Field 9 HM9
+Field 8 HM8
+Field 7 HM7
+Field 6 HM6
+Field 5 HM5
+Field 4 HM4
+Field 3 HM3
+Field 2 HM2
+Field 1 HM1
+Field 0 HM0
+EndSysregFields
+
+Sysreg ICC_PPI_HMR0_EL1 3 0 12 10 0
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_HMR1_EL1 3 0 12 10 1
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_IDR0_EL1 3 0 12 10 2
+Res0 63:12
+UnsignedEnum 11:8 GCIE_LEGACY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 PRI_BITS
+ 0b0011 4BITS
+ 0b0100 5BITS
+EndEnum
+UnsignedEnum 3:0 ID_BITS
+ 0b0000 16BITS
+ 0b0001 24BITS
+EndEnum
+EndSysreg
+
+Sysreg ICC_ICSR_EL1 3 0 12 10 4
+Res0 63:48
+Field 47:32 IAFFID
+Res0 31:16
+Field 15:11 Priority
+Res0 10:6
+Field 5 HM
+Field 4 Active
+Field 3 IRM
+Field 2 Pending
+Field 1 Enabled
+Field 0 F
+EndSysreg
+
+SysregFields ICC_PPI_ENABLERx_EL1
+Field 63 EN63
+Field 62 EN62
+Field 61 EN61
+Field 60 EN60
+Field 59 EN59
+Field 58 EN58
+Field 57 EN57
+Field 56 EN56
+Field 55 EN55
+Field 54 EN54
+Field 53 EN53
+Field 52 EN52
+Field 51 EN51
+Field 50 EN50
+Field 49 EN49
+Field 48 EN48
+Field 47 EN47
+Field 46 EN46
+Field 45 EN45
+Field 44 EN44
+Field 43 EN43
+Field 42 EN42
+Field 41 EN41
+Field 40 EN40
+Field 39 EN39
+Field 38 EN38
+Field 37 EN37
+Field 36 EN36
+Field 35 EN35
+Field 34 EN34
+Field 33 EN33
+Field 32 EN32
+Field 31 EN31
+Field 30 EN30
+Field 29 EN29
+Field 28 EN28
+Field 27 EN27
+Field 26 EN26
+Field 25 EN25
+Field 24 EN24
+Field 23 EN23
+Field 22 EN22
+Field 21 EN21
+Field 20 EN20
+Field 19 EN19
+Field 18 EN18
+Field 17 EN17
+Field 16 EN16
+Field 15 EN15
+Field 14 EN14
+Field 13 EN13
+Field 12 EN12
+Field 11 EN11
+Field 10 EN10
+Field 9 EN9
+Field 8 EN8
+Field 7 EN7
+Field 6 EN6
+Field 5 EN5
+Field 4 EN4
+Field 3 EN3
+Field 2 EN2
+Field 1 EN1
+Field 0 EN0
+EndSysregFields
+
+Sysreg ICC_PPI_ENABLER0_EL1 3 0 12 10 6
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_ENABLER1_EL1 3 0 12 10 7
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_ACTIVERx_EL1
+Field 63 Active63
+Field 62 Active62
+Field 61 Active61
+Field 60 Active60
+Field 59 Active59
+Field 58 Active58
+Field 57 Active57
+Field 56 Active56
+Field 55 Active55
+Field 54 Active54
+Field 53 Active53
+Field 52 Active52
+Field 51 Active51
+Field 50 Active50
+Field 49 Active49
+Field 48 Active48
+Field 47 Active47
+Field 46 Active46
+Field 45 Active45
+Field 44 Active44
+Field 43 Active43
+Field 42 Active42
+Field 41 Active41
+Field 40 Active40
+Field 39 Active39
+Field 38 Active38
+Field 37 Active37
+Field 36 Active36
+Field 35 Active35
+Field 34 Active34
+Field 33 Active33
+Field 32 Active32
+Field 31 Active31
+Field 30 Active30
+Field 29 Active29
+Field 28 Active28
+Field 27 Active27
+Field 26 Active26
+Field 25 Active25
+Field 24 Active24
+Field 23 Active23
+Field 22 Active22
+Field 21 Active21
+Field 20 Active20
+Field 19 Active19
+Field 18 Active18
+Field 17 Active17
+Field 16 Active16
+Field 15 Active15
+Field 14 Active14
+Field 13 Active13
+Field 12 Active12
+Field 11 Active11
+Field 10 Active10
+Field 9 Active9
+Field 8 Active8
+Field 7 Active7
+Field 6 Active6
+Field 5 Active5
+Field 4 Active4
+Field 3 Active3
+Field 2 Active2
+Field 1 Active1
+Field 0 Active0
+EndSysregFields
+
+Sysreg ICC_PPI_CACTIVER0_EL1 3 0 12 13 0
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CACTIVER1_EL1 3 0 12 13 1
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER0_EL1 3 0 12 13 2
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER1_EL1 3 0 12 13 3
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PENDRx_EL1
+Field 63 Pend63
+Field 62 Pend62
+Field 61 Pend61
+Field 60 Pend60
+Field 59 Pend59
+Field 58 Pend58
+Field 57 Pend57
+Field 56 Pend56
+Field 55 Pend55
+Field 54 Pend54
+Field 53 Pend53
+Field 52 Pend52
+Field 51 Pend51
+Field 50 Pend50
+Field 49 Pend49
+Field 48 Pend48
+Field 47 Pend47
+Field 46 Pend46
+Field 45 Pend45
+Field 44 Pend44
+Field 43 Pend43
+Field 42 Pend42
+Field 41 Pend41
+Field 40 Pend40
+Field 39 Pend39
+Field 38 Pend38
+Field 37 Pend37
+Field 36 Pend36
+Field 35 Pend35
+Field 34 Pend34
+Field 33 Pend33
+Field 32 Pend32
+Field 31 Pend31
+Field 30 Pend30
+Field 29 Pend29
+Field 28 Pend28
+Field 27 Pend27
+Field 26 Pend26
+Field 25 Pend25
+Field 24 Pend24
+Field 23 Pend23
+Field 22 Pend22
+Field 21 Pend21
+Field 20 Pend20
+Field 19 Pend19
+Field 18 Pend18
+Field 17 Pend17
+Field 16 Pend16
+Field 15 Pend15
+Field 14 Pend14
+Field 13 Pend13
+Field 12 Pend12
+Field 11 Pend11
+Field 10 Pend10
+Field 9 Pend9
+Field 8 Pend8
+Field 7 Pend7
+Field 6 Pend6
+Field 5 Pend5
+Field 4 Pend4
+Field 3 Pend3
+Field 2 Pend2
+Field 1 Pend1
+Field 0 Pend0
+EndSysregFields
+
+Sysreg ICC_PPI_CPENDR0_EL1 3 0 12 13 4
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CPENDR1_EL1 3 0 12 13 5
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR0_EL1 3 0 12 13 6
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR1_EL1 3 0 12 13 7
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PRIORITYRx_EL1
+Res0 63:61
+Field 60:56 Priority7
+Res0 55:53
+Field 52:48 Priority6
+Res0 47:45
+Field 44:40 Priority5
+Res0 39:37
+Field 36:32 Priority4
+Res0 31:29
+Field 28:24 Priority3
+Res0 23:21
+Field 20:16 Priority2
+Res0 15:13
+Field 12:8 Priority1
+Res0 7:5
+Field 4:0 Priority0
+EndSysregFields
+
+Sysreg ICC_PPI_PRIORITYR0_EL1 3 0 12 14 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR1_EL1 3 0 12 14 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR2_EL1 3 0 12 14 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR3_EL1 3 0 12 14 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR4_EL1 3 0 12 14 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR5_EL1 3 0 12 14 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR6_EL1 3 0 12 14 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR7_EL1 3 0 12 14 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR8_EL1 3 0 12 15 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR9_EL1 3 0 12 15 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR10_EL1 3 0 12 15 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR11_EL1 3 0 12 15 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR12_EL1 3 0 12 15 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR13_EL1 3 0 12 15 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR14_EL1 3 0 12 15 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR15_EL1 3 0 12 15 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
Sysreg PMSELR_EL0 3 3 9 12 5
Res0 63:5
Field 4:0 SEL
@@ -3103,6 +3667,19 @@ Res0 14:12
Field 11:0 AFFINITY
EndSysreg
+Sysreg ICC_CR0_EL1 3 1 12 0 1
+Res0 63:39
+Field 38 PID
+Field 37:32 IPPT
+Res0 31:1
+Field 0 EN
+EndSysreg
+
+Sysreg ICC_PCR_EL1 3 1 12 0 2
+Res0 63:5
+Field 4:0 PRIORITY
+EndSysreg
+
Sysreg CSSELR_EL1 3 2 0 0 0
Res0 63:5
Field 4 TnD
@@ -3989,6 +4566,54 @@ Field 31:16 PhyPARTID29
Field 15:0 PhyPARTID28
EndSysreg
+Sysreg ICH_HFGRTR_EL2 3 4 12 9 4
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Field 16 ICC_PPI_HMRn_EL1
+Res0 15:8
+Field 7 ICC_IAFFIDR_EL1
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Field 4 ICC_HPPIR_EL1
+Field 3 ICC_HAPR_EL1
+Field 2 ICC_CR0_EL1
+Field 1 ICC_IDRn_EL1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGWTR_EL2 3 4 12 9 6
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Res0 16:7
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Res0 4:3
+Field 2 ICC_CR0_EL1
+Res0 1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGITR_EL2 3 4 12 9 7
+Res0 63:11
+Field 10 GICRCDNMIA
+Field 9 GICRCDIA
+Field 8 GICCDDI
+Field 7 GICCDEOI
+Field 6 GICCDHM
+Field 5 GICCDRCFG
+Field 4 GICCDPEND
+Field 3 GICCDAFF
+Field 2 GICCDPRI
+Field 1 GICCDDIS
+Field 0 GICCDEN
+EndSysreg
+
Sysreg ICH_HCR_EL2 3 4 12 11 0
Res0 63:32
Field 31:27 EOIcount
@@ -4037,6 +4662,12 @@ Field 1 U
Field 0 EOI
EndSysreg
+Sysreg ICH_VCTLR_EL2 3 4 12 11 4
+Res0 63:2
+Field 1 V3
+Field 0 En
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
@@ -4150,7 +4781,13 @@ Mapping TCR_EL1
EndSysreg
Sysreg TCR2_EL1 3 0 2 0 3
-Res0 63:16
+Res0 63:22
+Field 21 FNGNA1
+Field 20 FNGNA0
+Res0 19
+Field 18 FNG1
+Field 17 FNG0
+Field 16 A2
Field 15 DisCH1
Field 14 DisCH0
Res0 13:12
@@ -4174,7 +4811,10 @@ Mapping TCR2_EL1
EndSysreg
Sysreg TCR2_EL2 3 4 2 0 3
-Res0 63:16
+Res0 63:19
+Field 18 FNG1
+Field 17 FNG0
+Field 16 A2
Field 15 DisCH1
Field 14 DisCH0
Field 13 AMEC1