summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8370.dtsi16
-rw-r--r--arch/arm64/include/asm/barrier.h3
-rw-r--r--arch/arm64/include/asm/el2_setup.h45
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h51
-rw-r--r--arch/arm64/include/asm/kvm_host.h36
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h18
-rw-r--r--arch/arm64/include/asm/kvm_nested.h2
-rw-r--r--arch/arm64/include/asm/smp.h24
-rw-r--r--arch/arm64/include/asm/sysreg.h71
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h2
-rw-r--r--arch/arm64/kernel/cpufeature.c26
-rw-r--r--arch/arm64/kernel/smp.c142
-rw-r--r--arch/arm64/kvm/Makefile3
-rw-r--r--arch/arm64/kvm/arch_timer.c2
-rw-r--r--arch/arm64/kvm/arm.c36
-rw-r--r--arch/arm64/kvm/at.c80
-rw-r--r--arch/arm64/kvm/config.c255
-rw-r--r--arch/arm64/kvm/emulate-nested.c49
-rw-r--r--arch/arm64/kvm/guest.c62
-rw-r--r--arch/arm64/kvm/handle_exit.c24
-rw-r--r--arch/arm64/kvm/hyp/exception.c16
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h53
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h49
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c53
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c14
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c6
-rw-r--r--arch/arm64/kvm/inject_fault.c235
-rw-r--r--arch/arm64/kvm/mmio.c12
-rw-r--r--arch/arm64/kvm/mmu.c105
-rw-r--r--arch/arm64/kvm/nested.c109
-rw-r--r--arch/arm64/kvm/sys_regs.c207
-rw-r--r--arch/arm64/kvm/sys_regs.h2
-rw-r--r--arch/arm64/kvm/trace_handle_exit.h2
-rw-r--r--arch/arm64/kvm/vgic-sys-reg-v3.c127
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c30
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c5
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c70
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3-nested.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c14
-rw-r--r--arch/arm64/kvm/vgic/vgic-v5.c52
-rw-r--r--arch/arm64/kvm/vgic/vgic.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic.h48
-rw-r--r--arch/arm64/net/bpf_jit.h5
-rw-r--r--arch/arm64/net/bpf_jit_comp.c167
-rw-r--r--arch/arm64/tools/cpucaps4
-rw-r--r--arch/arm64/tools/sysreg514
48 files changed, 2372 insertions, 516 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8b6cbdd1441b..3c117b1fa198 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -127,6 +127,7 @@ config ARM64
select ARM_GIC_V2M if PCI
select ARM_GIC_V3
select ARM_GIC_V3_ITS if PCI
+ select ARM_GIC_V5
select ARM_PSCI_FW
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
@@ -134,6 +135,7 @@ config ARM64
select CPU_PM if (SUSPEND || CPU_IDLE)
select CPUMASK_OFFSTACK if NR_CPUS > 256
select DCACHE_WORD_ACCESS
+ select HAVE_EXTRA_IPI_TRACEPOINTS
select DYNAMIC_FTRACE if FUNCTION_TRACER
select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
@@ -221,7 +223,6 @@ config ARM64
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
select HAVE_FTRACE_GRAPH_FUNC
- select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/arm64/boot/dts/mediatek/mt8370.dtsi b/arch/arm64/boot/dts/mediatek/mt8370.dtsi
index cf1a3759451f..7ac8b8d03494 100644
--- a/arch/arm64/boot/dts/mediatek/mt8370.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8370.dtsi
@@ -59,6 +59,22 @@
<&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
};
+/*
+ * Please note that overriding compatibles is a discouraged practice and is a
+ * clear indication of nodes not being, well, compatible!
+ *
+ * This is a special case, where the GPU is the same as MT8188, but with one
+ * of the cores fused out in this lower-binned SoC.
+ */
+&gpu {
+ compatible = "mediatek,mt8370-mali", "arm,mali-valhall-jm";
+
+ power-domains = <&spm MT8188_POWER_DOMAIN_MFG2>,
+ <&spm MT8188_POWER_DOMAIN_MFG3>;
+
+ power-domain-names = "core0", "core1";
+};
+
&ppi_cluster0 {
affinity = <&cpu0 &cpu1 &cpu2 &cpu3>;
};
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..f5801b0ba9e9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,6 +44,9 @@
SB_BARRIER_INSN"nop\n", \
ARM64_HAS_SB))
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index b959115e9962..46033027510c 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -165,6 +165,50 @@
.Lskip_gicv3_\@:
.endm
+/* GICv5 system register access */
+.macro __init_el2_gicv5
+ mrs_s x0, SYS_ID_AA64PFR2_EL1
+ ubfx x0, x0, #ID_AA64PFR2_EL1_GCIE_SHIFT, #4
+ cbz x0, .Lskip_gicv5_\@
+
+ mov x0, #(ICH_HFGITR_EL2_GICRCDNMIA | \
+ ICH_HFGITR_EL2_GICRCDIA | \
+ ICH_HFGITR_EL2_GICCDDI | \
+ ICH_HFGITR_EL2_GICCDEOI | \
+ ICH_HFGITR_EL2_GICCDHM | \
+ ICH_HFGITR_EL2_GICCDRCFG | \
+ ICH_HFGITR_EL2_GICCDPEND | \
+ ICH_HFGITR_EL2_GICCDAFF | \
+ ICH_HFGITR_EL2_GICCDPRI | \
+ ICH_HFGITR_EL2_GICCDDIS | \
+ ICH_HFGITR_EL2_GICCDEN)
+ msr_s SYS_ICH_HFGITR_EL2, x0 // Disable instruction traps
+ mov_q x0, (ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_HAPR_EL1 | \
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 | \
+ ICH_HFGRTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGRTR_EL2, x0 // Disable reg read traps
+ mov_q x0, (ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 | \
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 | \
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
+ ICH_HFGWTR_EL2_ICC_APR_EL1)
+ msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+.Lskip_gicv5_\@:
+.endm
+
.macro __init_el2_hstr
msr hstr_el2, xzr // Disable CP15 traps to EL2
.endm
@@ -368,6 +412,7 @@
__init_el2_lor
__init_el2_stage2
__init_el2_gicv3
+ __init_el2_gicv5
__init_el2_hstr
__init_el2_nvhe_idregs
__init_el2_cptr
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0720898f563e..fa8a08a1ccd5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -45,16 +45,39 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
{
@@ -195,6 +218,11 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
}
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
bool e2h, tge;
@@ -224,6 +252,20 @@ static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
}
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -627,6 +669,9 @@ static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
if (kvm_has_fpmr(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
}
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 673925f17cdd..2f2394cce24e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -523,6 +523,7 @@ enum vcpu_sysreg {
/* Anything from this can be RES0/RES1 sanitised */
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
+ SCTLR2_EL2, /* System Control Register 2 (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
@@ -537,6 +538,7 @@ enum vcpu_sysreg {
VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
VNCR(TCR_EL1), /* Translation Control Register */
VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(SCTLR2_EL1), /* System Control Register 2 */
VNCR(ESR_EL1), /* Exception Syndrome Register */
VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
@@ -565,6 +567,10 @@ enum vcpu_sysreg {
VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+ /* FEAT_RAS registers */
+ VNCR(VDISR_EL2),
+ VNCR(VSESR_EL2),
+
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
@@ -819,7 +825,7 @@ struct kvm_vcpu_arch {
u8 iflags;
/* State flags for kernel bookkeeping, unused by the hypervisor code */
- u8 sflags;
+ u16 sflags;
/*
* Don't run the guest (internal implementation need).
@@ -955,9 +961,21 @@ struct kvm_vcpu_arch {
__vcpu_flags_preempt_enable(); \
} while (0)
+#define __vcpu_test_and_clear_flag(v, flagset, f, m) \
+ ({ \
+ typeof(v->arch.flagset) set; \
+ \
+ set = __vcpu_get_flag(v, flagset, f, m); \
+ __vcpu_clear_flag(v, flagset, f, m); \
+ \
+ set; \
+ })
+
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
+#define vcpu_test_and_clear_flag(v, ...) \
+ __vcpu_test_and_clear_flag((v), __VA_ARGS__)
/* KVM_ARM_VCPU_INIT completed */
#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
@@ -1017,6 +1035,8 @@ struct kvm_vcpu_arch {
#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
/* KVM is currently emulating a nested ERET */
#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
+/* SError pending for nested guest */
+#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1151,6 +1171,8 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
* System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the guest cannot modify its
* own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
@@ -1188,6 +1210,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1202,6 +1225,8 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
* System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load.
*
+ * SYSREGS_ON_CPU *MUST* be checked before using this helper.
+ *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the MPIDR should only be set
* once, before running the VCPU, and never changed later.
@@ -1238,6 +1263,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break;
default: return false;
}
@@ -1389,8 +1415,6 @@ static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
return (vcpu_arch->steal.base != INVALID_GPA);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
-
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -1667,6 +1691,12 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+#define kvm_has_ras(k) \
+ (kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))
+
+#define kvm_has_sctlr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, SCTLRX, IMP))
+
static inline bool kvm_arch_has_irq_bypass(void)
{
return true;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b98ac6aa631f..ae563ebd6aee 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm)
read_unlock(&kvm->mmu_lock);
}
+/*
+ * ARM64 KVM relies on a simple conversion from physaddr to a kernel
+ * virtual address (KVA) when it does cache maintenance as the CMO
+ * instructions work on virtual addresses. This is incompatible with
+ * VM_PFNMAP VMAs which may not have a kernel direct mapping to a
+ * virtual address.
+ *
+ * With S2FWB and CACHE DIC features, KVM need not do cache flushing
+ * and CMOs are NOP'd. This has the effect of no longer requiring a
+ * KVA for addresses mapped into the S2. The presence of these features
+ * are thus necessary to support cacheable S2 mapping of VM_PFNMAP.
+ */
+static inline bool kvm_supports_cacheable_pfnmap(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_STAGE2_FWB) &&
+ cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
+}
+
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 0bd07ea068a1..7fd76f41c296 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -80,6 +80,8 @@ extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
+extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
+extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
struct kvm_s2_trans {
phys_addr_t output;
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2510eec026f7..d48ef6d5abcc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -50,10 +50,32 @@ struct seq_file;
*/
extern void smp_init_cpus(void);
+enum ipi_msg_type {
+ IPI_RESCHEDULE,
+ IPI_CALL_FUNC,
+ IPI_CPU_STOP,
+ IPI_CPU_STOP_NMI,
+ IPI_TIMER,
+ IPI_IRQ_WORK,
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
+};
+
/*
* Register IPI interrupts with the arch SMP code
*/
-extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+extern void set_smp_ipi_range_percpu(int ipi_base, int nr_ipi, int ncpus);
+
+static inline void set_smp_ipi_range(int ipi_base, int n)
+{
+ set_smp_ipi_range_percpu(ipi_base, n, 0);
+}
/*
* Called from the secondary holding pen, this is the secondary CPU entry point.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index ef76a4b9e240..d5b5f2ae1afa 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -113,10 +113,14 @@
/* Register-based PAN access, for save/restore purposes */
#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
-#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
- __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+#define __SYS_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
-#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 3, 3, 0, 7, 31)
+#define GSB_SYS_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __SYS_BARRIER_INSN(1, 0, 12, 0, 1, 31)
/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
@@ -1074,6 +1078,67 @@
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
GCS_CAP_VALID_TOKEN)
+/*
+ * Definitions for GICv5 instructions
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GIC_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GIC_CDIA_VALID_MASK, r)
+#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
#define ARM64_FEATURE_FIELD_BITS 4
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index 6f556e993644..f6ec500ad3fa 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -51,6 +51,7 @@
#define VNCR_SP_EL1 0x240
#define VNCR_VBAR_EL1 0x250
#define VNCR_TCR2_EL1 0x270
+#define VNCR_SCTLR2_EL1 0x278
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
@@ -84,6 +85,7 @@
#define VNCR_ICH_HCR_EL2 0x4C0
#define VNCR_ICH_VMCR_EL2 0x4C8
#define VNCR_VDISR_EL2 0x500
+#define VNCR_VSESR_EL2 0x508
#define VNCR_PMBLIMITR_EL1 0x800
#define VNCR_PMBPTR_EL1 0x810
#define VNCR_PMBSR_EL1 0x820
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a006e38b2684..9ad065f15f1d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -303,6 +303,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_DF2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_frac_SHIFT, 4, 0),
@@ -502,6 +503,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_SCTLRX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -2330,11 +2332,11 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry,
int scope)
{
/*
- * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU
+ * ARM64_HAS_GICV3_CPUIF has a lower index, and is a boot CPU
* feature, so will be detected earlier.
*/
- BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS);
- if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS))
+ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GICV3_CPUIF);
+ if (!cpus_have_cap(ARM64_HAS_GICV3_CPUIF))
return false;
return enable_pseudo_nmi;
@@ -2530,8 +2532,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_always,
},
{
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_GIC_CPUIF_SYSREGS,
+ .desc = "GICv3 CPU interface",
+ .capability = ARM64_HAS_GICV3_CPUIF,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP)
@@ -3115,6 +3117,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_pmuv3,
},
#endif
+ {
+ .desc = "SCTLR2",
+ .capability = ARM64_HAS_SCTLR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, SCTLRX, IMP)
+ },
+ {
+ .desc = "GICv5 CPU interface",
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .capability = ARM64_HAS_GICV5_CPUIF,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
+ },
{},
};
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 21a795303568..68cea3a4a35c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -64,26 +64,18 @@ struct secondary_data secondary_data;
/* Number of CPUs which aren't online, but looping in kernel text. */
static int cpus_stuck_in_kernel;
-enum ipi_msg_type {
- IPI_RESCHEDULE,
- IPI_CALL_FUNC,
- IPI_CPU_STOP,
- IPI_CPU_STOP_NMI,
- IPI_TIMER,
- IPI_IRQ_WORK,
- NR_IPI,
- /*
- * Any enum >= NR_IPI and < MAX_IPI is special and not tracable
- * with trace_ipi_*
- */
- IPI_CPU_BACKTRACE = NR_IPI,
- IPI_KGDB_ROUNDUP,
- MAX_IPI
-};
-
static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
-static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+
+struct ipi_descs {
+ struct irq_desc *descs[MAX_IPI];
+};
+
+static DEFINE_PER_CPU_READ_MOSTLY(struct ipi_descs, pcpu_ipi_desc);
+
+#define get_ipi_desc(__cpu, __ipi) (per_cpu_ptr(&pcpu_ipi_desc, __cpu)->descs[__ipi])
+
+static bool percpu_ipi_descs __ro_after_init;
static bool crash_stop;
@@ -844,7 +836,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
prec >= 4 ? " " : "");
for_each_online_cpu(cpu)
- seq_printf(p, "%10u ", irq_desc_kstat_cpu(ipi_desc[i], cpu));
+ seq_printf(p, "%10u ", irq_desc_kstat_cpu(get_ipi_desc(cpu, i), cpu));
seq_printf(p, " %s\n", ipi_types[i]);
}
@@ -917,9 +909,20 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_send_ipi(const cpumask_t *mask, unsigned int nr)
+{
+ unsigned int cpu;
+
+ if (!percpu_ipi_descs)
+ __ipi_send_mask(get_ipi_desc(0, nr), mask);
+ else
+ for_each_cpu(cpu, mask)
+ __ipi_send_single(get_ipi_desc(cpu, nr), cpu);
+}
+
static void arm64_backtrace_ipi(cpumask_t *mask)
{
- __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+ arm64_send_ipi(mask, IPI_CPU_BACKTRACE);
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
@@ -944,7 +947,7 @@ void kgdb_roundup_cpus(void)
if (cpu == this_cpu)
continue;
- __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ __ipi_send_single(get_ipi_desc(cpu, IPI_KGDB_ROUNDUP), cpu);
}
}
#endif
@@ -1013,14 +1016,16 @@ static void do_handle_IPI(int ipinr)
static irqreturn_t ipi_handler(int irq, void *data)
{
- do_handle_IPI(irq - ipi_irq_base);
+ unsigned int ipi = (irq - ipi_irq_base) % nr_ipi;
+
+ do_handle_IPI(ipi);
return IRQ_HANDLED;
}
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
trace_ipi_raise(target, ipi_types[ipinr]);
- __ipi_send_mask(ipi_desc[ipinr], target);
+ arm64_send_ipi(target, ipinr);
}
static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
@@ -1046,11 +1051,15 @@ static void ipi_setup(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- prepare_percpu_nmi(ipi_irq_base + i);
- enable_percpu_nmi(ipi_irq_base + i, 0);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
} else {
- enable_percpu_irq(ipi_irq_base + i, 0);
+ enable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
@@ -1064,44 +1073,77 @@ static void ipi_teardown(int cpu)
return;
for (i = 0; i < nr_ipi; i++) {
- if (ipi_should_be_nmi(i)) {
- disable_percpu_nmi(ipi_irq_base + i);
- teardown_percpu_nmi(ipi_irq_base + i);
+ if (!percpu_ipi_descs) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
} else {
- disable_percpu_irq(ipi_irq_base + i);
+ disable_irq(irq_desc_get_irq(get_ipi_desc(cpu, i)));
}
}
}
#endif
-void __init set_smp_ipi_range(int ipi_base, int n)
+static void ipi_setup_sgi(int ipi)
{
- int i;
+ int err, irq, cpu;
- WARN_ON(n < MAX_IPI);
- nr_ipi = min(n, MAX_IPI);
+ irq = ipi_irq_base + ipi;
- for (i = 0; i < nr_ipi; i++) {
- int err;
+ if (ipi_should_be_nmi(ipi)) {
+ err = request_percpu_nmi(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as NMI, err=%d\n", irq, err);
+ } else {
+ err = request_percpu_irq(irq, ipi_handler, "IPI", &irq_stat);
+ WARN(err, "Could not request IRQ %d as IRQ, err=%d\n", irq, err);
+ }
- if (ipi_should_be_nmi(i)) {
- err = request_percpu_nmi(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as NMI, err=%d\n",
- i, err);
- } else {
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &irq_stat);
- WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
- i, err);
- }
+ for_each_possible_cpu(cpu)
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
+
+ irq_set_status_flags(irq, IRQ_HIDDEN);
+}
+
+static void ipi_setup_lpi(int ipi, int ncpus)
+{
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ int err, irq;
+
+ irq = ipi_irq_base + (cpu * nr_ipi) + ipi;
+
+ err = irq_force_affinity(irq, cpumask_of(cpu));
+ WARN(err, "Could not force affinity IRQ %d, err=%d\n", irq, err);
+
+ err = request_irq(irq, ipi_handler, IRQF_NO_AUTOEN, "IPI",
+ NULL);
+ WARN(err, "Could not request IRQ %d, err=%d\n", irq, err);
+
+ irq_set_status_flags(irq, (IRQ_HIDDEN | IRQ_NO_BALANCING_MASK));
- ipi_desc[i] = irq_to_desc(ipi_base + i);
- irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+ get_ipi_desc(cpu, ipi) = irq_to_desc(irq);
}
+}
+
+void __init set_smp_ipi_range_percpu(int ipi_base, int n, int ncpus)
+{
+ int i;
+
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
+ percpu_ipi_descs = !!ncpus;
ipi_irq_base = ipi_base;
+ for (i = 0; i < nr_ipi; i++) {
+ if (!percpu_ipi_descs)
+ ipi_setup_sgi(i);
+ else
+ ipi_setup_lpi(i, ncpus);
+ }
+
/* Setup the boot CPU immediately */
ipi_setup(smp_processor_id());
}
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7c329e01c557..3ebc0570345c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,7 +23,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
- vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o
+ vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \
+ vgic/vgic-v5.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 701ea10a63f1..dbd74e4885e2 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -830,7 +830,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
* by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
* not both). This simplifies the handling of the EL1NV* bits.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
/* Use the VHE format for mental sanity */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 23dd3f3fc3eb..888f7c7abf54 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
r = BIT(0);
break;
+ case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
+ if (!kvm)
+ r = -EINVAL;
+ else
+ r = kvm_supports_cacheable_pfnmap();
+ break;
+
default:
r = 0;
}
@@ -521,7 +528,7 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
* Either we're running an L2 guest, and the API/APK bits come
* from L1's HCR_EL2, or API/APK are both set.
*/
- if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ if (unlikely(is_nested_ctxt(vcpu))) {
u64 val;
val = __vcpu_sys_reg(vcpu, HCR_EL2);
@@ -740,7 +747,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+ bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE);
+
return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}
@@ -1183,6 +1191,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
+ kvm_nested_flush_hwstate(vcpu);
+
if (kvm_vcpu_has_pmu(vcpu))
kvm_pmu_flush_hwstate(vcpu);
@@ -1282,6 +1292,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, ret);
+ kvm_nested_sync_hwstate(vcpu);
+
preempt_enable();
/*
@@ -2765,19 +2777,15 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
}
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
- struct kvm_kernel_irq_routing_entry *new)
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new)
{
- if (old->type != KVM_IRQ_ROUTING_MSI ||
- new->type != KVM_IRQ_ROUTING_MSI)
- return true;
-
- return memcmp(&old->msi, &new->msi, sizeof(new->msi));
-}
+ if (old->type == KVM_IRQ_ROUTING_MSI &&
+ new->type == KVM_IRQ_ROUTING_MSI &&
+ !memcmp(&old->msi, &new->msi, sizeof(new->msi)))
+ return;
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
/*
* Remapping the vLPI requires taking the its_lock mutex to resolve
* the new translation. We're in spinlock land at this point, so no
@@ -2785,7 +2793,7 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
*
* Unmap the vLPI and fall back to software LPI injection.
*/
- return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
+ return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq);
}
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index a25be111cd8f..0e5610533949 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1047,34 +1047,51 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
- switch (wi->regime) {
- case TR_EL10:
- pov_perms = perm_idx(vcpu, POR_EL1, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL20:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = perm_idx(vcpu, POR_EL0, idx);
- break;
- case TR_EL2:
- pov_perms = perm_idx(vcpu, POR_EL2, idx);
- uov_perms = 0;
- break;
- }
+ if (wr->pov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ pov_perms = perm_idx(vcpu, POR_EL1, idx);
+ break;
+ case TR_EL20:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ case TR_EL2:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ break;
+ }
+
+ if (pov_perms & ~POE_RWX)
+ pov_perms = POE_NONE;
- if (pov_perms & ~POE_RWX)
- pov_perms = POE_NONE;
+ /* R_QXXPC, S1PrivOverflow enabled */
+ if (wr->pwxn && (pov_perms & POE_X))
+ pov_perms &= ~POE_W;
- if (wi->poe && wr->pov) {
wr->pr &= pov_perms & POE_R;
wr->pw &= pov_perms & POE_W;
wr->px &= pov_perms & POE_X;
}
- if (uov_perms & ~POE_RWX)
- uov_perms = POE_NONE;
+ if (wr->uov) {
+ switch (wi->regime) {
+ case TR_EL10:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL20:
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL2:
+ uov_perms = 0;
+ break;
+ }
+
+ if (uov_perms & ~POE_RWX)
+ uov_perms = POE_NONE;
+
+ /* R_NPBXC, S1UnprivOverlay enabled */
+ if (wr->uwxn && (uov_perms & POE_X))
+ uov_perms &= ~POE_W;
- if (wi->e0poe && wr->uov) {
wr->ur &= uov_perms & POE_R;
wr->uw &= uov_perms & POE_W;
wr->ux &= uov_perms & POE_X;
@@ -1095,24 +1112,15 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
if (!wi->hpd)
compute_s1_hierarchical_permissions(vcpu, wi, wr);
- if (wi->poe || wi->e0poe)
- compute_s1_overlay_permissions(vcpu, wi, wr);
+ compute_s1_overlay_permissions(vcpu, wi, wr);
- /* R_QXXPC */
- if (wr->pwxn) {
- if (!wr->pov && wr->pw)
- wr->px = false;
- if (wr->pov && wr->px)
- wr->pw = false;
- }
+ /* R_QXXPC, S1PrivOverlay disabled */
+ if (!wr->pov)
+ wr->px &= !(wr->pwxn && wr->pw);
- /* R_NPBXC */
- if (wr->uwxn) {
- if (!wr->uov && wr->uw)
- wr->ux = false;
- if (wr->uov && wr->ux)
- wr->uw = false;
- }
+ /* R_NPBXC, S1UnprivOverlay disabled */
+ if (!wr->uov)
+ wr->ux &= !(wr->uwxn && wr->uw);
pan = wi->pan && (wr->ur || wr->uw ||
(pan3_enabled(vcpu, wi->regime) && wr->ux));
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index 54911a93b001..da66c4a14775 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -66,7 +66,6 @@ struct reg_bits_to_feat_map {
#define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP
#define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP
#define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP
-#define FEAT_PMUv3p9 ID_AA64DFR0_EL1, PMUVer, V3P9
#define FEAT_TRBE ID_AA64DFR0_EL1, TraceBuffer, IMP
#define FEAT_TRBEv1p1 ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1
#define FEAT_DoubleLock ID_AA64DFR0_EL1, DoubleLock, IMP
@@ -89,6 +88,7 @@ struct reg_bits_to_feat_map {
#define FEAT_RASv2 ID_AA64PFR0_EL1, RAS, V2
#define FEAT_GICv3 ID_AA64PFR0_EL1, GIC, IMP
#define FEAT_LOR ID_AA64MMFR1_EL1, LO, IMP
+#define FEAT_SPEv1p2 ID_AA64DFR0_EL1, PMSVer, V1P2
#define FEAT_SPEv1p4 ID_AA64DFR0_EL1, PMSVer, V1P4
#define FEAT_SPEv1p5 ID_AA64DFR0_EL1, PMSVer, V1P5
#define FEAT_ATS1A ID_AA64ISAR2_EL1, ATS1A, IMP
@@ -131,6 +131,27 @@ struct reg_bits_to_feat_map {
#define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP
#define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP
#define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP
+#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP
+#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2
+#define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP
+#define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP
+#define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT
+#define FEAT_BTI ID_AA64PFR1_EL1, BT, IMP
+#define FEAT_ExS ID_AA64MMFR0_EL1, EXS, IMP
+#define FEAT_IESB ID_AA64MMFR2_EL1, IESB, IMP
+#define FEAT_LSE2 ID_AA64MMFR2_EL1, AT, IMP
+#define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP
+#define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP
+#define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP
+#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2
+#define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC
+#define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP
+#define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP
+#define FEAT_PAN3 ID_AA64MMFR1_EL1, PAN, PAN3
+#define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP
+#define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP
+#define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP
+#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP
static bool not_feat_aa64el3(struct kvm *kvm)
{
@@ -218,11 +239,62 @@ static bool feat_trbe_mpam(struct kvm *kvm)
(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM));
}
+static bool feat_asid2_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_ASID2) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
+static bool feat_d128_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_D128) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
+static bool feat_mec_e2h1(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MEC) && !kvm_has_feat(kvm, FEAT_E2H0);
+}
+
static bool feat_ebep_pmuv3_ss(struct kvm *kvm)
{
return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, FEAT_PMUv3_SS);
}
+static bool feat_mixedendel0(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MixedEnd) || kvm_has_feat(kvm, FEAT_MixedEndEL0);
+}
+
+static bool feat_mte_async(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, FEAT_MTE2) && kvm_has_feat_enum(kvm, FEAT_MTE_ASYNC);
+}
+
+#define check_pmu_revision(k, r) \
+ ({ \
+ (kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, r) && \
+ !kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, IMP_DEF)); \
+ })
+
+static bool feat_pmuv3p1(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P1);
+}
+
+static bool feat_pmuv3p5(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P5);
+}
+
+static bool feat_pmuv3p7(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P7);
+}
+
+static bool feat_pmuv3p9(struct kvm *kvm)
+{
+ return check_pmu_revision(kvm, V3P9);
+}
+
static bool compute_hcr_rw(struct kvm *kvm, u64 *bits)
{
/* This is purely academic: AArch32 and NV are mutually exclusive */
@@ -681,7 +753,7 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = {
NEEDS_FEAT(HDFGRTR2_EL2_nPMICFILTR_EL0 |
HDFGRTR2_EL2_nPMICNTR_EL0,
FEAT_PMUv3_ICNTR),
- NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, FEAT_PMUv3p9),
+ NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, feat_pmuv3p9),
NEEDS_FEAT(HDFGRTR2_EL2_nPMSSCR_EL1 |
HDFGRTR2_EL2_nPMSSDATA,
FEAT_PMUv3_SS),
@@ -713,7 +785,7 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = {
FEAT_PMUv3_ICNTR),
NEEDS_FEAT(HDFGWTR2_EL2_nPMUACR_EL1 |
HDFGWTR2_EL2_nPMZR_EL0,
- FEAT_PMUv3p9),
+ feat_pmuv3p9),
NEEDS_FEAT(HDFGWTR2_EL2_nPMSSCR_EL1, FEAT_PMUv3_SS),
NEEDS_FEAT(HDFGWTR2_EL2_nPMIAR_EL1, FEAT_SEBEP),
NEEDS_FEAT(HDFGWTR2_EL2_nPMSDSFR_EL1, feat_spe_fds),
@@ -832,6 +904,150 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
};
+static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
+ NEEDS_FEAT(SCTLR2_EL1_NMEA |
+ SCTLR2_EL1_EASE,
+ FEAT_DoubleFault2),
+ NEEDS_FEAT(SCTLR2_EL1_EnADERR, feat_aderr),
+ NEEDS_FEAT(SCTLR2_EL1_EnANERR, feat_anerr),
+ NEEDS_FEAT(SCTLR2_EL1_EnIDCP128, FEAT_SYSREG128),
+ NEEDS_FEAT(SCTLR2_EL1_EnPACM |
+ SCTLR2_EL1_EnPACM0,
+ feat_pauth_lr),
+ NEEDS_FEAT(SCTLR2_EL1_CPTA |
+ SCTLR2_EL1_CPTA0 |
+ SCTLR2_EL1_CPTM |
+ SCTLR2_EL1_CPTM0,
+ FEAT_CPA2),
+};
+
+static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
+ NEEDS_FEAT(TCR2_EL2_FNG1 |
+ TCR2_EL2_FNG0 |
+ TCR2_EL2_A2,
+ feat_asid2_e2h1),
+ NEEDS_FEAT(TCR2_EL2_DisCH1 |
+ TCR2_EL2_DisCH0 |
+ TCR2_EL2_D128,
+ feat_d128_e2h1),
+ NEEDS_FEAT(TCR2_EL2_AMEC1, feat_mec_e2h1),
+ NEEDS_FEAT(TCR2_EL2_AMEC0, FEAT_MEC),
+ NEEDS_FEAT(TCR2_EL2_HAFT, FEAT_HAFT),
+ NEEDS_FEAT(TCR2_EL2_PTTWI |
+ TCR2_EL2_PnCH,
+ FEAT_THE),
+ NEEDS_FEAT(TCR2_EL2_AIE, FEAT_AIE),
+ NEEDS_FEAT(TCR2_EL2_POE |
+ TCR2_EL2_E0POE,
+ FEAT_S1POE),
+ NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE),
+};
+
+static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
+ NEEDS_FEAT(SCTLR_EL1_CP15BEN |
+ SCTLR_EL1_ITD |
+ SCTLR_EL1_SED,
+ FEAT_AA32EL0),
+ NEEDS_FEAT(SCTLR_EL1_BT0 |
+ SCTLR_EL1_BT1,
+ FEAT_BTI),
+ NEEDS_FEAT(SCTLR_EL1_CMOW, FEAT_CMOW),
+ NEEDS_FEAT(SCTLR_EL1_TSCXT, feat_csv2_2_csv2_1p2),
+ NEEDS_FEAT(SCTLR_EL1_EIS |
+ SCTLR_EL1_EOS,
+ FEAT_ExS),
+ NEEDS_FEAT(SCTLR_EL1_EnFPM, FEAT_FPMR),
+ NEEDS_FEAT(SCTLR_EL1_IESB, FEAT_IESB),
+ NEEDS_FEAT(SCTLR_EL1_EnALS, FEAT_LS64),
+ NEEDS_FEAT(SCTLR_EL1_EnAS0, FEAT_LS64_ACCDATA),
+ NEEDS_FEAT(SCTLR_EL1_EnASR, FEAT_LS64_V),
+ NEEDS_FEAT(SCTLR_EL1_nAA, FEAT_LSE2),
+ NEEDS_FEAT(SCTLR_EL1_LSMAOE |
+ SCTLR_EL1_nTLSMD,
+ FEAT_LSMAOC),
+ NEEDS_FEAT(SCTLR_EL1_EE, FEAT_MixedEnd),
+ NEEDS_FEAT(SCTLR_EL1_E0E, feat_mixedendel0),
+ NEEDS_FEAT(SCTLR_EL1_MSCEn, FEAT_MOPS),
+ NEEDS_FEAT(SCTLR_EL1_ATA0 |
+ SCTLR_EL1_ATA |
+ SCTLR_EL1_TCF0 |
+ SCTLR_EL1_TCF,
+ FEAT_MTE2),
+ NEEDS_FEAT(SCTLR_EL1_ITFSB, feat_mte_async),
+ NEEDS_FEAT(SCTLR_EL1_TCSO0 |
+ SCTLR_EL1_TCSO,
+ FEAT_MTE_STORE_ONLY),
+ NEEDS_FEAT(SCTLR_EL1_NMI |
+ SCTLR_EL1_SPINTMASK,
+ FEAT_NMI),
+ NEEDS_FEAT(SCTLR_EL1_SPAN, FEAT_PAN),
+ NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3),
+ NEEDS_FEAT(SCTLR_EL1_EnDA |
+ SCTLR_EL1_EnDB |
+ SCTLR_EL1_EnIA |
+ SCTLR_EL1_EnIB,
+ feat_pauth),
+ NEEDS_FEAT(SCTLR_EL1_EnTP2, FEAT_SME),
+ NEEDS_FEAT(SCTLR_EL1_EnRCTX, FEAT_SPECRES),
+ NEEDS_FEAT(SCTLR_EL1_DSSBS, FEAT_SSBS),
+ NEEDS_FEAT(SCTLR_EL1_TIDCP, FEAT_TIDCP1),
+ NEEDS_FEAT(SCTLR_EL1_TME0 |
+ SCTLR_EL1_TME |
+ SCTLR_EL1_TMT0 |
+ SCTLR_EL1_TMT,
+ FEAT_TME),
+ NEEDS_FEAT(SCTLR_EL1_TWEDEL |
+ SCTLR_EL1_TWEDEn,
+ FEAT_TWED),
+ NEEDS_FEAT(SCTLR_EL1_UCI |
+ SCTLR_EL1_EE |
+ SCTLR_EL1_E0E |
+ SCTLR_EL1_WXN |
+ SCTLR_EL1_nTWE |
+ SCTLR_EL1_nTWI |
+ SCTLR_EL1_UCT |
+ SCTLR_EL1_DZE |
+ SCTLR_EL1_I |
+ SCTLR_EL1_UMA |
+ SCTLR_EL1_SA0 |
+ SCTLR_EL1_SA |
+ SCTLR_EL1_C |
+ SCTLR_EL1_A |
+ SCTLR_EL1_M,
+ FEAT_AA64EL1),
+};
+
+static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
+ NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9),
+ NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock),
+ NEEDS_FEAT(MDCR_EL2_PMEE, FEAT_EBEP),
+ NEEDS_FEAT(MDCR_EL2_TDCC, FEAT_FGT),
+ NEEDS_FEAT(MDCR_EL2_MTPME, FEAT_MTPMU),
+ NEEDS_FEAT(MDCR_EL2_HPME |
+ MDCR_EL2_HPMN |
+ MDCR_EL2_TPMCR |
+ MDCR_EL2_TPM,
+ FEAT_PMUv3),
+ NEEDS_FEAT(MDCR_EL2_HPMD, feat_pmuv3p1),
+ NEEDS_FEAT(MDCR_EL2_HCCD |
+ MDCR_EL2_HLP,
+ feat_pmuv3p5),
+ NEEDS_FEAT(MDCR_EL2_HPMFZO, feat_pmuv3p7),
+ NEEDS_FEAT(MDCR_EL2_PMSSE, FEAT_PMUv3_SS),
+ NEEDS_FEAT(MDCR_EL2_E2PB |
+ MDCR_EL2_TPMS,
+ FEAT_SPE),
+ NEEDS_FEAT(MDCR_EL2_HPMFZS, FEAT_SPEv1p2),
+ NEEDS_FEAT(MDCR_EL2_EnSPM, FEAT_SPMU),
+ NEEDS_FEAT(MDCR_EL2_EnSTEPOP, FEAT_STEP2),
+ NEEDS_FEAT(MDCR_EL2_E2TB, FEAT_TRBE),
+ NEEDS_FEAT(MDCR_EL2_TTRF, FEAT_TRF),
+ NEEDS_FEAT(MDCR_EL2_TDA |
+ MDCR_EL2_TDE |
+ MDCR_EL2_TDRA,
+ FEAT_AA64EL1),
+};
+
static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
int map_size, u64 res0, const char *str)
{
@@ -863,6 +1079,14 @@ void __init check_feature_map(void)
__HCRX_EL2_RES0, "HCRX_EL2");
check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map),
HCR_EL2_RES0, "HCR_EL2");
+ check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map),
+ SCTLR2_EL1_RES0, "SCTLR2_EL1");
+ check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map),
+ TCR2_EL2_RES0, "TCR2_EL2");
+ check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map),
+ SCTLR_EL1_RES0, "SCTLR_EL1");
+ check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map),
+ MDCR_EL2_RES0, "MDCR_EL2");
}
static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
@@ -1077,6 +1301,31 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r
*res0 |= HCR_EL2_RES0 | (mask & ~fixed);
*res1 = HCR_EL2_RES1 | (mask & fixed);
break;
+ case SCTLR2_EL1:
+ case SCTLR2_EL2:
+ *res0 = compute_res0_bits(kvm, sctlr2_feat_map,
+ ARRAY_SIZE(sctlr2_feat_map), 0, 0);
+ *res0 |= SCTLR2_EL1_RES0;
+ *res1 = SCTLR2_EL1_RES1;
+ break;
+ case TCR2_EL2:
+ *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map,
+ ARRAY_SIZE(tcr2_el2_feat_map), 0, 0);
+ *res0 |= TCR2_EL2_RES0;
+ *res1 = TCR2_EL2_RES1;
+ break;
+ case SCTLR_EL1:
+ *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map,
+ ARRAY_SIZE(sctlr_el1_feat_map), 0, 0);
+ *res0 |= SCTLR_EL1_RES0;
+ *res1 = SCTLR_EL1_RES1;
+ break;
+ case MDCR_EL2:
+ *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map,
+ ARRAY_SIZE(mdcr_el2_feat_map), 0, 0);
+ *res0 |= MDCR_EL2_RES0;
+ *res1 = MDCR_EL2_RES1;
+ break;
default:
WARN_ON_ONCE(1);
*res0 = *res1 = 0;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 3a384e9660b8..90cb4b7ae0ff 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -88,6 +88,7 @@ enum cgt_group_id {
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_HCRX_SCTLR2En,
CGT_CNTHCTL_EL1TVT,
CGT_CNTHCTL_EL1TVCT,
@@ -108,6 +109,7 @@ enum cgt_group_id {
CGT_HCR_TTLB_TTLBOS,
CGT_HCR_TVM_TRVM,
CGT_HCR_TVM_TRVM_HCRX_TCR2En,
+ CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
CGT_HCR_TPU_TICAB,
CGT_HCR_TPU_TOCU,
CGT_HCR_NV1_nNV2_ENSCXT,
@@ -398,6 +400,12 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_RW,
},
+ [CGT_HCRX_SCTLR2En] = {
+ .index = HCRX_EL2,
+ .value = 0,
+ .mask = HCRX_EL2_SCTLR2En,
+ .behaviour = BEHAVE_FORWARD_RW,
+ },
[CGT_CNTHCTL_EL1TVT] = {
.index = CNTHCTL_EL2,
.value = CNTHCTL_EL1TVT,
@@ -449,6 +457,8 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM),
MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En,
CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En),
+ MCB(CGT_HCR_TVM_TRVM_HCRX_SCTLR2En,
+ CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_SCTLR2En),
MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB),
MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU),
MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
@@ -782,6 +792,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_SCTLR2_EL1, CGT_HCR_TVM_TRVM_HCRX_SCTLR2En),
SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM),
@@ -1354,6 +1365,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1),
SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1),
SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1),
+ SR_FGT(SYS_SCTLR2_EL1, HFGRTR, SCTLR_EL1, 1),
SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1),
SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1),
SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1),
@@ -2592,13 +2604,8 @@ inject:
static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit)
{
- bool control_bit_set;
-
- if (!vcpu_has_nv(vcpu))
- return false;
-
- control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit;
- if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ if (is_nested_ctxt(vcpu) &&
+ (__vcpu_sys_reg(vcpu, reg) & control_bit)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
}
@@ -2719,6 +2726,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
case except_type_irq:
kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
break;
+ case except_type_serror:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+ break;
default:
WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
}
@@ -2816,3 +2826,28 @@ int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
/* esr_el2 value doesn't matter for exits due to irqs. */
return kvm_inject_nested(vcpu, 0, except_type_irq);
}
+
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK,
+ iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW);
+ esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL;
+
+ vcpu_write_sys_reg(vcpu, FAR_EL2, addr);
+
+ if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE)
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+
+ return kvm_inject_nested_sync(vcpu, esr);
+}
+
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+ /*
+ * Hardware sets up the EC field when propagating ESR as a result of
+ * vSError injection. Manually populate EC for an emulated SError
+ * exception.
+ */
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ return kvm_inject_nested(vcpu, esr, except_type_serror);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2196979a24a3..16ba5e9ac86c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
- events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
+ events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) ||
+ vcpu_get_flag(vcpu, NESTED_SERROR_PENDING);
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@@ -833,29 +834,62 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
return 0;
}
+static void commit_pending_events(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
+ return;
+
+ /*
+ * Reset the MMIO emulation state to avoid stepping PC after emulating
+ * the exception entry.
+ */
+ vcpu->mmio_needed = false;
+ kvm_call_hyp(__kvm_adjust_pc, vcpu);
+}
+
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
bool ext_dabt_pending = events->exception.ext_dabt_pending;
+ u64 esr = events->exception.serror_esr;
+ int ret = 0;
- if (serror_pending && has_esr) {
- if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- return -EINVAL;
-
- if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
- kvm_set_sei_esr(vcpu, events->exception.serror_esr);
- else
- return -EINVAL;
- } else if (serror_pending) {
- kvm_inject_vabt(vcpu);
+ /*
+ * Immediately commit the pending SEA to the vCPU's architectural
+ * state which is necessary since we do not return a pending SEA
+ * to userspace via KVM_GET_VCPU_EVENTS.
+ */
+ if (ext_dabt_pending) {
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ commit_pending_events(vcpu);
}
- if (ext_dabt_pending)
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ if (ret < 0)
+ return ret;
- return 0;
+ if (!serror_pending)
+ return 0;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr)
+ return -EINVAL;
+
+ if (has_esr && (esr & ~ESR_ELx_ISS_MASK))
+ return -EINVAL;
+
+ if (has_esr)
+ ret = kvm_inject_serror_esr(vcpu, esr);
+ else
+ ret = kvm_inject_serror(vcpu);
+
+ /*
+ * We could've decided that the SError is due for immediate software
+ * injection; commit the exception in case userspace decides it wants
+ * to inject more exceptions for some strange reason.
+ */
+ commit_pending_events(vcpu);
+ return (ret < 0) ? ret : 0;
}
u32 __attribute_const__ kvm_target_cpu(void)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 453266c96481..a598072f36d2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *);
static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
static int handle_hvc(struct kvm_vcpu *vcpu)
@@ -252,7 +252,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
return 1;
}
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
@@ -311,12 +311,11 @@ static int kvm_handle_gcs(struct kvm_vcpu *vcpu)
static int handle_other(struct kvm_vcpu *vcpu)
{
- bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+ bool allowed, fwd = is_nested_ctxt(vcpu);
u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2);
u64 esr = kvm_vcpu_get_esr(vcpu);
u64 iss = ESR_ELx_ISS(esr);
struct kvm *kvm = vcpu->kvm;
- bool allowed, fwd = false;
/*
* We only trap for two reasons:
@@ -335,28 +334,23 @@ static int handle_other(struct kvm_vcpu *vcpu)
switch (iss) {
case ESR_ELx_ISS_OTHER_ST64BV:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnASR);
+ fwd &= !(hcrx & HCRX_EL2_EnASR);
break;
case ESR_ELx_ISS_OTHER_ST64BV0:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnAS0);
+ fwd &= !(hcrx & HCRX_EL2_EnAS0);
break;
case ESR_ELx_ISS_OTHER_LDST64B:
allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64);
- if (is_l2)
- fwd = !(hcrx & HCRX_EL2_EnALS);
+ fwd &= !(hcrx & HCRX_EL2_EnALS);
break;
case ESR_ELx_ISS_OTHER_TSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC);
break;
case ESR_ELx_ISS_OTHER_PSBCSYNC:
allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5);
- if (is_l2)
- fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
+ fwd &= (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC);
break;
default:
/* Clearly, we're missing something. */
@@ -496,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
} else {
- kvm_inject_vabt(vcpu);
+ kvm_inject_serror(vcpu);
}
return;
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 6a2a899a344e..95d186e0bf54 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -26,7 +26,8 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
if (unlikely(vcpu_has_nv(vcpu)))
return vcpu_read_sys_reg(vcpu, reg);
- else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
+ __vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
@@ -36,7 +37,8 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
if (unlikely(vcpu_has_nv(vcpu)))
vcpu_write_sys_reg(vcpu, val, reg);
- else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
+ else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) ||
+ !__vcpu_write_sys_reg_to_cpu(val, reg))
__vcpu_assign_sys_reg(vcpu, reg, val);
}
@@ -339,6 +341,10 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL1h, except_type_serror);
+ break;
+
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
break;
@@ -347,9 +353,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
break;
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
+ enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror);
+ break;
+
default:
/*
- * Only EL1_SYNC and EL2_{SYNC,IRQ} makes
+ * Only EL1_{SYNC,SERR} and EL2_{SYNC,IRQ,SERR} makes
* sense so far. Everything else gets silently
* ignored.
*/
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 2ad57b117385..84ec4e100fbb 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -298,7 +298,7 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
u64 val; \
\
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \
+ if (is_nested_ctxt(vcpu)) \
compute_clr_set(vcpu, reg, c, s); \
\
compute_undef_clr_set(vcpu, kvm, reg, c, s); \
@@ -436,7 +436,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = vcpu->arch.hcrx_el2;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2);
hcrx |= val & __HCRX_EL2_MASK;
hcrx &= ~(~val & __HCRX_EL2_nMASK);
@@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
write_sysreg_hcr(hcr);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) {
+ u64 vsesr;
+
+ /*
+ * When HCR_EL2.AMO is set, physical SErrors are taken to EL2
+ * and vSError injection is enabled for EL1. Conveniently, for
+ * NV this means that it is never the case where a 'physical'
+ * SError (injected by KVM or userspace) and vSError are
+ * deliverable to the same context.
+ *
+ * As such, we can trivially select between the host or guest's
+ * VSESR_EL2. Except for the case that FEAT_RAS hasn't been
+ * exposed to the guest, where ESR propagation in hardware
+ * occurs unconditionally.
+ *
+ * Paper over the architectural wart and use an IMPLEMENTATION
+ * DEFINED ESR value in case FEAT_RAS is hidden from the guest.
+ */
+ if (!vserror_state_is_nested(vcpu))
+ vsesr = vcpu->arch.vsesr_el2;
+ else if (kvm_has_ras(kern_hyp_va(vcpu->kvm)))
+ vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2);
+ else
+ vsesr = ESR_ELx_ISV;
+
+ write_sysreg_s(vsesr, SYS_VSESR_EL2);
+ }
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
+ u64 *hcr;
+
+ if (vserror_state_is_nested(vcpu))
+ hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2);
+ else
+ hcr = &vcpu->arch.hcr_el2;
+
/*
* If we pended a virtual abort, preserve it until it gets
* cleared. See D1.14.3 (Virtual Interrupts) for details, but
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
+ *
+ * Additionally, when in a nested context we need to propagate the
+ * updated state to the guest hypervisor's HCR_EL2.
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE) {
- vcpu->arch.hcr_el2 &= ~HCR_VSE;
- vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ if (*hcr & HCR_VSE) {
+ *hcr &= ~HCR_VSE;
+ *hcr |= read_sysreg(hcr_el2) & HCR_VSE;
}
}
@@ -531,7 +566,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
* nested guest, as the guest hypervisor could select a smaller VL. Slap
* that into hardware before wrapping up.
*/
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2);
write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR);
@@ -557,7 +592,7 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
if (vcpu_has_sve(vcpu)) {
/* A guest hypervisor may restrict the effective max VL. */
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ if (is_nested_ctxt(vcpu))
zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
else
zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4d0dbea4c56f..a17cbe7582de 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -109,6 +109,28 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}
+static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_ras(kern_hyp_va(vcpu->kvm));
+}
+
+static inline bool ctxt_has_sctlr2(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_SCTLR2))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_sctlr2(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -147,6 +169,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ ctxt_sys_reg(ctxt, SCTLR2_EL1) = read_sysreg_el1(SYS_SCTLR2);
}
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
@@ -159,8 +184,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
if (!has_vhe() && ctxt->__hyp_running_vcpu)
ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
+ else if (ctxt_has_ras(ctxt))
+ ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2);
}
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
@@ -252,6 +282,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1);
write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR);
write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR2_EL1), SYS_SCTLR2);
}
/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
@@ -275,6 +308,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
{
u64 pstate = to_hw_pstate(ctxt);
u64 mode = pstate & PSR_AA32_MODE_MASK;
+ u64 vdisr;
/*
* Safety check to ensure we're setting the CPU up to enter the guest
@@ -293,8 +327,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
write_sysreg_el2(ctxt->regs.pc, SYS_ELR);
write_sysreg_el2(pstate, SYS_SPSR);
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
- write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
+ return;
+
+ if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
+ vdisr = ctxt_sys_reg(ctxt, DISR_EL1);
+ else if (ctxt_has_ras(ctxt))
+ vdisr = ctxt_sys_reg(ctxt, VDISR_EL2);
+ else
+ vdisr = 0;
+
+ write_sysreg_s(vdisr, SYS_VDISR_EL2);
}
static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index f162b0df5cae..d81275790e69 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -296,12 +296,19 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the ICC_SRE_EL1 system
- * register. Note that this may not have any effect, as
- * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due
+ * to be relaxed in a future spec release, at which point this in
+ * condition can be dropped.
*/
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ /*
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
+ }
/*
* If we need to trap system registers, we must write
@@ -322,8 +329,14 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ /*
+ * Can be dropped in the future when GICv5 spec is relaxed. See comment
+ * above.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ }
if (!cpu_if->vgic_sre) {
/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
@@ -423,10 +436,20 @@ void __vgic_v3_init_lrs(void)
*/
u64 __vgic_v3_get_gic_config(void)
{
- u64 val, sre = read_gicreg(ICC_SRE_EL1);
+ u64 val, sre;
unsigned long flags = 0;
/*
+ * In compat mode, we cannot access ICC_SRE_EL1 at any EL
+ * other than EL1 itself; just return the
+ * ICH_VTR_EL2. ICC_IDR0_EL1 is only implemented on a GICv5
+ * system, so we first check if we have GICv5 support.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return read_gicreg(ICH_VTR_EL2);
+
+ sre = read_gicreg(ICC_SRE_EL1);
+ /*
* To check whether we have a MMIO-based (GICv2 compatible)
* CPU interface, we need to disable the system register
* view.
@@ -471,6 +494,16 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
+static void __vgic_v3_compat_mode_enable(void)
+{
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return;
+
+ sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, 0, ICH_VCTLR_EL2_V3);
+ /* Wait for V3 to become enabled */
+ isb();
+}
+
static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
@@ -490,6 +523,8 @@ void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
+ __vgic_v3_compat_mode_enable();
+
/*
* If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
* is dependent on ICC_SRE_EL1.SRE, and we have to perform the
@@ -1050,7 +1085,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
{
u64 ich_hcr;
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ if (!is_nested_ctxt(vcpu))
return false;
ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 477f1580ffea..e482181c6632 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -48,8 +48,7 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
static u64 __compute_hcr(struct kvm_vcpu *vcpu)
{
- u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
- u64 hcr = vcpu->arch.hcr_el2;
+ u64 guest_hcr, hcr = vcpu->arch.hcr_el2;
if (!vcpu_has_nv(vcpu))
return hcr;
@@ -68,10 +67,21 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
if (!vcpu_el2_e2h_is_set(vcpu))
hcr |= HCR_NV1;
+ /*
+ * Nothing in HCR_EL2 should impact running in hypervisor
+ * context, apart from bits we have defined as RESx (E2H,
+ * HCD and co), or that cannot be set directly (the EXCLUDE
+ * bits). Given that we OR the guest's view with the host's,
+ * we can use the 0 value as the starting point, and only
+ * use the config-driven RES1 bits.
+ */
+ guest_hcr = kvm_vcpu_apply_reg_masks(vcpu, HCR_EL2, 0);
+
write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
} else {
host_data_clear_flag(VCPU_IN_HYP_CONTEXT);
+ guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
if (guest_hcr & HCR_NV) {
u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id()));
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 73e4bc7fde9e..f28c6cf4fe1b 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -77,6 +77,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
__vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1));
__vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR));
__vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR));
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ __vcpu_assign_sys_reg(vcpu, SCTLR2_EL2, read_sysreg_el1(SYS_SCTLR2));
}
static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
@@ -139,6 +142,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+
+ if (ctxt_has_sctlr2(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR2_EL2), SYS_SCTLR2);
}
/*
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a640e839848e..6745f38b64f9 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -15,13 +15,11 @@
#include <asm/kvm_nested.h>
#include <asm/esr.h>
-static void pend_sync_exception(struct kvm_vcpu *vcpu)
+static unsigned int exception_target_el(struct kvm_vcpu *vcpu)
{
/* If not nesting, EL1 is the only possible exception target */
- if (likely(!vcpu_has_nv(vcpu))) {
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- return;
- }
+ if (likely(!vcpu_has_nv(vcpu)))
+ return PSR_MODE_EL1h;
/*
* With NV, we need to pick between EL1 and EL2. Note that we
@@ -32,26 +30,76 @@ static void pend_sync_exception(struct kvm_vcpu *vcpu)
switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- break;
+ return PSR_MODE_EL2h;
case PSR_MODE_EL1h:
case PSR_MODE_EL1t:
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return PSR_MODE_EL1h;
case PSR_MODE_EL0t:
- if (vcpu_el2_tge_is_set(vcpu))
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
- else
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
- break;
+ return vcpu_el2_tge_is_set(vcpu) ? PSR_MODE_EL2h : PSR_MODE_EL1h;
default:
BUG();
}
}
-static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
+static enum vcpu_sysreg exception_esr_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return ESR_EL2;
+
+ return ESR_EL1;
+}
+
+static enum vcpu_sysreg exception_far_elx(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL2h)
+ return FAR_EL2;
+
+ return FAR_EL1;
+}
+
+static void pend_sync_exception(struct kvm_vcpu *vcpu)
+{
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+}
+
+static void pend_serror_exception(struct kvm_vcpu *vcpu)
{
- return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SERR);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR);
+}
+
+static bool __effective_sctlr2_bit(struct kvm_vcpu *vcpu, unsigned int idx)
+{
+ u64 sctlr2;
+
+ if (!kvm_has_sctlr2(vcpu->kvm))
+ return false;
+
+ if (is_nested_ctxt(vcpu) &&
+ !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_SCTLR2En))
+ return false;
+
+ if (exception_target_el(vcpu) == PSR_MODE_EL1h)
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL1);
+ else
+ sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL2);
+
+ return sctlr2 & BIT(idx);
+}
+
+static bool effective_sctlr2_ease(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_EASE_SHIFT);
+}
+
+static bool effective_sctlr2_nmea(struct kvm_vcpu *vcpu)
+{
+ return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_NMEA_SHIFT);
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -60,7 +108,11 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
- pend_sync_exception(vcpu);
+ /* This delight is brought to you by FEAT_DoubleFault2. */
+ if (effective_sctlr2_ease(vcpu))
+ pend_serror_exception(vcpu);
+ else
+ pend_sync_exception(vcpu);
/*
* Build an {i,d}abort, depending on the level and the
@@ -83,13 +135,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
esr |= ESR_ELx_FSC_EXTABT;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- } else {
- vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
- }
+ vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -105,10 +152,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
- else
- vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
#define DFSR_FSC_EXTABT_LPAE 0x10
@@ -155,36 +199,35 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
vcpu_write_sys_reg(vcpu, far, FAR_EL1);
}
-/**
- * kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the data abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static void __kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
{
if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, false, addr);
+ inject_abt32(vcpu, iabt, addr);
else
- inject_abt64(vcpu, false, addr);
+ inject_abt64(vcpu, iabt, addr);
}
-/**
- * kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the prefetch abort
- * @addr: The address to report in the DFAR
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- */
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu)
{
- if (vcpu_el1_is_32bit(vcpu))
- inject_abt32(vcpu, true, addr);
- else
- inject_abt64(vcpu, true, addr);
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_TGE | HCR_TEA))
+ return true;
+
+ if (!vcpu_mode_priv(vcpu))
+ return false;
+
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) &&
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
+{
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu))
+ return kvm_inject_nested_sea(vcpu, iabt, addr);
+
+ __kvm_inject_sea(vcpu, iabt, addr);
+ return 1;
}
void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
@@ -194,10 +237,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
addr = kvm_vcpu_get_fault_ipa(vcpu);
addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (kvm_vcpu_trap_is_iabt(vcpu))
- kvm_inject_pabt(vcpu, addr);
- else
- kvm_inject_dabt(vcpu, addr);
+ __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr);
/*
* If AArch64 or LPAE, set FSC to 0 to indicate an Address
@@ -210,9 +250,9 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
return;
- esr = vcpu_read_sys_reg(vcpu, ESR_EL1);
+ esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu));
esr &= ~GENMASK_ULL(5, 0);
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
}
/**
@@ -230,25 +270,70 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
}
-void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
+static bool serror_is_masked(struct kvm_vcpu *vcpu)
{
- vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
- *vcpu_hcr(vcpu) |= HCR_VSE;
+ return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && !effective_sctlr2_nmea(vcpu);
}
-/**
- * kvm_inject_vabt - inject an async abort / SError into the guest
- * @vcpu: The VCPU to receive the exception
- *
- * It is assumed that this code is called from the VCPU thread and that the
- * VCPU therefore is not currently executing guest code.
- *
- * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
- * the remaining ISS all-zeros so that this error is not interpreted as an
- * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
- * value, so the CPU generates an imp-def value.
- */
-void kvm_inject_vabt(struct kvm_vcpu *vcpu)
+static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu)
+{
+ if (is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu))
+ return true;
+
+ if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA))
+ return false;
+
+ /*
+ * In another example where FEAT_DoubleFault2 is entirely backwards,
+ * "masked" as it relates to the routing effects of HCRX_EL2.TMEA
+ * doesn't consider SCTLR2_EL1.NMEA. That is to say, even if EL1 asked
+ * for non-maskable SErrors, the EL2 bit takes priority if A is set.
+ */
+ if (vcpu_mode_priv(vcpu))
+ return *vcpu_cpsr(vcpu) & PSR_A_BIT;
+
+ /*
+ * Otherwise SErrors are considered unmasked when taken from EL0 and
+ * NMEA is set.
+ */
+ return serror_is_masked(vcpu);
+}
+
+static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu));
+}
+
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr)
{
- kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
+ lockdep_assert_held(&vcpu->mutex);
+
+ if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu))
+ return kvm_inject_nested_serror(vcpu, esr);
+
+ if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) {
+ vcpu_set_vsesr(vcpu, esr);
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+ return 1;
+ }
+
+ /*
+ * Emulate the exception entry if SErrors are unmasked. This is useful if
+ * the vCPU is in a nested context w/ vSErrors enabled then we've already
+ * delegated he hardware vSError context (i.e. HCR_EL2.VSE, VSESR_EL2,
+ * VDISR_EL2) to the guest hypervisor.
+ *
+ * As we're emulating the SError injection we need to explicitly populate
+ * ESR_ELx.EC because hardware will not do it on our behalf.
+ */
+ if (!serror_is_masked(vcpu)) {
+ pend_serror_exception(vcpu);
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR);
+ vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+ return 1;
+ }
+
+ vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
+ return 1;
}
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index ab365e839874..54f9358c9e0e 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -72,7 +72,7 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len)
return data;
}
-static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
+static bool kvm_pending_external_abort(struct kvm_vcpu *vcpu)
{
if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
return false;
@@ -90,6 +90,8 @@ static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu)
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR):
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR):
return true;
default:
return false;
@@ -113,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
* Detect if the MMIO return was already handled or if userspace aborted
* the MMIO access.
*/
- if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu)))
+ if (unlikely(!vcpu->mmio_needed || kvm_pending_external_abort(vcpu)))
return 1;
vcpu->mmio_needed = 0;
@@ -169,10 +171,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
- if (vcpu_is_protected(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
+ if (vcpu_is_protected(vcpu))
+ return kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2942ec92c5a4..1c78864767c5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
return 0;
}
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
- return !pfn_is_map_memory(pfn);
-}
-
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
@@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}
+static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
+{
+ switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
+ case MT_NORMAL_NC:
+ case MT_DEVICE_nGnRnE:
+ case MT_DEVICE_nGnRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
int ret = 0;
bool write_fault, writable, force_pte = false;
- bool exec_fault, mte_allowed;
- bool device = false, vfio_allow_any_uc = false;
+ bool exec_fault, mte_allowed, is_vma_cacheable;
+ bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
phys_addr_t ipa = fault_ipa;
struct kvm *kvm = vcpu->kvm;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
+ vm_flags_t vm_flags;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
if (fault_is_perm)
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+ vm_flags = vma->vm_flags;
+
+ is_vma_cacheable = kvm_vma_is_cacheable(vma);
+
/* Don't use the VMA after the unlock -- it may have vanished */
vma = NULL;
@@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (is_error_noslot_pfn(pfn))
return -EFAULT;
- if (kvm_is_device_pfn(pfn)) {
- /*
- * If the page was identified as device early by looking at
- * the VMA flags, vma_pagesize is already representing the
- * largest quantity we can map. If instead it was mapped
- * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
- * and must not be upgraded.
- *
- * In both cases, we don't let transparent_hugepage_adjust()
- * change things at the last minute.
- */
- device = true;
+ /*
+ * Check if this is non-struct page memory PFN, and cannot support
+ * CMOs. It could potentially be unsafe to access as cachable.
+ */
+ if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
+ if (is_vma_cacheable) {
+ /*
+ * Whilst the VMA owner expects cacheable mapping to this
+ * PFN, hardware also has to support the FWB and CACHE DIC
+ * features.
+ *
+ * ARM64 KVM relies on kernel VA mapping to the PFN to
+ * perform cache maintenance as the CMO instructions work on
+ * virtual addresses. VM_PFNMAP region are not necessarily
+ * mapped to a KVA and hence the presence of hardware features
+ * S2FWB and CACHE DIC are mandatory to avoid the need for
+ * cache maintenance.
+ */
+ if (!kvm_supports_cacheable_pfnmap())
+ return -EFAULT;
+ } else {
+ /*
+ * If the page was identified as device early by looking at
+ * the VMA flags, vma_pagesize is already representing the
+ * largest quantity we can map. If instead it was mapped
+ * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE
+ * and must not be upgraded.
+ *
+ * In both cases, we don't let transparent_hugepage_adjust()
+ * change things at the last minute.
+ */
+ s2_force_noncacheable = true;
+ }
} else if (logging_active && !write_fault) {
/*
* Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
- if (exec_fault && device)
+ if (exec_fault && s2_force_noncacheable)
return -ENOEXEC;
/*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* If we are not forced to use page mapping, check if we are
* backed by a THP and thus use block mapping if possible.
*/
- if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
+ if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
if (fault_is_perm && fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
}
- if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
+ if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault)
prot |= KVM_PGTABLE_PROT_X;
- if (device) {
+ if (s2_force_noncacheable) {
if (vfio_allow_any_uc)
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
@@ -1808,7 +1841,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* There is no need to pass the error into the guest.
*/
if (kvm_handle_guest_sea())
- kvm_inject_vabt(vcpu);
+ return kvm_inject_serror(vcpu);
return 1;
}
@@ -1836,11 +1869,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
- if (is_iabt)
- kvm_inject_pabt(vcpu, fault_ipa);
- else
- kvm_inject_dabt(vcpu, fault_ipa);
- return 1;
+ return kvm_inject_sea(vcpu, is_iabt, fault_ipa);
}
}
@@ -1912,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
if (kvm_vcpu_abt_iss1tw(vcpu)) {
- kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
+ ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
goto out_unlock;
}
@@ -1958,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
if (ret == 0)
ret = 1;
out:
- if (ret == -ENOEXEC) {
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- }
+ if (ret == -ENOEXEC)
+ ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu));
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2221,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
ret = -EINVAL;
break;
}
+
+ /*
+ * Cacheable PFNMAP is allowed only if the hardware
+ * supports it.
+ */
+ if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) {
+ ret = -EINVAL;
+ break;
+ }
}
hva = min(reg_end, vma->vm_end);
} while (hva < reg_end);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index dc1d26559bfa..153b3e11b115 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1441,12 +1441,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
break;
case SYS_ID_AA64PFR0_EL1:
- /* No RME, AMU, MPAM, S-EL2, or RAS */
+ /* No RME, AMU, MPAM, or S-EL2 */
val &= ~(ID_AA64PFR0_EL1_RME |
ID_AA64PFR0_EL1_AMU |
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SEL2 |
- ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_EL3 |
ID_AA64PFR0_EL1_EL2 |
ID_AA64PFR0_EL1_EL1 |
@@ -1683,69 +1682,21 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1);
/* TCR2_EL2 */
- res0 = TCR2_EL2_RES0;
- res1 = TCR2_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
- res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP))
- res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT))
- res0 |= TCR2_EL2_HAFT;
- if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
- res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
- res0 |= TCR2_EL2_AIE;
- if (!kvm_has_s1poe(kvm))
- res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE;
- if (!kvm_has_s1pie(kvm))
- res0 |= TCR2_EL2_PIE;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
- res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 |
- TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1);
+ get_reg_fixed_bits(kvm, TCR2_EL2, &res0, &res1);
set_sysreg_masks(kvm, TCR2_EL2, res0, res1);
/* SCTLR_EL1 */
- res0 = SCTLR_EL1_RES0;
- res1 = SCTLR_EL1_RES1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
- res0 |= SCTLR_EL1_EPAN;
+ get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1);
set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+ /* SCTLR2_ELx */
+ get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1);
+ get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1);
+ set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1);
+
/* MDCR_EL2 */
- res0 = MDCR_EL2_RES0;
- res1 = MDCR_EL2_RES1;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
- res0 |= (MDCR_EL2_HPMN | MDCR_EL2_TPMCR |
- MDCR_EL2_TPM | MDCR_EL2_HPME);
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
- res0 |= MDCR_EL2_E2PB | MDCR_EL2_TPMS;
- if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, SPMU, IMP))
- res0 |= MDCR_EL2_EnSPM;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P1))
- res0 |= MDCR_EL2_HPMD;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
- res0 |= MDCR_EL2_TTRF;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
- res0 |= MDCR_EL2_HCCD | MDCR_EL2_HLP;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
- res0 |= MDCR_EL2_E2TB;
- if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
- res0 |= MDCR_EL2_TDCC;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, MTPMU, IMP) ||
- kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
- res0 |= MDCR_EL2_MTPME;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P7))
- res0 |= MDCR_EL2_HPMFZO;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSS, IMP))
- res0 |= MDCR_EL2_PMSSE;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
- res0 |= MDCR_EL2_HPMFZS;
- if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, EBEP, IMP))
- res0 |= MDCR_EL2_PMEE;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, V8P9))
- res0 |= MDCR_EL2_EBWE;
- if (!kvm_has_feat(kvm, ID_AA64DFR2_EL1, STEP, IMP))
- res0 |= MDCR_EL2_EnSTEPOP;
+ get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1);
set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
/* CNTHCTL_EL2 */
@@ -1802,3 +1753,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
kvm_inject_nested_irq(vcpu);
}
+
+/*
+ * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor
+ * can write to HCR_EL2 behind our back, potentially changing the exception
+ * routing / masking for even the host context.
+ *
+ * What follows is some slop to (1) react to exception routing / masking and (2)
+ * preserve the pending SError state across translation regimes.
+ */
+void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
+
+void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long *hcr = vcpu_hcr(vcpu);
+
+ if (!vcpu_has_nv(vcpu))
+ return;
+
+ /*
+ * We previously decided that an SError was deliverable to the guest.
+ * Reap the pending state from HCR_EL2 and...
+ */
+ if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr)))
+ vcpu_set_flag(vcpu, NESTED_SERROR_PENDING);
+
+ /*
+ * Re-attempt SError injection in case the deliverability has changed,
+ * which is necessary to faithfully emulate WFI the case of a pending
+ * SError being a wakeup condition.
+ */
+ if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
+ kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7b8a0a6f2646..82ffb3b3b3cf 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -108,7 +108,6 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
PURE_EL2_SYSREG( HACR_EL2 );
PURE_EL2_SYSREG( VTTBR_EL2 );
PURE_EL2_SYSREG( VTCR_EL2 );
- PURE_EL2_SYSREG( RVBAR_EL2 );
PURE_EL2_SYSREG( TPIDR_EL2 );
PURE_EL2_SYSREG( HPFAR_EL2 );
PURE_EL2_SYSREG( HCRX_EL2 );
@@ -144,6 +143,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
default:
return false;
}
@@ -533,8 +533,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return ignore_write(vcpu, p);
if (p->Op1 == 4) { /* ICC_SRE_EL2 */
- p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
- ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
+ p->regval = KVM_ICC_SRE_EL2;
} else { /* ICC_SRE_EL1 */
p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
}
@@ -773,6 +772,12 @@ static u64 reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return mpidr;
}
+static unsigned int hidden_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return REG_HIDDEN;
+}
+
static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
@@ -1612,7 +1617,6 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
break;
@@ -1645,8 +1649,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64MMFR2_EL1_NV;
break;
case SYS_ID_AA64MMFR3_EL1:
- val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE |
- ID_AA64MMFR3_EL1_S1PIE;
+ val &= ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
+ ID_AA64MMFR3_EL1_S1POE |
+ ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
@@ -1813,7 +1819,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
}
- if (kvm_vgic_global_state.type == VGIC_V3) {
+ if (vgic_is_v3(vcpu->kvm)) {
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
}
@@ -1955,6 +1961,14 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
(vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
return -EINVAL;
+ /*
+ * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then
+ * we support GICv3. Fail attempts to do anything but set that to IMP.
+ */
+ if (vgic_is_v3_compat(vcpu->kvm) &&
+ FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP)
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, user_val);
}
@@ -2327,6 +2341,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
EL2_REG_FILTERED(name, acc, rst, v, el2_visibility)
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
+#define EL2_REG_VNCR_FILT(name, vis) \
+ EL2_REG_FILTERED(name, bad_vncr_trap, reset_val, 0, vis)
+#define EL2_REG_VNCR_GICv3(name) \
+ EL2_REG_VNCR_FILT(name, hidden_visibility)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
/*
@@ -2485,6 +2503,21 @@ static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_sctlr2(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, sctlr2_visibility);
+}
+
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -2515,11 +2548,7 @@ static bool access_gic_vtr(struct kvm_vcpu *vcpu,
if (p->is_write)
return write_to_read_only(vcpu, p, r);
- p->regval = kvm_vgic_global_state.ich_vtr_el2;
- p->regval &= ~(ICH_VTR_EL2_DVIM |
- ICH_VTR_EL2_A3V |
- ICH_VTR_EL2_IDbits);
- p->regval |= ICH_VTR_EL2_nV4;
+ p->regval = kvm_get_guest_vtr_el2();
return true;
}
@@ -2590,6 +2619,26 @@ static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu,
return __el2_visibility(vcpu, rd, tcr2_visibility);
}
+static unsigned int fgt2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (el2_visibility(vcpu, rd) == 0 &&
+ kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, FGT2))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int fgt_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (el2_visibility(vcpu, rd) == 0 &&
+ kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
@@ -2641,6 +2690,23 @@ static bool access_mdcr(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_ras(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ switch(reg_to_encoding(r)) {
+ default:
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+ }
+
+ return trap_raz_wi(vcpu, p, r);
+}
+
/*
* For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and
* AIDR_EL1 as "invariant" registers, meaning userspace cannot change them.
@@ -2868,7 +2934,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_FP)),
ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1,
~(ID_AA64PFR1_EL1_PFAR |
- ID_AA64PFR1_EL1_DF2 |
ID_AA64PFR1_EL1_MTEX |
ID_AA64PFR1_EL1_THE |
ID_AA64PFR1_EL1_GCS |
@@ -2950,6 +3015,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_SCTLRX |
ID_AA64MMFR3_EL1_S1PIE |
ID_AA64MMFR3_EL1_S1POE)),
ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac),
@@ -2960,6 +3026,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+ { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0,
+ .visibility = sctlr2_visibility },
MTE_REG(RGSR_EL1),
MTE_REG(GCR_EL1),
@@ -2989,14 +3057,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
- { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
- { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERRIDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERRSELR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXFR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXCTLR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras },
+ { SYS_DESC(SYS_ERXADDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC0_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC1_EL1), access_ras },
MTE_REG(TFSR_EL1),
MTE_REG(TFSRE0_EL1),
@@ -3307,12 +3375,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
+ EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0,
+ sctlr2_el2_visibility),
EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR_FILT(HFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HFGWTR_EL2, fgt_visibility),
EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
EL2_REG_VNCR(HACR_EL2, reset_val, 0),
@@ -3332,9 +3402,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
vncr_el2_visibility),
{ SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
- EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
- EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR_FILT(HDFGRTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HDFGWTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HFGRTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HFGWTR2_EL2, fgt2_visibility),
+ EL2_REG_VNCR_FILT(HDFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HDFGWTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HAFGRTR_EL2, fgt_visibility),
+ EL2_REG_VNCR_FILT(HFGITR2_EL2, fgt2_visibility),
EL2_REG_REDIR(SPSR_EL2, reset_val, 0),
EL2_REG_REDIR(ELR_EL2, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3349,6 +3424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
+ EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
@@ -3375,43 +3451,44 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MPAMVPM7_EL2), undef_access },
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
- EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_RVBAR_EL2), undef_access },
{ SYS_DESC(SYS_RMR_EL2), undef_access },
+ EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0),
- EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP0R3_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_AP1R3_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_AP0R0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP0R3_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_AP1R3_EL2),
{ SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
- EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_HCR_EL2),
{ SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
{ SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
{ SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
{ SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
- EL2_REG_VNCR(ICH_VMCR_EL2, reset_val, 0),
-
- EL2_REG_VNCR(ICH_LR0_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR1_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR2_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR3_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR4_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR5_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR6_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR7_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR8_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR9_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR10_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR11_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR12_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR13_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR14_EL2, reset_val, 0),
- EL2_REG_VNCR(ICH_LR15_EL2, reset_val, 0),
+ EL2_REG_VNCR_GICv3(ICH_VMCR_EL2),
+
+ EL2_REG_VNCR_GICv3(ICH_LR0_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR1_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR2_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR3_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR4_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR5_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR6_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR7_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR8_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR9_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR10_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR11_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR12_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR13_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR14_EL2),
+ EL2_REG_VNCR_GICv3(ICH_LR15_EL2),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
@@ -4280,12 +4357,12 @@ static const struct sys_reg_desc cp15_64_regs[] = {
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
- bool is_32)
+ bool reset_check)
{
unsigned int i;
for (i = 0; i < n; i++) {
- if (!is_32 && table[i].reg && !table[i].reset) {
+ if (reset_check && table[i].reg && !table[i].reset) {
kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n",
&table[i], i, table[i].name);
return false;
@@ -4480,7 +4557,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params)
return true;
kvm_pr_unimpl("Unhandled cp10 register %s: %u\n",
- params->is_write ? "write" : "read", reg_id);
+ str_write_read(params->is_write), reg_id);
return false;
}
@@ -5274,18 +5351,22 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
int __init kvm_sys_reg_table_init(void)
{
+ const struct sys_reg_desc *gicv3_regs;
bool valid = true;
- unsigned int i;
+ unsigned int i, sz;
int ret = 0;
/* Make sure tables are unique and in order. */
- valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
- valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
- valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
- valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
- valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
+ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), true);
+ valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), false);
+ valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), false);
+ valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), false);
+ valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), false);
valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
+ gicv3_regs = vgic_v3_get_sysreg_table(&sz);
+ valid &= check_sysreg_table(gicv3_regs, sz, false);
+
if (!valid)
return -EINVAL;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index ef97d9fc67cc..317abc490368 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -108,7 +108,7 @@ inline void print_sys_reg_msg(const struct sys_reg_params *p,
/* Look, we even formatted it for you to paste into the table! */
kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
&(struct va_format){ fmt, &va },
- p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, str_write_read(p->is_write));
va_end(va);
}
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index f85415db7713..a7ab9a3bbed0 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -113,7 +113,7 @@ TRACE_EVENT(kvm_sys_access,
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
- __entry->is_write ? "write" : "read")
+ str_write_read(__entry->is_write))
);
TRACE_EVENT(kvm_set_guest_debug,
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 5eacb4b3250a..bdc2d57370b2 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -297,6 +297,91 @@ static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
return 0;
}
+static int set_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ __vcpu_assign_sys_reg(vcpu, r->reg, val);
+ return 0;
+}
+
+static int get_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = __vcpu_sys_reg(vcpu, r->reg);
+ return 0;
+}
+
+static int set_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ return set_gic_ich_reg(vcpu, r, val);
+}
+
+static int get_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ u8 idx = r->Op2 & 3;
+
+ if (idx > vgic_v3_max_apr_idx(vcpu))
+ return -EINVAL;
+
+ return get_gic_ich_reg(vcpu, r, val);
+}
+
+static int set_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ if (val != KVM_ICC_SRE_EL2)
+ return -EINVAL;
+ return 0;
+}
+
+static int get_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = KVM_ICC_SRE_EL2;
+ return 0;
+}
+
+static int set_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ if (val != kvm_get_guest_vtr_el2())
+ return -EINVAL;
+ return 0;
+}
+
+static int get_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = kvm_get_guest_vtr_el2();
+ return 0;
+}
+
+static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return vcpu_has_nv(vcpu) ? 0 : REG_HIDDEN;
+}
+
+#define __EL2_REG(r, acc, i) \
+ { \
+ SYS_DESC(SYS_ ## r), \
+ .get_user = get_gic_ ## acc, \
+ .set_user = set_gic_ ## acc, \
+ .reg = i, \
+ .visibility = el2_visibility, \
+ }
+
+#define EL2_REG(r, acc) __EL2_REG(r, acc, r)
+
+#define EL2_REG_RO(r, acc) __EL2_REG(r, acc, 0)
+
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
{ SYS_DESC(SYS_ICC_PMR_EL1),
.set_user = set_gic_pmr, .get_user = get_gic_pmr, },
@@ -328,8 +413,42 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
.set_user = set_gic_grpen0, .get_user = get_gic_grpen0, },
{ SYS_DESC(SYS_ICC_IGRPEN1_EL1),
.set_user = set_gic_grpen1, .get_user = get_gic_grpen1, },
+ EL2_REG(ICH_AP0R0_EL2, ich_apr),
+ EL2_REG(ICH_AP0R1_EL2, ich_apr),
+ EL2_REG(ICH_AP0R2_EL2, ich_apr),
+ EL2_REG(ICH_AP0R3_EL2, ich_apr),
+ EL2_REG(ICH_AP1R0_EL2, ich_apr),
+ EL2_REG(ICH_AP1R1_EL2, ich_apr),
+ EL2_REG(ICH_AP1R2_EL2, ich_apr),
+ EL2_REG(ICH_AP1R3_EL2, ich_apr),
+ EL2_REG_RO(ICC_SRE_EL2, icc_sre),
+ EL2_REG(ICH_HCR_EL2, ich_reg),
+ EL2_REG_RO(ICH_VTR_EL2, ich_vtr),
+ EL2_REG(ICH_VMCR_EL2, ich_reg),
+ EL2_REG(ICH_LR0_EL2, ich_reg),
+ EL2_REG(ICH_LR1_EL2, ich_reg),
+ EL2_REG(ICH_LR2_EL2, ich_reg),
+ EL2_REG(ICH_LR3_EL2, ich_reg),
+ EL2_REG(ICH_LR4_EL2, ich_reg),
+ EL2_REG(ICH_LR5_EL2, ich_reg),
+ EL2_REG(ICH_LR6_EL2, ich_reg),
+ EL2_REG(ICH_LR7_EL2, ich_reg),
+ EL2_REG(ICH_LR8_EL2, ich_reg),
+ EL2_REG(ICH_LR9_EL2, ich_reg),
+ EL2_REG(ICH_LR10_EL2, ich_reg),
+ EL2_REG(ICH_LR11_EL2, ich_reg),
+ EL2_REG(ICH_LR12_EL2, ich_reg),
+ EL2_REG(ICH_LR13_EL2, ich_reg),
+ EL2_REG(ICH_LR14_EL2, ich_reg),
+ EL2_REG(ICH_LR15_EL2, ich_reg),
};
+const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz)
+{
+ *sz = ARRAY_SIZE(gic_v3_icc_reg_descs);
+ return gic_v3_icc_reg_descs;
+}
+
static u64 attr_to_id(u64 attr)
{
return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr),
@@ -341,8 +460,12 @@ static u64 attr_to_id(u64 attr)
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
- if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
- ARRAY_SIZE(gic_v3_icc_reg_descs)))
+ const struct sys_reg_desc *r;
+
+ r = get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
+ ARRAY_SIZE(gic_v3_icc_reg_descs));
+
+ if (r && !sysreg_hidden(vcpu, r))
return 0;
return -ENXIO;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index eb1205654ac8..1e680ad6e863 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -157,6 +157,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
+ kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
@@ -165,6 +166,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
else
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
+ kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
+
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
kvm_unlock_all_vcpus(kvm);
@@ -391,11 +395,10 @@ int vgic_init(struct kvm *kvm)
goto out;
/*
- * If we have GICv4.1 enabled, unconditionally request enable the
- * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
- * enable it if we present a virtual ITS to the guest.
+ * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
+ * vLPIs) is supported.
*/
- if (vgic_supports_direct_msis(kvm)) {
+ if (vgic_supports_direct_irqs(kvm)) {
ret = vgic_v4_init(kvm);
if (ret)
goto out;
@@ -409,15 +412,7 @@ int vgic_init(struct kvm *kvm)
goto out;
vgic_debug_init(kvm);
-
- /*
- * If userspace didn't set the GIC implementation revision,
- * default to the latest and greatest. You know want it.
- */
- if (!dist->implementation_rev)
- dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
-
out:
return ret;
}
@@ -443,7 +438,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_supports_direct_msis(kvm))
+ if (vgic_supports_direct_irqs(kvm))
vgic_v4_teardown(kvm);
xa_destroy(&dist->lpi_xa);
@@ -674,10 +669,12 @@ void kvm_vgic_init_cpu_hardware(void)
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (kvm_vgic_global_state.type == VGIC_V2) {
vgic_v2_init_lrs();
- else
+ } else if (kvm_vgic_global_state.type == VGIC_V3 ||
+ kvm_vgic_global_state.has_gcie_v3_compat) {
kvm_call_hyp(__vgic_v3_init_lrs);
+ }
}
/**
@@ -722,6 +719,9 @@ int kvm_vgic_hyp_init(void)
kvm_info("GIC system register CPU interface enabled\n");
}
break;
+ case GIC_V5:
+ ret = vgic_v5_probe(gic_kvm_info);
+ break;
default:
ret = -ENODEV;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 534049c7c94b..7368c13f16b7 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -758,7 +758,7 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
if (irq) {
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
if (irq->hw)
- WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+ its_unmap_vlpi(ite->irq->host_irq);
irq->hw = false;
}
@@ -2694,6 +2694,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
break;
+ default:
+ ret = -ENXIO;
+ break;
}
mutex_unlock(&its->its_lock);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index f9ae790163fb..3d1a776b716d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -5,6 +5,7 @@
* Copyright (C) 2015 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
#include <linux/uaccess.h>
@@ -303,12 +304,6 @@ static int vgic_get_common_attr(struct kvm_device *dev,
VGIC_NR_PRIVATE_IRQS, uaddr);
break;
}
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
- u32 __user *uaddr = (u32 __user *)(long)attr->addr;
-
- r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
- break;
- }
}
return r;
@@ -510,6 +505,24 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
}
/*
+ * Allow access to certain ID-like registers prior to VGIC initialization,
+ * thereby allowing the VMM to provision the features / sizing of the VGIC.
+ */
+static bool reg_allowed_pre_init(struct kvm_device_attr *attr)
+{
+ if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS)
+ return false;
+
+ switch (attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK) {
+ case GICD_IIDR:
+ case GICD_TYPER2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
* vgic_v3_attr_regs_access - allows user space to access VGIC v3 state
*
* @dev: kvm device handle
@@ -523,7 +536,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
struct vgic_reg_attr reg_attr;
gpa_t addr;
struct kvm_vcpu *vcpu;
- bool uaccess, post_init = true;
+ bool uaccess;
u32 val;
int ret;
@@ -539,9 +552,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
/* Sysregs uaccess is performed by the sysreg handling code */
uaccess = false;
break;
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
- post_init = false;
- fallthrough;
default:
uaccess = true;
}
@@ -561,7 +571,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->arch.config_lock);
- if (post_init != vgic_initialized(dev->kvm)) {
+ if (!(vgic_initialized(dev->kvm) || reg_allowed_pre_init(attr))) {
ret = -EBUSY;
goto out;
}
@@ -591,19 +601,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
}
break;
}
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
- if (!is_write) {
- val = dev->kvm->arch.vgic.mi_intid;
- ret = 0;
- break;
- }
-
- ret = -EINVAL;
- if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) {
- dev->kvm->arch.vgic.mi_intid = val;
- ret = 0;
- }
- break;
default:
ret = -EINVAL;
break;
@@ -630,8 +627,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, true);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)attr->addr;
+ u32 val;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ if (vgic_initialized(dev->kvm))
+ return -EBUSY;
+
+ if (!irq_is_ppi(val))
+ return -EINVAL;
+
+ dev->kvm->arch.vgic.mi_intid = val;
+ return 0;
+ }
default:
return vgic_set_common_attr(dev, attr);
}
@@ -645,8 +658,13 @@ static int vgic_v3_get_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
- case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return vgic_v3_attr_regs_access(dev, attr, false);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ return put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
+ }
default:
return vgic_get_common_attr(dev, attr);
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index ae4c0593d114..a3ef185209e9 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -50,8 +50,17 @@ bool vgic_has_its(struct kvm *kvm)
bool vgic_supports_direct_msis(struct kvm *kvm)
{
- return (kvm_vgic_global_state.has_gicv4_1 ||
- (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
+ return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+}
+
+bool system_supports_direct_sgis(void)
+{
+ return kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi();
+}
+
+bool vgic_supports_direct_sgis(struct kvm *kvm)
+{
+ return kvm->arch.vgic.nassgicap;
}
/*
@@ -86,7 +95,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
}
break;
case GICD_TYPER2:
- if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi())
+ if (vgic_supports_direct_sgis(vcpu->kvm))
value = GICD_TYPER2_nASSGIcap;
break;
case GICD_IIDR:
@@ -119,7 +128,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi())
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
/* Dist stays enabled? nASSGIreq is RO */
@@ -133,7 +142,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
if (is_hwsgi != dist->nassgireq)
vgic_v4_configure_vsgis(vcpu->kvm);
- if (kvm_vgic_global_state.has_gicv4_1 &&
+ if (vgic_supports_direct_sgis(vcpu->kvm) &&
was_enabled != dist->enabled)
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4);
else if (!was_enabled && dist->enabled)
@@ -159,8 +168,18 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
switch (addr & 0x0c) {
case GICD_TYPER2:
- if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+ reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
+
+ if (reg == val)
+ return 0;
+ if (vgic_initialized(vcpu->kvm))
+ return -EBUSY;
+ if ((reg ^ val) & ~GICD_TYPER2_nASSGIcap)
return -EINVAL;
+ if (!system_supports_direct_sgis() && val)
+ return -EINVAL;
+
+ dist->nassgicap = val & GICD_TYPER2_nASSGIcap;
return 0;
case GICD_IIDR:
reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
@@ -178,7 +197,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
}
case GICD_CTLR:
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1)
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index 679aafe77de2..7f1259b49c50 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -116,7 +116,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
{
u64 xmo;
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ if (is_nested_ctxt(vcpu)) {
xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
"Separate virtual IRQ/FIQ settings not supported\n");
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 193946108192..4d9343d2b0b1 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -356,7 +356,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident)
return 0;
return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
@@ -367,7 +367,7 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident)
return 0;
if (vcpu_get_flag(vcpu, IN_WFI))
@@ -527,28 +527,26 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
return NULL;
}
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
+void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
{
struct vgic_irq *irq;
unsigned long flags;
- int ret = 0;
if (!vgic_supports_direct_msis(kvm))
- return 0;
+ return;
irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
if (!irq)
- return 0;
+ return;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
WARN_ON(irq->hw && irq->host_irq != host_irq);
if (irq->hw) {
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
- ret = its_unmap_vlpi(host_irq);
+ its_unmap_vlpi(host_irq);
}
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(kvm, irq);
- return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
new file mode 100644
index 000000000000..6bdbb221bcde
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <kvm/arm_vgic.h>
+#include <linux/irqchip/arm-vgic-info.h>
+
+#include "vgic.h"
+
+/*
+ * Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
+ * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
+ * registers a VGIC_V3 device.
+ */
+int vgic_v5_probe(const struct gic_kvm_info *info)
+{
+ u64 ich_vtr_el2;
+ int ret;
+
+ if (!info->has_gcie_v3_compat)
+ return -ENODEV;
+
+ kvm_vgic_global_state.type = VGIC_V5;
+ kvm_vgic_global_state.has_gcie_v3_compat = true;
+
+ /* We only support v3 compat mode - use vGICv3 limits */
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+ kvm_vgic_global_state.vcpu_base = 0;
+ kvm_vgic_global_state.vctrl_base = NULL;
+ kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.has_gicv4 = false;
+ kvm_vgic_global_state.has_gicv4_1 = false;
+
+ ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+ kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (ret) {
+ kvm_err("Cannot register GICv3-legacy KVM device.\n");
+ return ret;
+ }
+
+ static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
+ kvm_info("GCIE legacy system register CPU interface\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 8f8096d48925..f5148b38120a 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -951,7 +951,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* can be directly injected (GICv4).
*/
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
- !vgic_supports_direct_msis(vcpu->kvm))
+ !vgic_supports_direct_irqs(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -965,7 +965,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);
- if (vgic_supports_direct_msis(vcpu->kvm))
+ if (vgic_supports_direct_irqs(vcpu->kvm))
vgic_v4_commit(vcpu);
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 4349084cb9a6..1384a04c0784 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -64,6 +64,24 @@
KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+#define KVM_ICC_SRE_EL2 (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | \
+ ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB)
+#define KVM_ICH_VTR_EL2_RES0 (ICH_VTR_EL2_DVIM | \
+ ICH_VTR_EL2_A3V | \
+ ICH_VTR_EL2_IDbits)
+#define KVM_ICH_VTR_EL2_RES1 ICH_VTR_EL2_nV4
+
+static inline u64 kvm_get_guest_vtr_el2(void)
+{
+ u64 vtr;
+
+ vtr = kvm_vgic_global_state.ich_vtr_el2;
+ vtr &= ~KVM_ICH_VTR_EL2_RES0;
+ vtr |= KVM_ICH_VTR_EL2_RES1;
+
+ return vtr;
+}
+
/*
* As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
* below macros are defined for ITS table entry encoding.
@@ -297,6 +315,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr, bool is_write);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type);
@@ -308,6 +327,8 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
+int vgic_v5_probe(const struct gic_kvm_info *info);
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
@@ -369,7 +390,23 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm);
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
int vgic_its_invall(struct kvm_vcpu *vcpu);
+bool system_supports_direct_sgis(void);
bool vgic_supports_direct_msis(struct kvm *kvm);
+bool vgic_supports_direct_sgis(struct kvm *kvm);
+
+static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
+{
+ /*
+ * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware,
+ * indirectly allowing userspace to control whether or not vPEs are
+ * allocated for the VM.
+ */
+ if (system_supports_direct_sgis())
+ return vgic_supports_direct_sgis(kvm);
+
+ return vgic_supports_direct_msis(kvm);
+}
+
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
void vgic_v4_configure_vsgis(struct kvm *kvm);
@@ -389,6 +426,17 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
+static inline bool vgic_is_v3_compat(struct kvm *kvm)
+{
+ return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
+ kvm_vgic_global_state.has_gcie_v3_compat;
+}
+
+static inline bool vgic_is_v3(struct kvm *kvm)
+{
+ return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
+}
+
int vgic_its_debug_init(struct kvm_device *dev);
void vgic_its_debug_destroy(struct kvm_device *dev);
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index a3b0e693a125..bbea4f36f9f2 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -325,4 +325,9 @@
#define A64_MRS_SP_EL0(Rt) \
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)
+/* Barriers */
+#define A64_SB aarch64_insn_get_sb_value()
+#define A64_DSB_NSH (aarch64_insn_get_dsb_base_value() | 0x7 << 8)
+#define A64_ISB aarch64_insn_get_isb_value()
+
#endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index da8b89dd2910..97dfd5432809 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
[TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
+ /* callee saved register for private stack pointer */
+ [PRIVATE_SP] = A64_R(27),
/* callee saved register for kern_vm_start address */
[ARENA_VM_START] = A64_R(28),
};
@@ -86,6 +89,7 @@ struct jit_ctx {
u64 user_vm_start;
u64 arena_vm_start;
bool fp_used;
+ bool priv_sp_used;
bool write;
};
@@ -98,6 +102,10 @@ struct bpf_plt {
#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ 16
+#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
if (reg_used & 8)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
- if (reg_used & 16)
+ if (reg_used & 16) {
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+ if (ctx->priv_sp_used)
+ ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+ }
if (ctx->arena_vm_start)
ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
+ ctx->fp_used = true;
} else {
find_used_callee_regs(ctx);
for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
}
}
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+ struct jit_ctx *ctx)
+{
+ const u8 tmp = bpf2a64[TMP_REG_1];
+
+ emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+ emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+ else
+ emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+ emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx);
+}
+
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
const bool is_main_prog = !bpf_is_subprog(prog);
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
+ void __percpu *priv_stack_ptr;
const int idx0 = ctx->idx;
int cur_offset;
@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
}
- if (ctx->fp_used)
- /* Set up BPF prog stack base register */
- emit(A64_MOV(1, fp, A64_SP), ctx);
-
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
+ if (ctx->fp_used) {
+ if (ctx->priv_sp_used) {
+ /* Set up private stack pointer */
+ priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+ emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+ emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+ } else {
+ /* Set up BPF prog stack base register */
+ emit(A64_MOV(1, fp, A64_SP), ctx);
+ }
+ }
+
/* Set up function call stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->arena_vm_start)
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_STR64I(tcc, ptr, 0), ctx);
/* restore SP */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
const u8 ptr = bpf2a64[TCCNT_PTR];
/* We're done with BPF stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ emit_cond_jmp:
src = tmp2;
}
if (src == fp) {
- src_adj = A64_SP;
+ src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
src_adj = src;
@@ -1630,17 +1666,14 @@ emit_cond_jmp:
return ret;
break;
- /* speculation barrier */
+ /* speculation barrier against v1 and v4 */
case BPF_ST | BPF_NOSPEC:
- /*
- * Nothing required here.
- *
- * In case of arm64, we rely on the firmware mitigation of
- * Speculative Store Bypass as controlled via the ssbd kernel
- * parameter. Whenever the mitigation is enabled, it works
- * for all of the kernel code with no need to provide any
- * additional instructions.
- */
+ if (alternative_has_cap_likely(ARM64_HAS_SB)) {
+ emit(A64_SB, ctx);
+ } else {
+ emit(A64_DSB_NSH, ctx);
+ emit(A64_ISB, ctx);
+ }
break;
/* ST: *(size *)(dst + off) = imm */
@@ -1657,7 +1690,7 @@ emit_cond_jmp:
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1719,7 +1752,7 @@ emit_cond_jmp:
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+ stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+ }
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+ struct bpf_prog *prog)
+{
+ int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+ u64 *stack_ptr;
+
+ for_each_possible_cpu(cpu) {
+ stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+ if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+ stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+ pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
+ bpf_jit_get_prog_name(prog));
+ break;
+ }
+ }
+}
+
struct arm64_jit_data {
struct bpf_binary_header *header;
u8 *ro_image;
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
int image_size, prog_size, extable_size, extable_align, extable_offset;
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
- struct bpf_binary_header *ro_header;
+ struct bpf_binary_header *ro_header = NULL;
struct arm64_jit_data *jit_data;
+ void __percpu *priv_stack_ptr = NULL;
bool was_classic = bpf_prog_was_classic(prog);
+ int priv_stack_alloc_sz;
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
prog->aux->jit_data = jit_data;
}
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /* Allocate actual private stack size with verifier-calculated
+ * stack size plus two memory guards to protect overflow and
+ * underflow.
+ */
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+ if (!priv_stack_ptr) {
+ prog = orig_prog;
+ goto out_priv_stack;
+ }
+
+ priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+ prog->aux->priv_stack_ptr = priv_stack_ptr;
+ }
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
ro_image_ptr = jit_data->ro_image;
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
+ if (priv_stack_ptr)
+ ctx.priv_sp_used = true;
+
/* Pass 1: Estimate the maximum image size.
*
* BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ skip_init_ctx:
ctx.offset[i] *= AARCH64_INSN_SIZE;
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
+ if (!ro_header && priv_stack_ptr) {
+ free_percpu(priv_stack_ptr);
+ prog->aux->priv_stack_ptr = NULL;
+ }
kvfree(ctx.offset);
+out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
@@ -2089,6 +2182,11 @@ out_free_hdr:
goto out_off;
}
+bool bpf_jit_supports_private_stack(void)
+{
+ return true;
+}
+
bool bpf_jit_supports_kfunc_call(void)
{
return true;
@@ -2243,11 +2341,6 @@ static int calc_arg_aux(const struct btf_func_model *m,
/* the rest arguments are passed through stack */
for (; i < m->nr_args; i++) {
- /* We can not know for sure about exact alignment needs for
- * struct passed on stack, so deny those
- */
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- return -ENOTSUPP;
stack_slots = (m->arg_size[i] + 7) / 8;
a->bstack_for_args += stack_slots * 8;
a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
@@ -2911,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
return true;
}
+bool bpf_jit_bypass_spec_v4(void)
+{
+ /* In case of arm64, we rely on the firmware mitigation of Speculative
+ * Store Bypass as controlled via the ssbd kernel parameter. Whenever
+ * the mitigation is enabled, it works for all of the kernel code with
+ * no need to provide any additional instructions. Therefore, skip
+ * inserting nospec insns against Spectre v4.
+ */
+ return true;
+}
+
bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
@@ -2928,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
if (prog->jited) {
struct arm64_jit_data *jit_data = prog->aux->jit_data;
struct bpf_binary_header *hdr;
+ void __percpu *priv_stack_ptr;
+ int priv_stack_alloc_sz;
/*
* If we fail the final pass of JIT (from jit_subprogs),
@@ -2941,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
}
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+ free_percpu(prog->aux->priv_stack_ptr);
+ }
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 066b0ee8b538..ef0b7946f5a4 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -35,7 +35,8 @@ HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
-HAS_GIC_CPUIF_SYSREGS
+HAS_GICV3_CPUIF
+HAS_GICV5_CPUIF
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCR_NV1
@@ -50,6 +51,7 @@ HAS_PAN
HAS_PMUV3
HAS_S1PIE
HAS_S1POE
+HAS_SCTLR2
HAS_RAS_EXTN
HAS_RNG
HAS_SB
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 81c4e214ba33..696ab1f32a67 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1314,7 +1314,10 @@ UnsignedEnum 19:16 UINJ
0b0000 NI
0b0001 IMP
EndEnum
-Res0 15:12
+UnsignedEnum 15:12 GCIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 11:8 MTEFAR
0b0000 NI
0b0001 IMP
@@ -3153,6 +3156,435 @@ Sysreg PMIAR_EL1 3 0 9 14 7
Field 63:0 ADDRESS
EndSysreg
+SysregFields ICC_PPI_HMRx_EL1
+Field 63 HM63
+Field 62 HM62
+Field 61 HM61
+Field 60 HM60
+Field 59 HM59
+Field 58 HM58
+Field 57 HM57
+Field 56 HM56
+Field 55 HM55
+Field 54 HM54
+Field 53 HM53
+Field 52 HM52
+Field 51 HM51
+Field 50 HM50
+Field 49 HM49
+Field 48 HM48
+Field 47 HM47
+Field 46 HM46
+Field 45 HM45
+Field 44 HM44
+Field 43 HM43
+Field 42 HM42
+Field 41 HM41
+Field 40 HM40
+Field 39 HM39
+Field 38 HM38
+Field 37 HM37
+Field 36 HM36
+Field 35 HM35
+Field 34 HM34
+Field 33 HM33
+Field 32 HM32
+Field 31 HM31
+Field 30 HM30
+Field 29 HM29
+Field 28 HM28
+Field 27 HM27
+Field 26 HM26
+Field 25 HM25
+Field 24 HM24
+Field 23 HM23
+Field 22 HM22
+Field 21 HM21
+Field 20 HM20
+Field 19 HM19
+Field 18 HM18
+Field 17 HM17
+Field 16 HM16
+Field 15 HM15
+Field 14 HM14
+Field 13 HM13
+Field 12 HM12
+Field 11 HM11
+Field 10 HM10
+Field 9 HM9
+Field 8 HM8
+Field 7 HM7
+Field 6 HM6
+Field 5 HM5
+Field 4 HM4
+Field 3 HM3
+Field 2 HM2
+Field 1 HM1
+Field 0 HM0
+EndSysregFields
+
+Sysreg ICC_PPI_HMR0_EL1 3 0 12 10 0
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_HMR1_EL1 3 0 12 10 1
+Fields ICC_PPI_HMRx_EL1
+EndSysreg
+
+Sysreg ICC_IDR0_EL1 3 0 12 10 2
+Res0 63:12
+UnsignedEnum 11:8 GCIE_LEGACY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 PRI_BITS
+ 0b0011 4BITS
+ 0b0100 5BITS
+EndEnum
+UnsignedEnum 3:0 ID_BITS
+ 0b0000 16BITS
+ 0b0001 24BITS
+EndEnum
+EndSysreg
+
+Sysreg ICC_ICSR_EL1 3 0 12 10 4
+Res0 63:48
+Field 47:32 IAFFID
+Res0 31:16
+Field 15:11 Priority
+Res0 10:6
+Field 5 HM
+Field 4 Active
+Field 3 IRM
+Field 2 Pending
+Field 1 Enabled
+Field 0 F
+EndSysreg
+
+SysregFields ICC_PPI_ENABLERx_EL1
+Field 63 EN63
+Field 62 EN62
+Field 61 EN61
+Field 60 EN60
+Field 59 EN59
+Field 58 EN58
+Field 57 EN57
+Field 56 EN56
+Field 55 EN55
+Field 54 EN54
+Field 53 EN53
+Field 52 EN52
+Field 51 EN51
+Field 50 EN50
+Field 49 EN49
+Field 48 EN48
+Field 47 EN47
+Field 46 EN46
+Field 45 EN45
+Field 44 EN44
+Field 43 EN43
+Field 42 EN42
+Field 41 EN41
+Field 40 EN40
+Field 39 EN39
+Field 38 EN38
+Field 37 EN37
+Field 36 EN36
+Field 35 EN35
+Field 34 EN34
+Field 33 EN33
+Field 32 EN32
+Field 31 EN31
+Field 30 EN30
+Field 29 EN29
+Field 28 EN28
+Field 27 EN27
+Field 26 EN26
+Field 25 EN25
+Field 24 EN24
+Field 23 EN23
+Field 22 EN22
+Field 21 EN21
+Field 20 EN20
+Field 19 EN19
+Field 18 EN18
+Field 17 EN17
+Field 16 EN16
+Field 15 EN15
+Field 14 EN14
+Field 13 EN13
+Field 12 EN12
+Field 11 EN11
+Field 10 EN10
+Field 9 EN9
+Field 8 EN8
+Field 7 EN7
+Field 6 EN6
+Field 5 EN5
+Field 4 EN4
+Field 3 EN3
+Field 2 EN2
+Field 1 EN1
+Field 0 EN0
+EndSysregFields
+
+Sysreg ICC_PPI_ENABLER0_EL1 3 0 12 10 6
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_ENABLER1_EL1 3 0 12 10 7
+Fields ICC_PPI_ENABLERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_ACTIVERx_EL1
+Field 63 Active63
+Field 62 Active62
+Field 61 Active61
+Field 60 Active60
+Field 59 Active59
+Field 58 Active58
+Field 57 Active57
+Field 56 Active56
+Field 55 Active55
+Field 54 Active54
+Field 53 Active53
+Field 52 Active52
+Field 51 Active51
+Field 50 Active50
+Field 49 Active49
+Field 48 Active48
+Field 47 Active47
+Field 46 Active46
+Field 45 Active45
+Field 44 Active44
+Field 43 Active43
+Field 42 Active42
+Field 41 Active41
+Field 40 Active40
+Field 39 Active39
+Field 38 Active38
+Field 37 Active37
+Field 36 Active36
+Field 35 Active35
+Field 34 Active34
+Field 33 Active33
+Field 32 Active32
+Field 31 Active31
+Field 30 Active30
+Field 29 Active29
+Field 28 Active28
+Field 27 Active27
+Field 26 Active26
+Field 25 Active25
+Field 24 Active24
+Field 23 Active23
+Field 22 Active22
+Field 21 Active21
+Field 20 Active20
+Field 19 Active19
+Field 18 Active18
+Field 17 Active17
+Field 16 Active16
+Field 15 Active15
+Field 14 Active14
+Field 13 Active13
+Field 12 Active12
+Field 11 Active11
+Field 10 Active10
+Field 9 Active9
+Field 8 Active8
+Field 7 Active7
+Field 6 Active6
+Field 5 Active5
+Field 4 Active4
+Field 3 Active3
+Field 2 Active2
+Field 1 Active1
+Field 0 Active0
+EndSysregFields
+
+Sysreg ICC_PPI_CACTIVER0_EL1 3 0 12 13 0
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CACTIVER1_EL1 3 0 12 13 1
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER0_EL1 3 0 12 13 2
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SACTIVER1_EL1 3 0 12 13 3
+Fields ICC_PPI_ACTIVERx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PENDRx_EL1
+Field 63 Pend63
+Field 62 Pend62
+Field 61 Pend61
+Field 60 Pend60
+Field 59 Pend59
+Field 58 Pend58
+Field 57 Pend57
+Field 56 Pend56
+Field 55 Pend55
+Field 54 Pend54
+Field 53 Pend53
+Field 52 Pend52
+Field 51 Pend51
+Field 50 Pend50
+Field 49 Pend49
+Field 48 Pend48
+Field 47 Pend47
+Field 46 Pend46
+Field 45 Pend45
+Field 44 Pend44
+Field 43 Pend43
+Field 42 Pend42
+Field 41 Pend41
+Field 40 Pend40
+Field 39 Pend39
+Field 38 Pend38
+Field 37 Pend37
+Field 36 Pend36
+Field 35 Pend35
+Field 34 Pend34
+Field 33 Pend33
+Field 32 Pend32
+Field 31 Pend31
+Field 30 Pend30
+Field 29 Pend29
+Field 28 Pend28
+Field 27 Pend27
+Field 26 Pend26
+Field 25 Pend25
+Field 24 Pend24
+Field 23 Pend23
+Field 22 Pend22
+Field 21 Pend21
+Field 20 Pend20
+Field 19 Pend19
+Field 18 Pend18
+Field 17 Pend17
+Field 16 Pend16
+Field 15 Pend15
+Field 14 Pend14
+Field 13 Pend13
+Field 12 Pend12
+Field 11 Pend11
+Field 10 Pend10
+Field 9 Pend9
+Field 8 Pend8
+Field 7 Pend7
+Field 6 Pend6
+Field 5 Pend5
+Field 4 Pend4
+Field 3 Pend3
+Field 2 Pend2
+Field 1 Pend1
+Field 0 Pend0
+EndSysregFields
+
+Sysreg ICC_PPI_CPENDR0_EL1 3 0 12 13 4
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_CPENDR1_EL1 3 0 12 13 5
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR0_EL1 3 0 12 13 6
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_SPENDR1_EL1 3 0 12 13 7
+Fields ICC_PPI_PENDRx_EL1
+EndSysreg
+
+SysregFields ICC_PPI_PRIORITYRx_EL1
+Res0 63:61
+Field 60:56 Priority7
+Res0 55:53
+Field 52:48 Priority6
+Res0 47:45
+Field 44:40 Priority5
+Res0 39:37
+Field 36:32 Priority4
+Res0 31:29
+Field 28:24 Priority3
+Res0 23:21
+Field 20:16 Priority2
+Res0 15:13
+Field 12:8 Priority1
+Res0 7:5
+Field 4:0 Priority0
+EndSysregFields
+
+Sysreg ICC_PPI_PRIORITYR0_EL1 3 0 12 14 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR1_EL1 3 0 12 14 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR2_EL1 3 0 12 14 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR3_EL1 3 0 12 14 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR4_EL1 3 0 12 14 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR5_EL1 3 0 12 14 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR6_EL1 3 0 12 14 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR7_EL1 3 0 12 14 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR8_EL1 3 0 12 15 0
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR9_EL1 3 0 12 15 1
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR10_EL1 3 0 12 15 2
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR11_EL1 3 0 12 15 3
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR12_EL1 3 0 12 15 4
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR13_EL1 3 0 12 15 5
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR14_EL1 3 0 12 15 6
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
+Sysreg ICC_PPI_PRIORITYR15_EL1 3 0 12 15 7
+Fields ICC_PPI_PRIORITYRx_EL1
+EndSysreg
+
Sysreg PMSELR_EL0 3 3 9 12 5
Res0 63:5
Field 4:0 SEL
@@ -3235,6 +3667,19 @@ Res0 14:12
Field 11:0 AFFINITY
EndSysreg
+Sysreg ICC_CR0_EL1 3 1 12 0 1
+Res0 63:39
+Field 38 PID
+Field 37:32 IPPT
+Res0 31:1
+Field 0 EN
+EndSysreg
+
+Sysreg ICC_PCR_EL1 3 1 12 0 2
+Res0 63:5
+Field 4:0 PRIORITY
+EndSysreg
+
Sysreg CSSELR_EL1 3 2 0 0 0
Res0 63:5
Field 4 TnD
@@ -4121,6 +4566,54 @@ Field 31:16 PhyPARTID29
Field 15:0 PhyPARTID28
EndSysreg
+Sysreg ICH_HFGRTR_EL2 3 4 12 9 4
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Field 16 ICC_PPI_HMRn_EL1
+Res0 15:8
+Field 7 ICC_IAFFIDR_EL1
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Field 4 ICC_HPPIR_EL1
+Field 3 ICC_HAPR_EL1
+Field 2 ICC_CR0_EL1
+Field 1 ICC_IDRn_EL1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGWTR_EL2 3 4 12 9 6
+Res0 63:21
+Field 20 ICC_PPI_ACTIVERn_EL1
+Field 19 ICC_PPI_PRIORITYRn_EL1
+Field 18 ICC_PPI_PENDRn_EL1
+Field 17 ICC_PPI_ENABLERn_EL1
+Res0 16:7
+Field 6 ICC_ICSR_EL1
+Field 5 ICC_PCR_EL1
+Res0 4:3
+Field 2 ICC_CR0_EL1
+Res0 1
+Field 0 ICC_APR_EL1
+EndSysreg
+
+Sysreg ICH_HFGITR_EL2 3 4 12 9 7
+Res0 63:11
+Field 10 GICRCDNMIA
+Field 9 GICRCDIA
+Field 8 GICCDDI
+Field 7 GICCDEOI
+Field 6 GICCDHM
+Field 5 GICCDRCFG
+Field 4 GICCDPEND
+Field 3 GICCDAFF
+Field 2 GICCDPRI
+Field 1 GICCDDIS
+Field 0 GICCDEN
+EndSysreg
+
Sysreg ICH_HCR_EL2 3 4 12 11 0
Res0 63:32
Field 31:27 EOIcount
@@ -4169,6 +4662,12 @@ Field 1 U
Field 0 EOI
EndSysreg
+Sysreg ICH_VCTLR_EL2 3 4 12 11 4
+Res0 63:2
+Field 1 V3
+Field 0 En
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
@@ -4282,7 +4781,13 @@ Mapping TCR_EL1
EndSysreg
Sysreg TCR2_EL1 3 0 2 0 3
-Res0 63:16
+Res0 63:22
+Field 21 FNGNA1
+Field 20 FNGNA0
+Res0 19
+Field 18 FNG1
+Field 17 FNG0
+Field 16 A2
Field 15 DisCH1
Field 14 DisCH0
Res0 13:12
@@ -4306,7 +4811,10 @@ Mapping TCR2_EL1
EndSysreg
Sysreg TCR2_EL2 3 4 2 0 3
-Res0 63:16
+Res0 63:19
+Field 18 FNG1
+Field 17 FNG0
+Field 16 A2
Field 15 DisCH1
Field 14 DisCH0
Field 13 AMEC1