summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/include/asm/kvm_host.h111
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h1
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h30
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h4
-rw-r--r--arch/arm64/include/asm/kvm_ras.h25
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/kernel/cpufeature.c24
-rw-r--r--arch/arm64/kvm/arm.c8
-rw-r--r--arch/arm64/kvm/at.c6
-rw-r--r--arch/arm64/kvm/emulate-nested.c2
-rw-r--r--arch/arm64/kvm/hyp/exception.c20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/list_debug.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c5
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c25
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c5
-rw-r--r--arch/arm64/kvm/mmu.c65
-rw-r--r--arch/arm64/kvm/nested.c5
-rw-r--r--arch/arm64/kvm/pkvm.c11
-rw-r--r--arch/arm64/kvm/sys_regs.c419
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic.h10
-rw-r--r--arch/arm64/tools/cpucaps1
24 files changed, 468 insertions, 326 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2f2394cce24e..2b07f0a27a7d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1160,115 +1160,8 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
__v; \
})
-u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
-void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
-
-static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
-{
- /*
- * *** VHE ONLY ***
- *
- * System registers listed in the switch are not saved on every
- * exit from the guest but are only saved on vcpu_put.
- *
- * SYSREGS_ON_CPU *MUST* be checked before using this helper.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the guest cannot modify its
- * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
- * thread when emulating cross-VCPU communication.
- */
- if (!has_vhe())
- return false;
-
- switch (reg) {
- case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
- case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
- case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
- case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
- case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
- case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break;
- case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break;
- case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break;
- case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break;
- case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
- case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
- case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
- case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
- case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
- case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
- case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
- case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
- case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break;
- case PAR_EL1: *val = read_sysreg_par(); break;
- case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
- case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
- case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break;
- case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break;
- default: return false;
- }
-
- return true;
-}
-
-static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
-{
- /*
- * *** VHE ONLY ***
- *
- * System registers listed in the switch are not restored on every
- * entry to the guest but are only restored on vcpu_load.
- *
- * SYSREGS_ON_CPU *MUST* be checked before using this helper.
- *
- * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
- * should never be listed below, because the MPIDR should only be set
- * once, before running the VCPU, and never changed later.
- */
- if (!has_vhe())
- return false;
-
- switch (reg) {
- case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
- case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
- case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
- case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
- case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
- case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break;
- case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break;
- case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break;
- case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break;
- case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
- case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
- case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
- case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
- case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
- case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
- case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
- case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
- case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
- case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
- case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
- case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
- case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
- case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
- case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
- case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
- case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
- case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
- case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
- case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break;
- default: return false;
- }
-
- return true;
-}
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg);
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index ae563ebd6aee..e4069f2ce642 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -180,6 +180,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
+int kvm_handle_guest_sea(struct kvm_vcpu *vcpu);
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 2888b5d03757..1246216616b5 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return pteref;
}
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return pteref;
+}
+
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
/*
@@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}
+static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
+{
+ return rcu_dereference_raw(pteref);
+}
+
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -552,6 +562,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
+ * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @addr: Intermediate physical address at which to place the mapping.
+ * @size: Size of the mapping.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior
+ * to freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+
+/**
+ * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ *
+ * It is assumed that the rest of the page-table is freed before this operation.
+ */
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+
+/**
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.
* @pgtable: Unlinked stage-2 paging structure to be freed.
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ea58282f59bb..35f9d9478004 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -179,7 +179,9 @@ struct pkvm_mapping {
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops);
-void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size);
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot, void *mc,
enum kvm_pgtable_walk_flags flags);
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
deleted file mode 100644
index 9398ade632aa..000000000000
--- a/arch/arm64/include/asm/kvm_ras.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2018 - Arm Ltd */
-
-#ifndef __ARM64_KVM_RAS_H__
-#define __ARM64_KVM_RAS_H__
-
-#include <linux/acpi.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-
-#include <asm/acpi.h>
-
-/*
- * Was this synchronous external abort a RAS notification?
- * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
- */
-static inline int kvm_handle_guest_sea(void)
-{
- /* apei_claim_sea(NULL) expects to mask interrupts itself */
- lockdep_assert_irqs_enabled();
-
- return apei_claim_sea(NULL);
-}
-
-#endif /* __ARM64_KVM_RAS_H__ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d5b5f2ae1afa..6604fd6f33f4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1142,9 +1142,6 @@
#define ARM64_FEATURE_FIELD_BITS 4
-/* Defined for compatibility only, do not add new users. */
-#define ARM64_FEATURE_MASK(x) (x##_MASK)
-
#ifdef __ASSEMBLY__
.macro mrs_s, rt, sreg
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9ad065f15f1d..0d45c5e9b4da 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2269,6 +2269,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
/* Firmware may have left a deferred SError in this register. */
write_sysreg_s(0, SYS_DISR_EL1);
}
+static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope)
+{
+ const struct arm64_cpu_capabilities rasv1p1_caps[] = {
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
+ },
+ {
+ ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1)
+ },
+ };
+
+ return (has_cpuid_feature(&rasv1p1_caps[0], scope) ||
+ (has_cpuid_feature(&rasv1p1_caps[1], scope) &&
+ has_cpuid_feature(&rasv1p1_caps[2], scope)));
+}
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -2687,6 +2705,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_clear_disr,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP)
},
+ {
+ .desc = "RASv1p1 Extension Support",
+ .capability = ARM64_HAS_RASV1P1_EXTN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_rasv1p1,
+ },
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 888f7c7abf54..5bf101c869c9 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2408,12 +2408,12 @@ static u64 get_hyp_id_aa64pfr0_el1(void)
*/
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
+ val &= ~(ID_AA64PFR0_EL1_CSV2 |
+ ID_AA64PFR0_EL1_CSV3);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2,
arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
+ val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3,
arm64_get_meltdown_state() == SPECTRE_UNAFFECTED);
return val;
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 0e5610533949..d71ca4ddc9d1 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1420,10 +1420,10 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
return;
/*
- * If we only have a single stage of translation (E2H=0 or
- * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
+ * If we only have a single stage of translation (EL2&0), exit
+ * early. Same thing if {VM,DC}=={0,0}.
*/
- if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
+ if (compute_translation_regime(vcpu, op) == TR_EL20 ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
return;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 90cb4b7ae0ff..af69c897c2c3 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2833,7 +2833,7 @@ int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr)
iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW);
esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL;
- vcpu_write_sys_reg(vcpu, FAR_EL2, addr);
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE)
return kvm_inject_nested(vcpu, esr, except_type_serror);
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 95d186e0bf54..bef40ddb16db 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -22,36 +22,28 @@
static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
- u64 val;
-
- if (unlikely(vcpu_has_nv(vcpu)))
+ if (has_vhe())
return vcpu_read_sys_reg(vcpu, reg);
- else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
- __vcpu_read_sys_reg_from_cpu(reg, &val))
- return val;
return __vcpu_sys_reg(vcpu, reg);
}
static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
- if (unlikely(vcpu_has_nv(vcpu)))
+ if (has_vhe())
vcpu_write_sys_reg(vcpu, val, reg);
- else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) ||
- !__vcpu_write_sys_reg_to_cpu(val, reg))
+ else
__vcpu_assign_sys_reg(vcpu, reg, val);
}
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
u64 val)
{
- if (unlikely(vcpu_has_nv(vcpu))) {
+ if (has_vhe()) {
if (target_mode == PSR_MODE_EL1h)
vcpu_write_sys_reg(vcpu, val, SPSR_EL1);
else
vcpu_write_sys_reg(vcpu, val, SPSR_EL2);
- } else if (has_vhe()) {
- write_sysreg_el1(val, SYS_SPSR);
} else {
__vcpu_assign_sys_reg(vcpu, SPSR_EL1, val);
}
@@ -59,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
{
- if (has_vhe())
+ if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
write_sysreg(val, spsr_abt);
else
vcpu->arch.ctxt.spsr_abt = val;
@@ -67,7 +59,7 @@ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val)
{
- if (has_vhe())
+ if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
write_sysreg(val, spsr_und);
else
vcpu->arch.ctxt.spsr_und = val;
diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c
index 46a2d4f2b3c6..baa6260f88dc 100644
--- a/arch/arm64/kvm/hyp/nvhe/list_debug.c
+++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c
@@ -17,7 +17,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v)
bool corruption = unlikely(condition); \
if (corruption) { \
if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
- BUG_ON(1); \
+ BUG(); \
} else \
WARN_ON(1); \
} \
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 1ddd9ed3cbb3..71d2fc97f004 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -253,6 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+ __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1);
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
@@ -372,6 +373,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Debug and Trace Registers are restricted. */
+ /* Group 1 ID registers */
+ HOST_HANDLED(SYS_REVIDR_EL1),
+
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
AARCH32(SYS_ID_PFR0_EL1),
@@ -460,6 +464,7 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
HOST_HANDLED(SYS_CCSIDR_EL1),
HOST_HANDLED(SYS_CLIDR_EL1),
+ HOST_HANDLED(SYS_AIDR_EL1),
HOST_HANDLED(SYS_CSSELR_EL1),
HOST_HANDLED(SYS_CTR_EL0),
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c351b4abd5db..c36f282a175d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
return 0;
}
-void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size)
{
- size_t pgd_sz;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
};
- WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
+ WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
+}
+
+void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
+{
+ size_t pgd_sz;
+
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
- pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
+
+ /*
+ * Since the pgtable is unlinked at this point, and not shared with
+ * other walkers, safely deference pgd with kvm_dereference_pteref_raw()
+ */
+ pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
pgt->pgd = NULL;
}
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+ kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
+ kvm_pgtable_stage2_destroy_pgd(pgt);
+}
+
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 87a54375bd6e..78579b31a420 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -20,7 +20,7 @@ static bool __is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
- return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
+ return !!(read_sysreg_el1(SYS_SCTLR) & SCTLR_ELx_EE);
}
/*
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index e482181c6632..0998ad4a2552 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -43,8 +43,11 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
*
* - API/APK: they are already accounted for by vcpu_load(), and can
* only take effect across a load/put cycle (such as ERET)
+ *
+ * - FIEN: no way we let a guest have access to the RAS "Common Fault
+ * Injection" thing, whatever that does
*/
-#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK | HCR_FIEN)
static u64 __compute_hcr(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1c78864767c5..86f3d80daf37 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -4,19 +4,20 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/acpi.h>
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
+#include <asm/acpi.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
-#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
@@ -903,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
return 0;
}
+/*
+ * Assume that @pgt is valid and unlinked from the KVM MMU to free the
+ * page-table without taking the kvm_mmu_lock and without performing any
+ * TLB invalidations.
+ *
+ * Also, the range of addresses can be large enough to cause need_resched
+ * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
+ * cond_resched() periodically to prevent hogging the CPU for a long time
+ * and schedule something else, if required.
+ */
+static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
+ phys_addr_t end)
+{
+ u64 next;
+
+ do {
+ next = stage2_range_addr_end(addr, end);
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
+ next - addr);
+ if (next != end)
+ cond_resched();
+ } while (addr = next, addr != end);
+}
+
+static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
+{
+ unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
+
+ stage2_destroy_range(pgt, 0, BIT(ia_bits));
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
+}
+
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
@@ -979,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return 0;
out_destroy_pgtable:
- KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
+ kvm_stage2_destroy(pgt);
out_free_pgtable:
kfree(pgt);
return err;
@@ -1076,7 +1109,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
- KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
+ kvm_stage2_destroy(pgt);
kfree(pgt);
}
}
@@ -1811,6 +1844,19 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
read_unlock(&vcpu->kvm->mmu_lock);
}
+int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Give APEI the opportunity to claim the abort before handling it
+ * within KVM. apei_claim_sea() expects to be called with IRQs enabled.
+ */
+ lockdep_assert_irqs_enabled();
+ if (apei_claim_sea(NULL) == 0)
+ return 1;
+
+ return kvm_inject_serror(vcpu);
+}
+
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
@@ -1834,17 +1880,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;
- /* Synchronous External Abort? */
- if (kvm_vcpu_abt_issea(vcpu)) {
- /*
- * For RAS the host kernel may handle this abort.
- * There is no need to pass the error into the guest.
- */
- if (kvm_handle_guest_sea())
- return kvm_inject_serror(vcpu);
-
- return 1;
- }
+ if (kvm_vcpu_abt_issea(vcpu))
+ return kvm_handle_guest_sea(vcpu);
esr = kvm_vcpu_get_esr(vcpu);
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 153b3e11b115..77db81bae86f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1287,7 +1287,10 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
u64 esr = kvm_vcpu_get_esr(vcpu);
- BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT));
+ WARN_ON_ONCE(!(esr & ESR_ELx_VNCR));
+
+ if (kvm_vcpu_abt_issea(vcpu))
+ return kvm_handle_guest_sea(vcpu);
if (esr_fsc_is_permission_fault(esr)) {
inject_vncr_perm(vcpu);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index fcd70bfe44fb..61827cf6fea4 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
return 0;
}
-void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
+ u64 addr, u64 size)
{
- __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
+ __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+}
+
+void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
+{
+ /* Expected to be called after all pKVM mappings have been released. */
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 82ffb3b3b3cf..b29f72478a50 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -82,43 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
"sys_reg write to read-only register");
}
-#define PURE_EL2_SYSREG(el2) \
- case el2: { \
- *el1r = el2; \
- return true; \
+enum sr_loc_attr {
+ SR_LOC_MEMORY = 0, /* Register definitely in memory */
+ SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */
+ SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */
+ SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */
+ SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */
+};
+
+struct sr_loc {
+ enum sr_loc_attr loc;
+ enum vcpu_sysreg map_reg;
+ u64 (*xlate)(u64);
+};
+
+static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg reg)
+{
+ switch (reg) {
+ case SCTLR_EL1:
+ case CPACR_EL1:
+ case TTBR0_EL1:
+ case TTBR1_EL1:
+ case TCR_EL1:
+ case TCR2_EL1:
+ case PIR_EL1:
+ case PIRE0_EL1:
+ case POR_EL1:
+ case ESR_EL1:
+ case AFSR0_EL1:
+ case AFSR1_EL1:
+ case FAR_EL1:
+ case MAIR_EL1:
+ case VBAR_EL1:
+ case CONTEXTIDR_EL1:
+ case AMAIR_EL1:
+ case CNTKCTL_EL1:
+ case ELR_EL1:
+ case SPSR_EL1:
+ case ZCR_EL1:
+ case SCTLR2_EL1:
+ /*
+ * EL1 registers which have an ELx2 mapping are loaded if
+ * we're not in hypervisor context.
+ */
+ return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED;
+
+ case TPIDR_EL0:
+ case TPIDRRO_EL0:
+ case TPIDR_EL1:
+ case PAR_EL1:
+ case DACR32_EL2:
+ case IFSR32_EL2:
+ case DBGVCR32_EL2:
+ /* These registers are always loaded, no matter what */
+ return SR_LOC_LOADED;
+
+ default:
+ /* Non-mapped EL2 registers are by definition in memory. */
+ return SR_LOC_MEMORY;
}
+}
-#define MAPPED_EL2_SYSREG(el2, el1, fn) \
- case el2: { \
- *xlate = fn; \
- *el1r = el1; \
- return true; \
+static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg reg,
+ enum vcpu_sysreg map_reg,
+ u64 (*xlate)(u64),
+ struct sr_loc *loc)
+{
+ if (!is_hyp_ctxt(vcpu)) {
+ loc->loc = SR_LOC_MEMORY;
+ return;
+ }
+
+ loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED;
+ loc->map_reg = map_reg;
+
+ WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY);
+
+ if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) {
+ loc->loc |= SR_LOC_XLATED;
+ loc->xlate = xlate;
}
+}
-static bool get_el2_to_el1_mapping(unsigned int reg,
- unsigned int *el1r, u64 (**xlate)(u64))
+#define MAPPED_EL2_SYSREG(r, m, t) \
+ case r: { \
+ locate_mapped_el2_register(vcpu, r, m, t, loc); \
+ break; \
+ }
+
+static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg,
+ struct sr_loc *loc)
{
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) {
+ loc->loc = SR_LOC_MEMORY;
+ return;
+ }
+
switch (reg) {
- PURE_EL2_SYSREG( VPIDR_EL2 );
- PURE_EL2_SYSREG( VMPIDR_EL2 );
- PURE_EL2_SYSREG( ACTLR_EL2 );
- PURE_EL2_SYSREG( HCR_EL2 );
- PURE_EL2_SYSREG( MDCR_EL2 );
- PURE_EL2_SYSREG( HSTR_EL2 );
- PURE_EL2_SYSREG( HACR_EL2 );
- PURE_EL2_SYSREG( VTTBR_EL2 );
- PURE_EL2_SYSREG( VTCR_EL2 );
- PURE_EL2_SYSREG( TPIDR_EL2 );
- PURE_EL2_SYSREG( HPFAR_EL2 );
- PURE_EL2_SYSREG( HCRX_EL2 );
- PURE_EL2_SYSREG( HFGRTR_EL2 );
- PURE_EL2_SYSREG( HFGWTR_EL2 );
- PURE_EL2_SYSREG( HFGITR_EL2 );
- PURE_EL2_SYSREG( HDFGRTR_EL2 );
- PURE_EL2_SYSREG( HDFGWTR_EL2 );
- PURE_EL2_SYSREG( HAFGRTR_EL2 );
- PURE_EL2_SYSREG( CNTVOFF_EL2 );
- PURE_EL2_SYSREG( CNTHCTL_EL2 );
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
translate_sctlr_el2_to_sctlr_el1 );
MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1,
@@ -144,125 +206,189 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL );
+ case CNTHCTL_EL2:
+ /* CNTHCTL_EL2 is super special, until we support NV2.1 */
+ loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ?
+ SR_LOC_SPECIAL : SR_LOC_MEMORY);
+ break;
default:
- return false;
+ loc->loc = locate_direct_register(vcpu, reg);
}
}
-u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
+static u64 read_sr_from_cpu(enum vcpu_sysreg reg)
{
u64 val = 0x8badf00d8badf00d;
- u64 (*xlate)(u64) = NULL;
- unsigned int el1r;
- if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
- goto memory_read;
+ switch (reg) {
+ case SCTLR_EL1: val = read_sysreg_s(SYS_SCTLR_EL12); break;
+ case CPACR_EL1: val = read_sysreg_s(SYS_CPACR_EL12); break;
+ case TTBR0_EL1: val = read_sysreg_s(SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: val = read_sysreg_s(SYS_TTBR1_EL12); break;
+ case TCR_EL1: val = read_sysreg_s(SYS_TCR_EL12); break;
+ case TCR2_EL1: val = read_sysreg_s(SYS_TCR2_EL12); break;
+ case PIR_EL1: val = read_sysreg_s(SYS_PIR_EL12); break;
+ case PIRE0_EL1: val = read_sysreg_s(SYS_PIRE0_EL12); break;
+ case POR_EL1: val = read_sysreg_s(SYS_POR_EL12); break;
+ case ESR_EL1: val = read_sysreg_s(SYS_ESR_EL12); break;
+ case AFSR0_EL1: val = read_sysreg_s(SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: val = read_sysreg_s(SYS_AFSR1_EL12); break;
+ case FAR_EL1: val = read_sysreg_s(SYS_FAR_EL12); break;
+ case MAIR_EL1: val = read_sysreg_s(SYS_MAIR_EL12); break;
+ case VBAR_EL1: val = read_sysreg_s(SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+ case AMAIR_EL1: val = read_sysreg_s(SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: val = read_sysreg_s(SYS_ELR_EL12); break;
+ case SPSR_EL1: val = read_sysreg_s(SYS_SPSR_EL12); break;
+ case ZCR_EL1: val = read_sysreg_s(SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: val = read_sysreg_s(SYS_SCTLR2_EL12); break;
+ case TPIDR_EL0: val = read_sysreg_s(SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: val = read_sysreg_s(SYS_TPIDR_EL1); break;
+ case PAR_EL1: val = read_sysreg_par(); break;
+ case DACR32_EL2: val = read_sysreg_s(SYS_DACR32_EL2); break;
+ case IFSR32_EL2: val = read_sysreg_s(SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
+ default: WARN_ON_ONCE(1);
+ }
- if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
- if (!is_hyp_ctxt(vcpu))
- goto memory_read;
+ return val;
+}
+
+static void write_sr_to_cpu(enum vcpu_sysreg reg, u64 val)
+{
+ switch (reg) {
+ case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
+ case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
+ case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
+ case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
+ case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
+ case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break;
+ case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break;
+ case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break;
+ case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break;
+ case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
+ case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
+ case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
+ case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
+ case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
+ case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+ case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
+ case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
+ case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
+ case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
+ case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
+ case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
+ default: WARN_ON_ONCE(1);
+ }
+}
+
+u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)
+{
+ struct sr_loc loc = {};
+
+ locate_register(vcpu, reg, &loc);
+
+ WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY);
+
+ if (loc.loc & SR_LOC_SPECIAL) {
+ u64 val;
+
+ WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL);
/*
- * CNTHCTL_EL2 requires some special treatment to
- * account for the bits that can be set via CNTKCTL_EL1.
+ * CNTHCTL_EL2 requires some special treatment to account
+ * for the bits that can be set via CNTKCTL_EL1 when E2H==1.
*/
switch (reg) {
case CNTHCTL_EL2:
- if (vcpu_el2_e2h_is_set(vcpu)) {
- val = read_sysreg_el1(SYS_CNTKCTL);
- val &= CNTKCTL_VALID_BITS;
- val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
- return val;
- }
- break;
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
+ return val;
+ default:
+ WARN_ON_ONCE(1);
}
+ }
- /*
- * If this register does not have an EL1 counterpart,
- * then read the stored EL2 version.
- */
- if (reg == el1r)
- goto memory_read;
+ if (loc.loc & SR_LOC_LOADED) {
+ enum vcpu_sysreg map_reg = reg;
- /*
- * If we have a non-VHE guest and that the sysreg
- * requires translation to be used at EL1, use the
- * in-memory copy instead.
- */
- if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
- goto memory_read;
+ if (loc.loc & SR_LOC_MAPPED)
+ map_reg = loc.map_reg;
- /* Get the current version of the EL1 counterpart. */
- WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
- if (reg >= __SANITISED_REG_START__)
- val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
-
- return val;
- }
+ if (!(loc.loc & SR_LOC_XLATED)) {
+ u64 val = read_sr_from_cpu(map_reg);
- /* EL1 register can't be on the CPU if the guest is in vEL2. */
- if (unlikely(is_hyp_ctxt(vcpu)))
- goto memory_read;
+ if (reg >= __SANITISED_REG_START__)
+ val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
- if (__vcpu_read_sys_reg_from_cpu(reg, &val))
- return val;
+ return val;
+ }
+ }
-memory_read:
return __vcpu_sys_reg(vcpu, reg);
}
-void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg)
{
- u64 (*xlate)(u64) = NULL;
- unsigned int el1r;
+ struct sr_loc loc = {};
- if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
- goto memory_write;
+ locate_register(vcpu, reg, &loc);
- if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
- if (!is_hyp_ctxt(vcpu))
- goto memory_write;
+ WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY);
- /*
- * Always store a copy of the write to memory to avoid having
- * to reverse-translate virtual EL2 system registers for a
- * non-VHE guest hypervisor.
- */
- __vcpu_assign_sys_reg(vcpu, reg, val);
+ if (loc.loc & SR_LOC_SPECIAL) {
+
+ WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL);
switch (reg) {
case CNTHCTL_EL2:
/*
- * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
- * Otherwise, some of the bits are backed by
+ * If E2H=1, some of the bits are backed by
* CNTKCTL_EL1, while the rest is kept in memory.
* Yes, this is fun stuff.
*/
- if (vcpu_el2_e2h_is_set(vcpu))
- write_sysreg_el1(val, SYS_CNTKCTL);
- return;
+ write_sysreg_el1(val, SYS_CNTKCTL);
+ break;
+ default:
+ WARN_ON_ONCE(1);
}
+ }
- /* No EL1 counterpart? We're done here.? */
- if (reg == el1r)
- return;
+ if (loc.loc & SR_LOC_LOADED) {
+ enum vcpu_sysreg map_reg = reg;
+ u64 xlated_val;
- if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
- val = xlate(val);
+ if (reg >= __SANITISED_REG_START__)
+ val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
- /* Redirect this to the EL1 version of the register. */
- WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r));
- return;
- }
+ if (loc.loc & SR_LOC_MAPPED)
+ map_reg = loc.map_reg;
- /* EL1 register can't be on the CPU if the guest is in vEL2. */
- if (unlikely(is_hyp_ctxt(vcpu)))
- goto memory_write;
+ if (loc.loc & SR_LOC_XLATED)
+ xlated_val = loc.xlate(val);
+ else
+ xlated_val = val;
- if (__vcpu_write_sys_reg_to_cpu(val, reg))
- return;
+ write_sr_to_cpu(map_reg, xlated_val);
+
+ /*
+ * Fall through to write the backing store anyway, which
+ * allows translated registers to be directly read without a
+ * reverse translation.
+ */
+ }
-memory_write:
__vcpu_assign_sys_reg(vcpu, reg, val);
}
@@ -1584,6 +1710,7 @@ static u8 pmuver_to_perfmon(u8 pmuver)
}
static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
+static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val);
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
/* Read a sanitised cpufeature ID register by sys_reg_desc */
@@ -1606,19 +1733,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val = sanitise_id_aa64pfr0_el1(vcpu, val);
break;
case SYS_ID_AA64PFR1_EL1:
- if (!kvm_has_mte(vcpu->kvm)) {
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac);
- }
-
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
+ val = sanitise_id_aa64pfr1_el1(vcpu, val);
break;
case SYS_ID_AA64PFR2_EL1:
val &= ID_AA64PFR2_EL1_FPMR |
@@ -1628,18 +1743,18 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
- val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
+ val &= ~(ID_AA64ISAR1_EL1_APA |
+ ID_AA64ISAR1_EL1_API |
+ ID_AA64ISAR1_EL1_GPA |
+ ID_AA64ISAR1_EL1_GPI);
break;
case SYS_ID_AA64ISAR2_EL1:
if (!vcpu_has_ptrauth(vcpu))
- val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+ val &= ~(ID_AA64ISAR2_EL1_APA3 |
+ ID_AA64ISAR2_EL1_GPA3);
if (!cpus_have_final_cap(ARM64_HAS_WFXT) ||
has_broken_cntvoff())
- val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
+ val &= ~ID_AA64ISAR2_EL1_WFxT;
break;
case SYS_ID_AA64ISAR3_EL1:
val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX;
@@ -1655,7 +1770,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
ID_AA64MMFR3_EL1_S1PIE;
break;
case SYS_ID_MMFR4_EL1:
- val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
+ val &= ~ID_MMFR4_EL1_CCIDX;
break;
}
@@ -1836,6 +1951,31 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
return val;
}
+static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val)
+{
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ if (!kvm_has_mte(vcpu->kvm)) {
+ val &= ~ID_AA64PFR1_EL1_MTE;
+ val &= ~ID_AA64PFR1_EL1_MTE_frac;
+ }
+
+ if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) &&
+ SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP))
+ val &= ~ID_AA64PFR1_EL1_RAS_frac;
+
+ val &= ~ID_AA64PFR1_EL1_SME;
+ val &= ~ID_AA64PFR1_EL1_RNDR_trap;
+ val &= ~ID_AA64PFR1_EL1_NMI;
+ val &= ~ID_AA64PFR1_EL1_GCS;
+ val &= ~ID_AA64PFR1_EL1_THE;
+ val &= ~ID_AA64PFR1_EL1_MTEX;
+ val &= ~ID_AA64PFR1_EL1_PFAR;
+ val &= ~ID_AA64PFR1_EL1_MPAM_frac;
+
+ return val;
+}
+
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
@@ -2697,6 +2837,18 @@ static bool access_ras(struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
switch(reg_to_encoding(r)) {
+ case SYS_ERXPFGCDN_EL1:
+ case SYS_ERXPFGCTL_EL1:
+ case SYS_ERXPFGF_EL1:
+ case SYS_ERXMISC2_EL1:
+ case SYS_ERXMISC3_EL1:
+ if (!(kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) ||
+ (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) &&
+ kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)))) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+ break;
default:
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
kvm_inject_undefined(vcpu);
@@ -2929,7 +3081,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
~(ID_AA64PFR0_EL1_AMU |
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SVE |
- ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP)),
ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1,
@@ -2943,7 +3094,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR1_EL1_SME |
ID_AA64PFR1_EL1_RES0 |
ID_AA64PFR1_EL1_MPAM_frac |
- ID_AA64PFR1_EL1_RAS_frac |
ID_AA64PFR1_EL1_MTE)),
ID_WRITABLE(ID_AA64PFR2_EL1,
ID_AA64PFR2_EL1_FPMR |
@@ -3063,8 +3213,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXCTLR_EL1), access_ras },
{ SYS_DESC(SYS_ERXSTATUS_EL1), access_ras },
{ SYS_DESC(SYS_ERXADDR_EL1), access_ras },
+ { SYS_DESC(SYS_ERXPFGF_EL1), access_ras },
+ { SYS_DESC(SYS_ERXPFGCTL_EL1), access_ras },
+ { SYS_DESC(SYS_ERXPFGCDN_EL1), access_ras },
{ SYS_DESC(SYS_ERXMISC0_EL1), access_ras },
{ SYS_DESC(SYS_ERXMISC1_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC2_EL1), access_ras },
+ { SYS_DESC(SYS_ERXMISC3_EL1), access_ras },
MTE_REG(TFSR_EL1),
MTE_REG(TFSRE0_EL1),
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a3ef185209e9..70d50c77e5dc 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -50,6 +50,14 @@ bool vgic_has_its(struct kvm *kvm)
bool vgic_supports_direct_msis(struct kvm *kvm)
{
+ /*
+ * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware,
+ * indirectly allowing userspace to control whether or not vPEs are
+ * allocated for the VM.
+ */
+ if (system_supports_direct_sgis() && !vgic_supports_direct_sgis(kvm))
+ return false;
+
return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index e416e433baff..a573b1f0c6cb 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -1091,7 +1091,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
len = vgic_v3_init_dist_iodev(io_device);
break;
default:
- BUG_ON(1);
+ BUG();
}
io_device->base_addr = dist_base_address;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 1384a04c0784..de1c1d3261c3 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -396,15 +396,7 @@ bool vgic_supports_direct_sgis(struct kvm *kvm);
static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
{
- /*
- * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware,
- * indirectly allowing userspace to control whether or not vPEs are
- * allocated for the VM.
- */
- if (system_supports_direct_sgis())
- return vgic_supports_direct_sgis(kvm);
-
- return vgic_supports_direct_msis(kvm);
+ return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm);
}
int vgic_v4_init(struct kvm *kvm);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index ef0b7946f5a4..9ff5cdbd2759 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -53,6 +53,7 @@ HAS_S1PIE
HAS_S1POE
HAS_SCTLR2
HAS_RAS_EXTN
+HAS_RASV1P1_EXTN
HAS_RNG
HAS_SB
HAS_STAGE2_FWB