diff options
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 111 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pgtable.h | 30 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pkvm.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_ras.h | 25 | ||||
-rw-r--r-- | arch/arm64/include/asm/sysreg.h | 3 | ||||
-rw-r--r-- | arch/arm64/kernel/cpufeature.c | 24 | ||||
-rw-r--r-- | arch/arm64/kvm/arm.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/at.c | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/emulate-nested.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/exception.c | 20 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/list_debug.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/sys_regs.c | 5 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 25 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/switch.c | 5 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 65 | ||||
-rw-r--r-- | arch/arm64/kvm/nested.c | 5 | ||||
-rw-r--r-- | arch/arm64/kvm/pkvm.c | 11 | ||||
-rw-r--r-- | arch/arm64/kvm/sys_regs.c | 419 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio-v3.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-mmio.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.h | 10 | ||||
-rw-r--r-- | arch/arm64/tools/cpucaps | 1 |
24 files changed, 468 insertions, 326 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2f2394cce24e..2b07f0a27a7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1160,115 +1160,8 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); __v; \ }) -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); - -static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break; - case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break; - case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break; - case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; - case SCTLR2_EL1: *val = read_sysreg_s(SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} - -static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * *** VHE ONLY *** - * - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * SYSREGS_ON_CPU *MUST* be checked before using this helper. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - if (!has_vhe()) - return false; - - switch (reg) { - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; - case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; - case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; - case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; - case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; - default: return false; - } - - return true; -} +u64 vcpu_read_sys_reg(const struct kvm_vcpu *, enum vcpu_sysreg); +void vcpu_write_sys_reg(struct kvm_vcpu *, u64, enum vcpu_sysreg); struct kvm_vm_stat { struct kvm_vm_stat_generic generic; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ae563ebd6aee..e4069f2ce642 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,6 +180,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 2888b5d03757..1246216616b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return pteref; } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return pteref; +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { /* @@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); } +static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) +{ + return rcu_dereference_raw(pteref); +} + static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) { if (walker->flags & KVM_PGTABLE_WALK_SHARED) @@ -552,6 +562,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); /** + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). + * + * It is assumed that the rest of the page-table is freed before this operation. + */ +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); + +/** * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure. * @mm_ops: Memory management callbacks. * @pgtable: Unlinked stage-2 paging structure to be freed. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index ea58282f59bb..35f9d9478004 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -179,7 +179,9 @@ struct pkvm_mapping { int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops); -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size); +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, void *mc, enum kvm_pgtable_walk_flags flags); diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h deleted file mode 100644 index 9398ade632aa..000000000000 --- a/arch/arm64/include/asm/kvm_ras.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2018 - Arm Ltd */ - -#ifndef __ARM64_KVM_RAS_H__ -#define __ARM64_KVM_RAS_H__ - -#include <linux/acpi.h> -#include <linux/errno.h> -#include <linux/types.h> - -#include <asm/acpi.h> - -/* - * Was this synchronous external abort a RAS notification? - * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. - */ -static inline int kvm_handle_guest_sea(void) -{ - /* apei_claim_sea(NULL) expects to mask interrupts itself */ - lockdep_assert_irqs_enabled(); - - return apei_claim_sea(NULL); -} - -#endif /* __ARM64_KVM_RAS_H__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d5b5f2ae1afa..6604fd6f33f4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1142,9 +1142,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9ad065f15f1d..0d45c5e9b4da 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2269,6 +2269,24 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) /* Firmware may have left a deferred SError in this register. */ write_sysreg_s(0, SYS_DISR_EL1); } +static bool has_rasv1p1(const struct arm64_cpu_capabilities *__unused, int scope) +{ + const struct arm64_cpu_capabilities rasv1p1_caps[] = { + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, V1P1) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) + }, + { + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, RAS_frac, RASv1p1) + }, + }; + + return (has_cpuid_feature(&rasv1p1_caps[0], scope) || + (has_cpuid_feature(&rasv1p1_caps[1], scope) && + has_cpuid_feature(&rasv1p1_caps[2], scope))); +} #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -2687,6 +2705,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_clear_disr, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) }, + { + .desc = "RASv1p1 Extension Support", + .capability = ARM64_HAS_RASV1P1_EXTN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_rasv1p1, + }, #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 888f7c7abf54..5bf101c869c9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2408,12 +2408,12 @@ static u64 get_hyp_id_aa64pfr0_el1(void) */ u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); + val &= ~(ID_AA64PFR0_EL1_CSV2 | + ID_AA64PFR0_EL1_CSV3); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2, arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), + val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3, arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); return val; diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 0e5610533949..d71ca4ddc9d1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1420,10 +1420,10 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) return; /* - * If we only have a single stage of translation (E2H=0 or - * TGE=1), exit early. Same thing if {VM,DC}=={0,0}. + * If we only have a single stage of translation (EL2&0), exit + * early. Same thing if {VM,DC}=={0,0}. */ - if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) || + if (compute_translation_regime(vcpu, op) == TR_EL20 || !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) return; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 90cb4b7ae0ff..af69c897c2c3 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2833,7 +2833,7 @@ int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW); esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL; - vcpu_write_sys_reg(vcpu, FAR_EL2, addr); + vcpu_write_sys_reg(vcpu, addr, FAR_EL2); if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE) return kvm_inject_nested(vcpu, esr, except_type_serror); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 95d186e0bf54..bef40ddb16db 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -22,36 +22,28 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { - u64 val; - - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) return vcpu_read_sys_reg(vcpu, reg); - else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && - __vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; return __vcpu_sys_reg(vcpu, reg); } static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { - if (unlikely(vcpu_has_nv(vcpu))) + if (has_vhe()) vcpu_write_sys_reg(vcpu, val, reg); - else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) || - !__vcpu_write_sys_reg_to_cpu(val, reg)) + else __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, u64 val) { - if (unlikely(vcpu_has_nv(vcpu))) { + if (has_vhe()) { if (target_mode == PSR_MODE_EL1h) vcpu_write_sys_reg(vcpu, val, SPSR_EL1); else vcpu_write_sys_reg(vcpu, val, SPSR_EL2); - } else if (has_vhe()) { - write_sysreg_el1(val, SYS_SPSR); } else { __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } @@ -59,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_abt); else vcpu->arch.ctxt.spsr_abt = val; @@ -67,7 +59,7 @@ static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) { - if (has_vhe()) + if (has_vhe() && vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) write_sysreg(val, spsr_und); else vcpu->arch.ctxt.spsr_und = val; diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index 46a2d4f2b3c6..baa6260f88dc 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -17,7 +17,7 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) bool corruption = unlikely(condition); \ if (corruption) { \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ - BUG_ON(1); \ + BUG(); \ } else \ WARN_ON(1); \ } \ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 1ddd9ed3cbb3..71d2fc97f004 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -253,6 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1); kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); @@ -372,6 +373,9 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Debug and Trace Registers are restricted. */ + /* Group 1 ID registers */ + HOST_HANDLED(SYS_REVIDR_EL1), + /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ AARCH32(SYS_ID_PFR0_EL1), @@ -460,6 +464,7 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_AIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), HOST_HANDLED(SYS_CTR_EL0), diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c351b4abd5db..c36f282a175d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, return 0; } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); +} + +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); + + /* + * Since the pgtable is unlinked at this point, and not shared with + * other walkers, safely deference pgd with kvm_dereference_pteref_raw() + */ + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); pgt->pgd = NULL; } +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); + kvm_pgtable_stage2_destroy_pgd(pgt); +} + void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 87a54375bd6e..78579b31a420 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -20,7 +20,7 @@ static bool __is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); - return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); + return !!(read_sysreg_el1(SYS_SCTLR) & SCTLR_ELx_EE); } /* diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e482181c6632..0998ad4a2552 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -43,8 +43,11 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); * * - API/APK: they are already accounted for by vcpu_load(), and can * only take effect across a load/put cycle (such as ERET) + * + * - FIEN: no way we let a guest have access to the RAS "Common Fault + * Injection" thing, whatever that does */ -#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK) +#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK | HCR_FIEN) static u64 __compute_hcr(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1c78864767c5..86f3d80daf37 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -4,19 +4,20 @@ * Author: Christoffer Dall <c.dall@virtualopensystems.com> */ +#include <linux/acpi.h> #include <linux/mman.h> #include <linux/kvm_host.h> #include <linux/io.h> #include <linux/hugetlb.h> #include <linux/sched/signal.h> #include <trace/events/kvm.h> +#include <asm/acpi.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_pgtable.h> #include <asm/kvm_pkvm.h> -#include <asm/kvm_ras.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> #include <asm/virt.h> @@ -903,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) return 0; } +/* + * Assume that @pgt is valid and unlinked from the KVM MMU to free the + * page-table without taking the kvm_mmu_lock and without performing any + * TLB invalidations. + * + * Also, the range of addresses can be large enough to cause need_resched + * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke + * cond_resched() periodically to prevent hogging the CPU for a long time + * and schedule something else, if required. + */ +static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, + phys_addr_t end) +{ + u64 next; + + do { + next = stage2_range_addr_end(addr, end); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, + next - addr); + if (next != end) + cond_resched(); + } while (addr = next, addr != end); +} + +static void kvm_stage2_destroy(struct kvm_pgtable *pgt) +{ + unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); + + stage2_destroy_range(pgt, 0, BIT(ia_bits)); + KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); +} + /** * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure @@ -979,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t return 0; out_destroy_pgtable: - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); out_free_pgtable: kfree(pgt); return err; @@ -1076,7 +1109,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) write_unlock(&kvm->mmu_lock); if (pgt) { - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); + kvm_stage2_destroy(pgt); kfree(pgt); } } @@ -1811,6 +1844,19 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) read_unlock(&vcpu->kvm->mmu_lock); } +int kvm_handle_guest_sea(struct kvm_vcpu *vcpu) +{ + /* + * Give APEI the opportunity to claim the abort before handling it + * within KVM. apei_claim_sea() expects to be called with IRQs enabled. + */ + lockdep_assert_irqs_enabled(); + if (apei_claim_sea(NULL) == 0) + return 1; + + return kvm_inject_serror(vcpu); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1834,17 +1880,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea()) - return kvm_inject_serror(vcpu); - - return 1; - } + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); esr = kvm_vcpu_get_esr(vcpu); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 153b3e11b115..77db81bae86f 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1287,7 +1287,10 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) struct vncr_tlb *vt = vcpu->arch.vncr_tlb; u64 esr = kvm_vcpu_get_esr(vcpu); - BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT)); + WARN_ON_ONCE(!(esr & ESR_ELx_VNCR)); + + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index fcd70bfe44fb..61827cf6fea4 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e return 0; } -void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); + __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); +} + +void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + /* Expected to be called after all pKVM mappings have been released. */ + WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); } int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 82ffb3b3b3cf..b29f72478a50 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -82,43 +82,105 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, "sys_reg write to read-only register"); } -#define PURE_EL2_SYSREG(el2) \ - case el2: { \ - *el1r = el2; \ - return true; \ +enum sr_loc_attr { + SR_LOC_MEMORY = 0, /* Register definitely in memory */ + SR_LOC_LOADED = BIT(0), /* Register on CPU, unless it cannot */ + SR_LOC_MAPPED = BIT(1), /* Register in a different CPU register */ + SR_LOC_XLATED = BIT(2), /* Register translated to fit another reg */ + SR_LOC_SPECIAL = BIT(3), /* Demanding register, implies loaded */ +}; + +struct sr_loc { + enum sr_loc_attr loc; + enum vcpu_sysreg map_reg; + u64 (*xlate)(u64); +}; + +static enum sr_loc_attr locate_direct_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg) +{ + switch (reg) { + case SCTLR_EL1: + case CPACR_EL1: + case TTBR0_EL1: + case TTBR1_EL1: + case TCR_EL1: + case TCR2_EL1: + case PIR_EL1: + case PIRE0_EL1: + case POR_EL1: + case ESR_EL1: + case AFSR0_EL1: + case AFSR1_EL1: + case FAR_EL1: + case MAIR_EL1: + case VBAR_EL1: + case CONTEXTIDR_EL1: + case AMAIR_EL1: + case CNTKCTL_EL1: + case ELR_EL1: + case SPSR_EL1: + case ZCR_EL1: + case SCTLR2_EL1: + /* + * EL1 registers which have an ELx2 mapping are loaded if + * we're not in hypervisor context. + */ + return is_hyp_ctxt(vcpu) ? SR_LOC_MEMORY : SR_LOC_LOADED; + + case TPIDR_EL0: + case TPIDRRO_EL0: + case TPIDR_EL1: + case PAR_EL1: + case DACR32_EL2: + case IFSR32_EL2: + case DBGVCR32_EL2: + /* These registers are always loaded, no matter what */ + return SR_LOC_LOADED; + + default: + /* Non-mapped EL2 registers are by definition in memory. */ + return SR_LOC_MEMORY; } +} -#define MAPPED_EL2_SYSREG(el2, el1, fn) \ - case el2: { \ - *xlate = fn; \ - *el1r = el1; \ - return true; \ +static void locate_mapped_el2_register(const struct kvm_vcpu *vcpu, + enum vcpu_sysreg reg, + enum vcpu_sysreg map_reg, + u64 (*xlate)(u64), + struct sr_loc *loc) +{ + if (!is_hyp_ctxt(vcpu)) { + loc->loc = SR_LOC_MEMORY; + return; + } + + loc->loc = SR_LOC_LOADED | SR_LOC_MAPPED; + loc->map_reg = map_reg; + + WARN_ON(locate_direct_register(vcpu, map_reg) != SR_LOC_MEMORY); + + if (xlate != NULL && !vcpu_el2_e2h_is_set(vcpu)) { + loc->loc |= SR_LOC_XLATED; + loc->xlate = xlate; } +} -static bool get_el2_to_el1_mapping(unsigned int reg, - unsigned int *el1r, u64 (**xlate)(u64)) +#define MAPPED_EL2_SYSREG(r, m, t) \ + case r: { \ + locate_mapped_el2_register(vcpu, r, m, t, loc); \ + break; \ + } + +static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, + struct sr_loc *loc) { + if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) { + loc->loc = SR_LOC_MEMORY; + return; + } + switch (reg) { - PURE_EL2_SYSREG( VPIDR_EL2 ); - PURE_EL2_SYSREG( VMPIDR_EL2 ); - PURE_EL2_SYSREG( ACTLR_EL2 ); - PURE_EL2_SYSREG( HCR_EL2 ); - PURE_EL2_SYSREG( MDCR_EL2 ); - PURE_EL2_SYSREG( HSTR_EL2 ); - PURE_EL2_SYSREG( HACR_EL2 ); - PURE_EL2_SYSREG( VTTBR_EL2 ); - PURE_EL2_SYSREG( VTCR_EL2 ); - PURE_EL2_SYSREG( TPIDR_EL2 ); - PURE_EL2_SYSREG( HPFAR_EL2 ); - PURE_EL2_SYSREG( HCRX_EL2 ); - PURE_EL2_SYSREG( HFGRTR_EL2 ); - PURE_EL2_SYSREG( HFGWTR_EL2 ); - PURE_EL2_SYSREG( HFGITR_EL2 ); - PURE_EL2_SYSREG( HDFGRTR_EL2 ); - PURE_EL2_SYSREG( HDFGWTR_EL2 ); - PURE_EL2_SYSREG( HAFGRTR_EL2 ); - PURE_EL2_SYSREG( CNTVOFF_EL2 ); - PURE_EL2_SYSREG( CNTHCTL_EL2 ); MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1, translate_sctlr_el2_to_sctlr_el1 ); MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1, @@ -144,125 +206,189 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); + case CNTHCTL_EL2: + /* CNTHCTL_EL2 is super special, until we support NV2.1 */ + loc->loc = ((is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) ? + SR_LOC_SPECIAL : SR_LOC_MEMORY); + break; default: - return false; + loc->loc = locate_direct_register(vcpu, reg); } } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static u64 read_sr_from_cpu(enum vcpu_sysreg reg) { u64 val = 0x8badf00d8badf00d; - u64 (*xlate)(u64) = NULL; - unsigned int el1r; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_read; + switch (reg) { + case SCTLR_EL1: val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: val = read_sysreg_s(SYS_TCR_EL12); break; + case TCR2_EL1: val = read_sysreg_s(SYS_TCR2_EL12); break; + case PIR_EL1: val = read_sysreg_s(SYS_PIR_EL12); break; + case PIRE0_EL1: val = read_sysreg_s(SYS_PIRE0_EL12); break; + case POR_EL1: val = read_sysreg_s(SYS_POR_EL12); break; + case ESR_EL1: val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: val = read_sysreg_s(SYS_ELR_EL12); break; + case SPSR_EL1: val = read_sysreg_s(SYS_SPSR_EL12); break; + case ZCR_EL1: val = read_sysreg_s(SYS_ZCR_EL12); break; + case SCTLR2_EL1: val = read_sysreg_s(SYS_SCTLR2_EL12); break; + case TPIDR_EL0: val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: val = read_sysreg_s(SYS_TPIDR_EL1); break; + case PAR_EL1: val = read_sysreg_par(); break; + case DACR32_EL2: val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_read; + return val; +} + +static void write_sr_to_cpu(enum vcpu_sysreg reg, u64 val) +{ + switch (reg) { + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break; + case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break; + case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break; + case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; + case SCTLR2_EL1: write_sysreg_s(val, SYS_SCTLR2_EL12); break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: WARN_ON_ONCE(1); + } +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + struct sr_loc loc = {}; + + locate_register(vcpu, reg, &loc); + + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); + + if (loc.loc & SR_LOC_SPECIAL) { + u64 val; + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); /* - * CNTHCTL_EL2 requires some special treatment to - * account for the bits that can be set via CNTKCTL_EL1. + * CNTHCTL_EL2 requires some special treatment to account + * for the bits that can be set via CNTKCTL_EL1 when E2H==1. */ switch (reg) { case CNTHCTL_EL2: - if (vcpu_el2_e2h_is_set(vcpu)) { - val = read_sysreg_el1(SYS_CNTKCTL); - val &= CNTKCTL_VALID_BITS; - val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; - return val; - } - break; + val = read_sysreg_el1(SYS_CNTKCTL); + val &= CNTKCTL_VALID_BITS; + val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS; + return val; + default: + WARN_ON_ONCE(1); } + } - /* - * If this register does not have an EL1 counterpart, - * then read the stored EL2 version. - */ - if (reg == el1r) - goto memory_read; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; - /* - * If we have a non-VHE guest and that the sysreg - * requires translation to be used at EL1, use the - * in-memory copy instead. - */ - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - goto memory_read; + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* Get the current version of the EL1 counterpart. */ - WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val)); - if (reg >= __SANITISED_REG_START__) - val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - - return val; - } + if (!(loc.loc & SR_LOC_XLATED)) { + u64 val = read_sr_from_cpu(map_reg); - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_read; + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - if (__vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; + return val; + } + } -memory_read: return __vcpu_sys_reg(vcpu, reg); } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, enum vcpu_sysreg reg) { - u64 (*xlate)(u64) = NULL; - unsigned int el1r; + struct sr_loc loc = {}; - if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU)) - goto memory_write; + locate_register(vcpu, reg, &loc); - if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) { - if (!is_hyp_ctxt(vcpu)) - goto memory_write; + WARN_ON_ONCE(!has_vhe() && loc.loc != SR_LOC_MEMORY); - /* - * Always store a copy of the write to memory to avoid having - * to reverse-translate virtual EL2 system registers for a - * non-VHE guest hypervisor. - */ - __vcpu_assign_sys_reg(vcpu, reg, val); + if (loc.loc & SR_LOC_SPECIAL) { + + WARN_ON_ONCE(loc.loc & ~SR_LOC_SPECIAL); switch (reg) { case CNTHCTL_EL2: /* - * If E2H=0, CNHTCTL_EL2 is a pure shadow register. - * Otherwise, some of the bits are backed by + * If E2H=1, some of the bits are backed by * CNTKCTL_EL1, while the rest is kept in memory. * Yes, this is fun stuff. */ - if (vcpu_el2_e2h_is_set(vcpu)) - write_sysreg_el1(val, SYS_CNTKCTL); - return; + write_sysreg_el1(val, SYS_CNTKCTL); + break; + default: + WARN_ON_ONCE(1); } + } - /* No EL1 counterpart? We're done here.? */ - if (reg == el1r) - return; + if (loc.loc & SR_LOC_LOADED) { + enum vcpu_sysreg map_reg = reg; + u64 xlated_val; - if (!vcpu_el2_e2h_is_set(vcpu) && xlate) - val = xlate(val); + if (reg >= __SANITISED_REG_START__) + val = kvm_vcpu_apply_reg_masks(vcpu, reg, val); - /* Redirect this to the EL1 version of the register. */ - WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r)); - return; - } + if (loc.loc & SR_LOC_MAPPED) + map_reg = loc.map_reg; - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto memory_write; + if (loc.loc & SR_LOC_XLATED) + xlated_val = loc.xlate(val); + else + xlated_val = val; - if (__vcpu_write_sys_reg_to_cpu(val, reg)) - return; + write_sr_to_cpu(map_reg, xlated_val); + + /* + * Fall through to write the backing store anyway, which + * allows translated registers to be directly read without a + * reverse translation. + */ + } -memory_write: __vcpu_assign_sys_reg(vcpu, reg, val); } @@ -1584,6 +1710,7 @@ static u8 pmuver_to_perfmon(u8 pmuver) } static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val); +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val); static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); /* Read a sanitised cpufeature ID register by sys_reg_desc */ @@ -1606,19 +1733,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val = sanitise_id_aa64pfr0_el1(vcpu, val); break; case SYS_ID_AA64PFR1_EL1: - if (!kvm_has_mte(vcpu->kvm)) { - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac); - } - - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_RNDR_trap); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); + val = sanitise_id_aa64pfr1_el1(vcpu, val); break; case SYS_ID_AA64PFR2_EL1: val &= ID_AA64PFR2_EL1_FPMR | @@ -1628,18 +1743,18 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); + val &= ~(ID_AA64ISAR1_EL1_APA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_GPI); break; case SYS_ID_AA64ISAR2_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); + val &= ~(ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3); if (!cpus_have_final_cap(ARM64_HAS_WFXT) || has_broken_cntvoff()) - val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ID_AA64ISAR2_EL1_WFxT; break; case SYS_ID_AA64ISAR3_EL1: val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; @@ -1655,7 +1770,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: - val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + val &= ~ID_MMFR4_EL1_CCIDX; break; } @@ -1836,6 +1951,31 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) return val; } +static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + if (!kvm_has_mte(vcpu->kvm)) { + val &= ~ID_AA64PFR1_EL1_MTE; + val &= ~ID_AA64PFR1_EL1_MTE_frac; + } + + if (!(cpus_have_final_cap(ARM64_HAS_RASV1P1_EXTN) && + SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0) == ID_AA64PFR0_EL1_RAS_IMP)) + val &= ~ID_AA64PFR1_EL1_RAS_frac; + + val &= ~ID_AA64PFR1_EL1_SME; + val &= ~ID_AA64PFR1_EL1_RNDR_trap; + val &= ~ID_AA64PFR1_EL1_NMI; + val &= ~ID_AA64PFR1_EL1_GCS; + val &= ~ID_AA64PFR1_EL1_THE; + val &= ~ID_AA64PFR1_EL1_MTEX; + val &= ~ID_AA64PFR1_EL1_PFAR; + val &= ~ID_AA64PFR1_EL1_MPAM_frac; + + return val; +} + static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); @@ -2697,6 +2837,18 @@ static bool access_ras(struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; switch(reg_to_encoding(r)) { + case SYS_ERXPFGCDN_EL1: + case SYS_ERXPFGCTL_EL1: + case SYS_ERXPFGF_EL1: + case SYS_ERXMISC2_EL1: + case SYS_ERXMISC3_EL1: + if (!(kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1) || + (kvm_has_feat_enum(kvm, ID_AA64PFR0_EL1, RAS, IMP) && + kvm_has_feat(kvm, ID_AA64PFR1_EL1, RAS_frac, RASv1p1)))) { + kvm_inject_undefined(vcpu); + return false; + } + break; default: if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { kvm_inject_undefined(vcpu); @@ -2929,7 +3081,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ~(ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SVE | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_AdvSIMD | ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, @@ -2943,7 +3094,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_SME | ID_AA64PFR1_EL1_RES0 | ID_AA64PFR1_EL1_MPAM_frac | - ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR | @@ -3063,8 +3213,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGF_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCTL_EL1), access_ras }, + { SYS_DESC(SYS_ERXPFGCDN_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC2_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC3_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a3ef185209e9..70d50c77e5dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -50,6 +50,14 @@ bool vgic_has_its(struct kvm *kvm) bool vgic_supports_direct_msis(struct kvm *kvm) { + /* + * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, + * indirectly allowing userspace to control whether or not vPEs are + * allocated for the VM. + */ + if (system_supports_direct_sgis() && !vgic_supports_direct_sgis(kvm)) + return false; + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e416e433baff..a573b1f0c6cb 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -1091,7 +1091,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, len = vgic_v3_init_dist_iodev(io_device); break; default: - BUG_ON(1); + BUG(); } io_device->base_addr = dist_base_address; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 1384a04c0784..de1c1d3261c3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -396,15 +396,7 @@ bool vgic_supports_direct_sgis(struct kvm *kvm); static inline bool vgic_supports_direct_irqs(struct kvm *kvm) { - /* - * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, - * indirectly allowing userspace to control whether or not vPEs are - * allocated for the VM. - */ - if (system_supports_direct_sgis()) - return vgic_supports_direct_sgis(kvm); - - return vgic_supports_direct_msis(kvm); + return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm); } int vgic_v4_init(struct kvm *kvm); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index ef0b7946f5a4..9ff5cdbd2759 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ HAS_S1PIE HAS_S1POE HAS_SCTLR2 HAS_RAS_EXTN +HAS_RASV1P1_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB |