Diffstat (limited to 'arch/arm64/include/asm/kvm_emulate.h')
-rw-r--r--	arch/arm64/include/asm/kvm_emulate.h | 379
1 file changed, 302 insertions(+), 77 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fd418955e31e..c9eab316398e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -11,12 +11,14 @@
 #ifndef __ARM64_KVM_EMULATE_H__
 #define __ARM64_KVM_EMULATE_H__
 
+#include <linux/bitfield.h>
 #include <linux/kvm_host.h>
 
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
 #include <asm/ptrace.h>
 #include <asm/cputype.h>
 #include <asm/virt.h>
@@ -33,60 +35,83 @@ enum exception_type {
 	except_type_serror	= 0x180,
 };
 
+#define kvm_exception_type_names		\
+	{ except_type_sync,	"SYNC"   },	\
+	{ except_type_irq,	"IRQ"    },	\
+	{ except_type_fiq,	"FIQ"    },	\
+	{ except_type_serror,	"SERROR" }
+
 bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu);
 
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+	return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+	return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+	 * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+	 *
+	 * Set the bit when injecting an SError w/o an ESR to indicate ISS
+	 * does not follow the architected format.
+	 */
+	return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
+
+static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
+{
+	u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) |
+		  ESR_ELx_IL;
+
+	kvm_inject_nested_sync(vcpu, esr);
+}
 
+#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return !(vcpu->arch.hcr_el2 & HCR_RW);
 }
+#else
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+	return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+}
+#endif
 
 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
-	if (is_kernel_in_hyp_mode())
-		vcpu->arch.hcr_el2 |= HCR_E2H;
-	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
-		/* route synchronous external abort exceptions to EL2 */
-		vcpu->arch.hcr_el2 |= HCR_TEA;
-		/* trap error record accesses */
-		vcpu->arch.hcr_el2 |= HCR_TERR;
-	}
-
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
-		vcpu->arch.hcr_el2 |= HCR_FWB;
-	} else {
-		/*
-		 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
-		 * get set in SCTLR_EL1 such that we can detect when the guest
-		 * MMU gets turned on and do the necessary cache maintenance
-		 * then.
-		 */
-		vcpu->arch.hcr_el2 |= HCR_TVM;
-	}
-
-	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
-		vcpu->arch.hcr_el2 &= ~HCR_RW;
+	if (!vcpu_has_run_once(vcpu))
+		vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 
 	/*
-	 * TID3: trap feature register accesses that we virtualise.
-	 * For now this is conditional, since no AArch32 feature regs
-	 * are currently virtualised.
+	 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+	 * get set in SCTLR_EL1 such that we can detect when the guest
+	 * MMU gets turned on and do the necessary cache maintenance
+	 * then.
 	 */
-	if (!vcpu_el1_is_32bit(vcpu))
-		vcpu->arch.hcr_el2 |= HCR_TID3;
-
-	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
-	    vcpu_el1_is_32bit(vcpu))
-		vcpu->arch.hcr_el2 |= HCR_TID2;
-
-	if (kvm_has_mte(vcpu->kvm))
-		vcpu->arch.hcr_el2 |= HCR_ATA;
+	if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+		vcpu->arch.hcr_el2 |= HCR_TVM;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -110,16 +135,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 	vcpu->arch.hcr_el2 |= HCR_TWI;
 }
 
-static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
-}
-
-static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
-}
-
 static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.vsesr_el2;
@@ -176,6 +191,95 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
 	vcpu_gp_regs(vcpu)->regs[reg_num] = val;
 }
 
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
+{
+	switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+	case PSR_MODE_EL2h:
+	case PSR_MODE_EL2t:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
+{
+	return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
+}
+
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+	return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+		(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
+}
+
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+	return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+	/*
+	 * DDI0487L.b Known Issue D22105
+	 *
+	 * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is
+	 * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO
+	 * is the value programmed or 1.
+	 *
+	 * Make the implementation choice of treating the effective value
+	 * as 1, as we cannot subsequently catch changes to TGE or AMO that
+	 * would otherwise lead to the SError becoming deliverable.
+	 */
+	if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu))
+		return true;
+
+	return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+	bool e2h, tge;
+	u64 hcr;
+
+	if (!vcpu_has_nv(vcpu))
+		return false;
+
+	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+	e2h = (hcr & HCR_E2H);
+	tge = (hcr & HCR_TGE);
+
+	/*
+	 * We are in a hypervisor context if the vcpu mode is EL2 or
+	 * E2H and TGE bits are set. The latter means we are in the user space
+	 * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'.
+	 *
+	 * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+	 * rest of the KVM code, and will result in a misbehaving guest.
+	 */
+	return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
+}
+
+static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
+{
+	return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
+}
+
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+	return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+	if (!is_nested_ctxt(vcpu))
+		return false;
+
+	return vcpu_el2_amo_is_set(vcpu) ||
+	       (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
 /*
  * The layout of SPSR for an AArch32 state is different when observed from an
  * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -222,14 +326,27 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 	return mode != PSR_MODE_EL0t;
 }
 
-static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.esr_el2;
 }
 
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+	u64 esr = kvm_vcpu_get_esr(vcpu);
+	bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+	u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+	if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+		return false;
+
+	return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+		(!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
 static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
-	u32 esr = kvm_vcpu_get_esr(vcpu);
+	u64 esr = kvm_vcpu_get_esr(vcpu);
 
 	if (esr & ESR_ELx_CV)
 		return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -244,7 +361,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
 
 static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
 {
-	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+	u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+	if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+		return INVALID_GPA;
+
+	return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
 }
 
 static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
@@ -329,29 +451,34 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
 }
 
-static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
+{
+	return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
 {
-	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
+	return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
 }
 
-static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
 {
-	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+	unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+	BUG_ON(!esr_fsc_is_permission_fault(esr));
+	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
 }
 
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
 	switch (kvm_vcpu_trap_get_fault(vcpu)) {
-	case FSC_SEA:
-	case FSC_SEA_TTW0:
-	case FSC_SEA_TTW1:
-	case FSC_SEA_TTW2:
-	case FSC_SEA_TTW3:
-	case FSC_SECC:
-	case FSC_SECC_TTW0:
-	case FSC_SECC_TTW1:
-	case FSC_SECC_TTW2:
-	case FSC_SECC_TTW3:
+	case ESR_ELx_FSC_EXTABT:
+	case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
+	case ESR_ELx_FSC_SECC:
+	case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
 		return true;
 	default:
 		return false;
@@ -360,14 +487,27 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 
 static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
-	u32 esr = kvm_vcpu_get_esr(vcpu);
+	u64 esr = kvm_vcpu_get_esr(vcpu);
 	return ESR_ELx_SYS64_ISS_RT(esr);
 }
 
 static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 {
-	if (kvm_vcpu_abt_iss1tw(vcpu))
-		return true;
+	if (kvm_vcpu_abt_iss1tw(vcpu)) {
+		/*
+		 * Only a permission fault on a S1PTW should be
+		 * considered as a write. Otherwise, page tables baked
+		 * in a read-only memslot will result in an exception
+		 * being delivered in the guest.
+		 *
+		 * The drawback is that we end-up faulting twice if the
+		 * guest is using any of HW AF/DB: a translation fault
+		 * to map the page containing the PT (read only at
+		 * first), then a permission fault to allow the flags
+		 * to be set.
+		 */
+		return kvm_vcpu_trap_is_permission_fault(vcpu);
+	}
 
 	if (kvm_vcpu_trap_is_iabt(vcpu))
 		return false;
@@ -377,7 +517,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+	return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -385,18 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 	if (vcpu_mode_is_32bit(vcpu)) {
 		*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
 	} else {
-		u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
-		sctlr |= (1 << 25);
-		vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
+		enum vcpu_sysreg r;
+		u64 sctlr;
+
+		r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+
+		sctlr = vcpu_read_sys_reg(vcpu, r);
+		sctlr |= SCTLR_ELx_EE;
+		vcpu_write_sys_reg(vcpu, sctlr, r);
 	}
 }
 
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 {
+	enum vcpu_sysreg r;
+	u64 bit;
+
 	if (vcpu_mode_is_32bit(vcpu))
 		return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
 
-	return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+	r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+	bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
+
+	return vcpu_read_sys_reg(vcpu, r) & bit;
 }
 
 static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
@@ -463,12 +614,86 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 
 static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+	WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+	vcpu_set_flag(vcpu, INCREMENT_PC);
+}
+
+#define kvm_pend_exception(v, e)					\
+	do {								\
+		WARN_ON(vcpu_get_flag((v), INCREMENT_PC));		\
+		vcpu_set_flag((v), PENDING_EXCEPTION);			\
+		vcpu_set_flag((v), e);					\
+	} while (0)
+
+/*
+ * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
+ * format if E2H isn't set.
+ */
+static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu)
+{
+	u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2);
+
+	if (!vcpu_el2_e2h_is_set(vcpu))
+		cptr = translate_cptr_el2_to_cpacr_el1(cptr);
+
+	return cptr;
+}
+
+static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
+					     unsigned int xen)
+{
+	switch (xen) {
+	case 0b00:
+	case 0b10:
+		return true;
+	case 0b01:
+		return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu);
+	case 0b11:
+	default:
+		return false;
+	}
+}
+
+#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen)			\
+	(!vcpu_has_nv(vcpu) ? false :					\
+	 ____cptr_xen_trap_enabled(vcpu,				\
+				   SYS_FIELD_GET(CPACR_EL1, xen,	\
+						 vcpu_sanitised_cptr_el2(vcpu))))
+
+static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+	return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN);
 }
 
-static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
 {
-	return test_bit(feature, vcpu->arch.features);
+	return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
 }
 
+static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+		/*
+		 * In general, all HCRX_EL2 bits are gated by a feature.
+		 * The only reason we can set SMPME without checking any
+		 * feature is that its effects are not directly observable
+		 * from the guest.
+		 */
+		vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
+
+		if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+			vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+
+		if (kvm_has_tcr2(kvm))
+			vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+		if (kvm_has_fpmr(kvm))
+			vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+		if (kvm_has_sctlr2(kvm))
+			vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
+	}
+}
 #endif /* __ARM64_KVM_EMULATE_H__ */
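
Note: one change in the hunks above worth unpacking is kvm_vcpu_get_fault_ipa(). It no longer shifts the raw HPFAR_EL2 value left by 8; per this diff it returns INVALID_GPA when the NS bit is clear (i.e. the register contents are not usable) and otherwise extracts the FIPA field and shifts it up by the 4KiB page shift, yielding IPA bits [55:12]. Below is a minimal standalone sketch of that decode. The field layout (FIPA in bits [47:4], NS in bit 63) follows my reading of the architecture, and every name here (HPFAR_NS, HPFAR_FIPA, INVALID_GPA, fault_ipa) is local to the example, not kernel API.

/* hpfar_ipa.c — standalone sketch of the HPFAR_EL2 -> IPA decode.
 * Assumed layout: FIPA in HPFAR_EL2[47:4] (holding IPA[55:12]),
 * NS in bit 63. Illustrative only; not kernel code. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define HPFAR_NS	(1ULL << 63)
#define HPFAR_FIPA	(((1ULL << 44) - 1) << 4)	/* bits [47:4] */
#define INVALID_GPA	(~0ULL)

static uint64_t fault_ipa(uint64_t hpfar)
{
	/* Per the diff, a clear NS bit means "no valid fault address". */
	if (!(hpfar & HPFAR_NS))
		return INVALID_GPA;

	/* Open-coded FIELD_GET(HPFAR_FIPA, hpfar) << 12. */
	return ((hpfar & HPFAR_FIPA) >> 4) << 12;
}

int main(void)
{
	uint64_t hpfar = HPFAR_NS | (0x89abcULL << 4);	/* IPA 0x89abc000 */

	printf("IPA = 0x%" PRIx64 "\n", fault_ipa(hpfar));
	return 0;
}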

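Note: the ____cptr_xen_trap_enabled() helper added above encodes the architected meaning of the 2-bit CPACR_EL1.{FPEN,ZEN} fields: 0b00 and 0b10 trap at both EL0 and EL1, 0b01 traps EL0 accesses only, and 0b11 traps nothing. A compilable sketch of that truth table follows; xen_trap_enabled() and its at_el0 parameter are illustrative stand-ins (at_el0 plays the role of the real helper's "TGE set and not at EL2" condition), not kernel code.

/* cpacr_xen.c — sketch of the 2-bit CPACR_EL1.{FPEN,ZEN} trap decode. */
#include <stdbool.h>
#include <stdio.h>

static bool xen_trap_enabled(unsigned int xen, bool at_el0)
{
	switch (xen & 3) {
	case 0:			/* 0b00: trap at EL0 and EL1 */
	case 2:			/* 0b10: architecturally behaves as 0b00 */
		return true;
	case 1:			/* 0b01: trap EL0 accesses only */
		return at_el0;
	default:		/* 0b11: no trap */
		return false;
	}
}

int main(void)
{
	for (unsigned int xen = 0; xen < 4; xen++)
		printf("xEN=0b%u%u  trap@EL1=%d  trap@EL0=%d\n",
		       xen >> 1, xen & 1,
		       xen_trap_enabled(xen, false),
		       xen_trap_enabled(xen, true));
	return 0;
}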