summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/kvm_emulate.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm/kvm_emulate.h')
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h379
1 files changed, 302 insertions, 77 deletions
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fd418955e31e..c9eab316398e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -11,12 +11,14 @@
#ifndef __ARM64_KVM_EMULATE_H__
#define __ARM64_KVM_EMULATE_H__
+#include <linux/bitfield.h>
#include <linux/kvm_host.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
@@ -33,60 +35,83 @@ enum exception_type {
except_type_serror = 0x180,
};
+#define kvm_exception_type_names \
+ { except_type_sync, "SYNC" }, \
+ { except_type_irq, "IRQ" }, \
+ { except_type_fiq, "FIQ" }, \
+ { except_type_serror, "SERROR" }
+
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
-void kvm_inject_vabt(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr);
+int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
+static inline int kvm_inject_sea_dabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, false, addr);
+}
+
+static inline int kvm_inject_sea_iabt(struct kvm_vcpu *vcpu, u64 addr)
+{
+ return kvm_inject_sea(vcpu, true, addr);
+}
+
+static inline int kvm_inject_serror(struct kvm_vcpu *vcpu)
+{
+ /*
+ * ESR_ELx.ISV (later renamed to IDS) indicates whether or not
+ * ESR_ELx.ISS contains IMPLEMENTATION DEFINED syndrome information.
+ *
+ * Set the bit when injecting an SError w/o an ESR to indicate ISS
+ * does not follow the architected format.
+ */
+ return kvm_inject_serror_esr(vcpu, ESR_ELx_ISV);
+}
+
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr);
+int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr);
+
+static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
+{
+ u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) |
+ ESR_ELx_IL;
+
+ kvm_inject_nested_sync(vcpu, esr);
+}
+
+#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
}
+#else
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+}
+#endif
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
- if (is_kernel_in_hyp_mode())
- vcpu->arch.hcr_el2 |= HCR_E2H;
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
-
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
- vcpu->arch.hcr_el2 |= HCR_FWB;
- } else {
- /*
- * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
- * get set in SCTLR_EL1 such that we can detect when the guest
- * MMU gets turned on and do the necessary cache maintenance
- * then.
- */
- vcpu->arch.hcr_el2 |= HCR_TVM;
- }
-
- if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
- vcpu->arch.hcr_el2 &= ~HCR_RW;
+ if (!vcpu_has_run_once(vcpu))
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
/*
- * TID3: trap feature register accesses that we virtualise.
- * For now this is conditional, since no AArch32 feature regs
- * are currently virtualised.
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
*/
- if (!vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID3;
-
- if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
- vcpu_el1_is_32bit(vcpu))
- vcpu->arch.hcr_el2 |= HCR_TID2;
-
- if (kvm_has_mte(vcpu->kvm))
- vcpu->arch.hcr_el2 |= HCR_ATA;
+ if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_TVM;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -110,16 +135,6 @@ static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
-}
-
-static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
-}
-
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
@@ -176,6 +191,95 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
+static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
+{
+ switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
+{
+ return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
+}
+
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+{
+ return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
+}
+
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
+{
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
+}
+
+static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * DDI0487L.b Known Issue D22105
+ *
+ * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is
+ * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO
+ * is the value programmed or 1.
+ *
+ * Make the implementation choice of treating the effective value as 1 as
+ * we cannot subsequently catch changes to TGE or AMO that would
+ * otherwise lead to the SError becoming deliverable.
+ */
+ if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu))
+ return true;
+
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
+}
+
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
+{
+ bool e2h, tge;
+ u64 hcr;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ e2h = (hcr & HCR_E2H);
+ tge = (hcr & HCR_TGE);
+
+ /*
+ * We are in a hypervisor context if the vcpu mode is EL2 or
+ * E2H and TGE bits are set. The latter means we are in the user space
+ * of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
+ *
+ * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
+ * rest of the KVM code, and will result in a misbehaving guest.
+ */
+ return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
+}
+
+static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
+{
+ return is_hyp_ctxt(vcpu) && !vcpu_is_el2(vcpu);
+}
+
+static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
+{
+ return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
+}
+
+static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_ctxt(vcpu))
+ return false;
+
+ return vcpu_el2_amo_is_set(vcpu) ||
+ (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA);
+}
+
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -222,14 +326,27 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
+static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+ u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+ return false;
+
+ return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+ (!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
if (esr & ESR_ELx_CV)
return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT;
@@ -244,7 +361,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc
static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
- return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+ u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+ if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+ return INVALID_GPA;
+
+ return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
}
static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
@@ -329,29 +451,34 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
+{
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
}
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW(-1) ... ESR_ELx_FSC_SEA_TTW(3):
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW(-1) ... ESR_ELx_FSC_SECC_TTW(3):
return true;
default:
return false;
@@ -360,14 +487,27 @@ static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
- u32 esr = kvm_vcpu_get_esr(vcpu);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
}
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_abt_iss1tw(vcpu))
- return true;
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
+ }
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;
@@ -377,7 +517,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -385,18 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
} else {
- u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
- sctlr |= (1 << 25);
- vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
+ enum vcpu_sysreg r;
+ u64 sctlr;
+
+ r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+
+ sctlr = vcpu_read_sys_reg(vcpu, r);
+ sctlr |= SCTLR_ELx_EE;
+ vcpu_write_sys_reg(vcpu, sctlr, r);
}
}
static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
{
+ enum vcpu_sysreg r;
+ u64 bit;
+
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
- return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
+ bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
+
+ return vcpu_read_sys_reg(vcpu, r) & bit;
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
@@ -463,12 +614,86 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
{
- vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
+ WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
+ vcpu_set_flag(vcpu, INCREMENT_PC);
+}
+
+#define kvm_pend_exception(v, e) \
+ do { \
+ WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
+ vcpu_set_flag((v), PENDING_EXCEPTION); \
+ vcpu_set_flag((v), e); \
+ } while (0)
+
+/*
+ * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
+ * format if E2H isn't set.
+ */
+static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu)
+{
+ u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2);
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ cptr = translate_cptr_el2_to_cpacr_el1(cptr);
+
+ return cptr;
+}
+
+static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
+ unsigned int xen)
+{
+ switch (xen) {
+ case 0b00:
+ case 0b10:
+ return true;
+ case 0b01:
+ return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu);
+ case 0b11:
+ default:
+ return false;
+ }
+}
+
+#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
+ (!vcpu_has_nv(vcpu) ? false : \
+ ____cptr_xen_trap_enabled(vcpu, \
+ SYS_FIELD_GET(CPACR_EL1, xen, \
+ vcpu_sanitised_cptr_el2(vcpu))))
+
+static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN);
}
-static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
{
- return test_bit(feature, vcpu->arch.features);
+ return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
}
+static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ /*
+ * In general, all HCRX_EL2 bits are gated by a feature.
+ * The only reason we can set SMPME without checking any
+ * feature is that its effects are not directly observable
+ * from the guest.
+ */
+ vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
+
+ if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+ vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+
+ if (kvm_has_tcr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+
+ if (kvm_has_sctlr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_SCTLR2En;
+ }
+}
#endif /* __ARM64_KVM_EMULATE_H__ */