Diffstat (limited to 'arch/arm64/kvm')
37 files changed, 1433 insertions, 471 deletions
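Many of the hunks below replace the open-coded test vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu) with a new is_nested_ctxt() helper. The helper's definition is not part of this diff (its home header, likely arch/arm64/include/asm/kvm_emulate.h, is an assumption); a minimal sketch of what it is assumed to look like, reconstructed purely from the converted call sites:

static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
{
	/*
	 * True only when running an L2 guest: NV is enabled for the vCPU
	 * and it is not currently executing in (virtual) EL2 context.
	 */
	return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
}

With that reading, each conversion in the hunks below is a pure refactor of the same predicate rather than a behavioural change.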
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7c329e01c557..3ebc0570345c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,7 +23,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ - vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o + vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o \ + vgic/vgic-v5.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 701ea10a63f1..dbd74e4885e2 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -830,7 +830,7 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but * not both). This simplifies the handling of the EL1NV* bits. */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); /* Use the VHE format for mental sanity */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c7..888f7c7abf54 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES: r = BIT(0); break; + case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED: + if (!kvm) + r = -EINVAL; + else + r = kvm_supports_cacheable_pfnmap(); + break; + default: r = 0; } @@ -521,7 +528,7 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Either we're running an L2 guest, and the API/APK bits come * from L1's HCR_EL2, or API/APK are both set. */ - if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { + if (unlikely(is_nested_ctxt(vcpu))) { u64 val; val = __vcpu_sys_reg(vcpu, HCR_EL2); @@ -740,7 +747,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); } @@ -825,10 +833,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -1187,6 +1191,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); + kvm_nested_flush_hwstate(vcpu); + if (kvm_vcpu_has_pmu(vcpu)) kvm_pmu_flush_hwstate(vcpu); @@ -1286,6 +1292,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); + kvm_nested_sync_hwstate(vcpu); + preempt_enable(); /* @@ -2129,7 +2137,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2353,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if 
(!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2367,9 @@ static void __init teardown_hyp_mode(void) sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } @@ -2761,19 +2777,15 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq); } -bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, - struct kvm_kernel_irq_routing_entry *new) +void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new) { - if (old->type != KVM_IRQ_ROUTING_MSI || - new->type != KVM_IRQ_ROUTING_MSI) - return true; - - return memcmp(&old->msi, &new->msi, sizeof(new->msi)); -} + if (old->type == KVM_IRQ_ROUTING_MSI && + new->type == KVM_IRQ_ROUTING_MSI && + !memcmp(&old->msi, &new->msi, sizeof(new->msi))) + return; -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) -{ /* * Remapping the vLPI requires taking the its_lock mutex to resolve * the new translation. We're in spinlock land at this point, so no @@ -2781,7 +2793,7 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, * * Unmap the vLPI and fall back to software LPI injection. */ - return kvm_vgic_v4_unset_forwarding(kvm, host_irq); + return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq); } void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index a25be111cd8f..0e5610533949 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1047,34 +1047,51 @@ static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu, idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc); - switch (wi->regime) { - case TR_EL10: - pov_perms = perm_idx(vcpu, POR_EL1, idx); - uov_perms = perm_idx(vcpu, POR_EL0, idx); - break; - case TR_EL20: - pov_perms = perm_idx(vcpu, POR_EL2, idx); - uov_perms = perm_idx(vcpu, POR_EL0, idx); - break; - case TR_EL2: - pov_perms = perm_idx(vcpu, POR_EL2, idx); - uov_perms = 0; - break; - } + if (wr->pov) { + switch (wi->regime) { + case TR_EL10: + pov_perms = perm_idx(vcpu, POR_EL1, idx); + break; + case TR_EL20: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + break; + case TR_EL2: + pov_perms = perm_idx(vcpu, POR_EL2, idx); + break; + } + + if (pov_perms & ~POE_RWX) + pov_perms = POE_NONE; - if (pov_perms & ~POE_RWX) - pov_perms = POE_NONE; + /* R_QXXPC, S1PrivOverflow enabled */ + if (wr->pwxn && (pov_perms & POE_X)) + pov_perms &= ~POE_W; - if (wi->poe && wr->pov) { wr->pr &= pov_perms & POE_R; wr->pw &= pov_perms & POE_W; wr->px &= pov_perms & POE_X; } - if (uov_perms & ~POE_RWX) - uov_perms = POE_NONE; + if (wr->uov) { + switch (wi->regime) { + case TR_EL10: + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL20: + uov_perms = perm_idx(vcpu, POR_EL0, idx); + break; + case TR_EL2: + uov_perms = 0; + break; + } + + if (uov_perms & ~POE_RWX) + uov_perms = POE_NONE; + + /* R_NPBXC, S1UnprivOverlay enabled */ + if (wr->uwxn && (uov_perms & POE_X)) + uov_perms &= ~POE_W; - if (wi->e0poe && wr->uov) { wr->ur &= uov_perms & POE_R; wr->uw &= uov_perms & POE_W; wr->ux &= uov_perms & POE_X; @@ -1095,24 +1112,15 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu, if 
(!wi->hpd) compute_s1_hierarchical_permissions(vcpu, wi, wr); - if (wi->poe || wi->e0poe) - compute_s1_overlay_permissions(vcpu, wi, wr); + compute_s1_overlay_permissions(vcpu, wi, wr); - /* R_QXXPC */ - if (wr->pwxn) { - if (!wr->pov && wr->pw) - wr->px = false; - if (wr->pov && wr->px) - wr->pw = false; - } + /* R_QXXPC, S1PrivOverlay disabled */ + if (!wr->pov) + wr->px &= !(wr->pwxn && wr->pw); - /* R_NPBXC */ - if (wr->uwxn) { - if (!wr->uov && wr->uw) - wr->ux = false; - if (wr->uov && wr->ux) - wr->uw = false; - } + /* R_NPBXC, S1UnprivOverlay disabled */ + if (!wr->uov) + wr->ux &= !(wr->uwxn && wr->uw); pan = wi->pan && (wr->ur || wr->uw || (pan3_enabled(vcpu, wi->regime) && wr->ux)); diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index 54911a93b001..da66c4a14775 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -66,7 +66,6 @@ struct reg_bits_to_feat_map { #define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP #define FEAT_TRC_SR ID_AA64DFR0_EL1, TraceVer, IMP #define FEAT_PMUv3 ID_AA64DFR0_EL1, PMUVer, IMP -#define FEAT_PMUv3p9 ID_AA64DFR0_EL1, PMUVer, V3P9 #define FEAT_TRBE ID_AA64DFR0_EL1, TraceBuffer, IMP #define FEAT_TRBEv1p1 ID_AA64DFR0_EL1, TraceBuffer, TRBE_V1P1 #define FEAT_DoubleLock ID_AA64DFR0_EL1, DoubleLock, IMP @@ -89,6 +88,7 @@ struct reg_bits_to_feat_map { #define FEAT_RASv2 ID_AA64PFR0_EL1, RAS, V2 #define FEAT_GICv3 ID_AA64PFR0_EL1, GIC, IMP #define FEAT_LOR ID_AA64MMFR1_EL1, LO, IMP +#define FEAT_SPEv1p2 ID_AA64DFR0_EL1, PMSVer, V1P2 #define FEAT_SPEv1p4 ID_AA64DFR0_EL1, PMSVer, V1P4 #define FEAT_SPEv1p5 ID_AA64DFR0_EL1, PMSVer, V1P5 #define FEAT_ATS1A ID_AA64ISAR2_EL1, ATS1A, IMP @@ -131,6 +131,27 @@ struct reg_bits_to_feat_map { #define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP #define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP #define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP +#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP +#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2 +#define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP +#define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP +#define FEAT_HAFT ID_AA64MMFR1_EL1, HAFDBS, HAFT +#define FEAT_BTI ID_AA64PFR1_EL1, BT, IMP +#define FEAT_ExS ID_AA64MMFR0_EL1, EXS, IMP +#define FEAT_IESB ID_AA64MMFR2_EL1, IESB, IMP +#define FEAT_LSE2 ID_AA64MMFR2_EL1, AT, IMP +#define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP +#define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP +#define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP +#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2 +#define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC +#define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP +#define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP +#define FEAT_PAN3 ID_AA64MMFR1_EL1, PAN, PAN3 +#define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP +#define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP +#define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP +#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP static bool not_feat_aa64el3(struct kvm *kvm) { @@ -218,11 +239,62 @@ static bool feat_trbe_mpam(struct kvm *kvm) (read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_MPAM)); } +static bool feat_asid2_e2h1(struct kvm *kvm) +{ + return kvm_has_feat(kvm, FEAT_ASID2) && !kvm_has_feat(kvm, FEAT_E2H0); +} + +static bool feat_d128_e2h1(struct kvm *kvm) +{ + return kvm_has_feat(kvm, FEAT_D128) && !kvm_has_feat(kvm, FEAT_E2H0); +} + +static bool feat_mec_e2h1(struct kvm *kvm) +{ + return kvm_has_feat(kvm, FEAT_MEC) && !kvm_has_feat(kvm, FEAT_E2H0); +} + static bool feat_ebep_pmuv3_ss(struct kvm *kvm) { return kvm_has_feat(kvm, FEAT_EBEP) || kvm_has_feat(kvm, 
FEAT_PMUv3_SS); } +static bool feat_mixedendel0(struct kvm *kvm) +{ + return kvm_has_feat(kvm, FEAT_MixedEnd) || kvm_has_feat(kvm, FEAT_MixedEndEL0); +} + +static bool feat_mte_async(struct kvm *kvm) +{ + return kvm_has_feat(kvm, FEAT_MTE2) && kvm_has_feat_enum(kvm, FEAT_MTE_ASYNC); +} + +#define check_pmu_revision(k, r) \ + ({ \ + (kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, r) && \ + !kvm_has_feat((k), ID_AA64DFR0_EL1, PMUVer, IMP_DEF)); \ + }) + +static bool feat_pmuv3p1(struct kvm *kvm) +{ + return check_pmu_revision(kvm, V3P1); +} + +static bool feat_pmuv3p5(struct kvm *kvm) +{ + return check_pmu_revision(kvm, V3P5); +} + +static bool feat_pmuv3p7(struct kvm *kvm) +{ + return check_pmu_revision(kvm, V3P7); +} + +static bool feat_pmuv3p9(struct kvm *kvm) +{ + return check_pmu_revision(kvm, V3P9); +} + static bool compute_hcr_rw(struct kvm *kvm, u64 *bits) { /* This is purely academic: AArch32 and NV are mutually exclusive */ @@ -681,7 +753,7 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { NEEDS_FEAT(HDFGRTR2_EL2_nPMICFILTR_EL0 | HDFGRTR2_EL2_nPMICNTR_EL0, FEAT_PMUv3_ICNTR), - NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, FEAT_PMUv3p9), + NEEDS_FEAT(HDFGRTR2_EL2_nPMUACR_EL1, feat_pmuv3p9), NEEDS_FEAT(HDFGRTR2_EL2_nPMSSCR_EL1 | HDFGRTR2_EL2_nPMSSDATA, FEAT_PMUv3_SS), @@ -713,7 +785,7 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { FEAT_PMUv3_ICNTR), NEEDS_FEAT(HDFGWTR2_EL2_nPMUACR_EL1 | HDFGWTR2_EL2_nPMZR_EL0, - FEAT_PMUv3p9), + feat_pmuv3p9), NEEDS_FEAT(HDFGWTR2_EL2_nPMSSCR_EL1, FEAT_PMUv3_SS), NEEDS_FEAT(HDFGWTR2_EL2_nPMIAR_EL1, FEAT_SEBEP), NEEDS_FEAT(HDFGWTR2_EL2_nPMSDSFR_EL1, feat_spe_fds), @@ -832,6 +904,150 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = { NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), }; +static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { + NEEDS_FEAT(SCTLR2_EL1_NMEA | + SCTLR2_EL1_EASE, + FEAT_DoubleFault2), + NEEDS_FEAT(SCTLR2_EL1_EnADERR, feat_aderr), + NEEDS_FEAT(SCTLR2_EL1_EnANERR, feat_anerr), + NEEDS_FEAT(SCTLR2_EL1_EnIDCP128, FEAT_SYSREG128), + NEEDS_FEAT(SCTLR2_EL1_EnPACM | + SCTLR2_EL1_EnPACM0, + feat_pauth_lr), + NEEDS_FEAT(SCTLR2_EL1_CPTA | + SCTLR2_EL1_CPTA0 | + SCTLR2_EL1_CPTM | + SCTLR2_EL1_CPTM0, + FEAT_CPA2), +}; + +static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { + NEEDS_FEAT(TCR2_EL2_FNG1 | + TCR2_EL2_FNG0 | + TCR2_EL2_A2, + feat_asid2_e2h1), + NEEDS_FEAT(TCR2_EL2_DisCH1 | + TCR2_EL2_DisCH0 | + TCR2_EL2_D128, + feat_d128_e2h1), + NEEDS_FEAT(TCR2_EL2_AMEC1, feat_mec_e2h1), + NEEDS_FEAT(TCR2_EL2_AMEC0, FEAT_MEC), + NEEDS_FEAT(TCR2_EL2_HAFT, FEAT_HAFT), + NEEDS_FEAT(TCR2_EL2_PTTWI | + TCR2_EL2_PnCH, + FEAT_THE), + NEEDS_FEAT(TCR2_EL2_AIE, FEAT_AIE), + NEEDS_FEAT(TCR2_EL2_POE | + TCR2_EL2_E0POE, + FEAT_S1POE), + NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE), +}; + +static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { + NEEDS_FEAT(SCTLR_EL1_CP15BEN | + SCTLR_EL1_ITD | + SCTLR_EL1_SED, + FEAT_AA32EL0), + NEEDS_FEAT(SCTLR_EL1_BT0 | + SCTLR_EL1_BT1, + FEAT_BTI), + NEEDS_FEAT(SCTLR_EL1_CMOW, FEAT_CMOW), + NEEDS_FEAT(SCTLR_EL1_TSCXT, feat_csv2_2_csv2_1p2), + NEEDS_FEAT(SCTLR_EL1_EIS | + SCTLR_EL1_EOS, + FEAT_ExS), + NEEDS_FEAT(SCTLR_EL1_EnFPM, FEAT_FPMR), + NEEDS_FEAT(SCTLR_EL1_IESB, FEAT_IESB), + NEEDS_FEAT(SCTLR_EL1_EnALS, FEAT_LS64), + NEEDS_FEAT(SCTLR_EL1_EnAS0, FEAT_LS64_ACCDATA), + NEEDS_FEAT(SCTLR_EL1_EnASR, FEAT_LS64_V), + NEEDS_FEAT(SCTLR_EL1_nAA, FEAT_LSE2), + NEEDS_FEAT(SCTLR_EL1_LSMAOE | + SCTLR_EL1_nTLSMD, + FEAT_LSMAOC), + NEEDS_FEAT(SCTLR_EL1_EE, FEAT_MixedEnd), + 
NEEDS_FEAT(SCTLR_EL1_E0E, feat_mixedendel0), + NEEDS_FEAT(SCTLR_EL1_MSCEn, FEAT_MOPS), + NEEDS_FEAT(SCTLR_EL1_ATA0 | + SCTLR_EL1_ATA | + SCTLR_EL1_TCF0 | + SCTLR_EL1_TCF, + FEAT_MTE2), + NEEDS_FEAT(SCTLR_EL1_ITFSB, feat_mte_async), + NEEDS_FEAT(SCTLR_EL1_TCSO0 | + SCTLR_EL1_TCSO, + FEAT_MTE_STORE_ONLY), + NEEDS_FEAT(SCTLR_EL1_NMI | + SCTLR_EL1_SPINTMASK, + FEAT_NMI), + NEEDS_FEAT(SCTLR_EL1_SPAN, FEAT_PAN), + NEEDS_FEAT(SCTLR_EL1_EPAN, FEAT_PAN3), + NEEDS_FEAT(SCTLR_EL1_EnDA | + SCTLR_EL1_EnDB | + SCTLR_EL1_EnIA | + SCTLR_EL1_EnIB, + feat_pauth), + NEEDS_FEAT(SCTLR_EL1_EnTP2, FEAT_SME), + NEEDS_FEAT(SCTLR_EL1_EnRCTX, FEAT_SPECRES), + NEEDS_FEAT(SCTLR_EL1_DSSBS, FEAT_SSBS), + NEEDS_FEAT(SCTLR_EL1_TIDCP, FEAT_TIDCP1), + NEEDS_FEAT(SCTLR_EL1_TME0 | + SCTLR_EL1_TME | + SCTLR_EL1_TMT0 | + SCTLR_EL1_TMT, + FEAT_TME), + NEEDS_FEAT(SCTLR_EL1_TWEDEL | + SCTLR_EL1_TWEDEn, + FEAT_TWED), + NEEDS_FEAT(SCTLR_EL1_UCI | + SCTLR_EL1_EE | + SCTLR_EL1_E0E | + SCTLR_EL1_WXN | + SCTLR_EL1_nTWE | + SCTLR_EL1_nTWI | + SCTLR_EL1_UCT | + SCTLR_EL1_DZE | + SCTLR_EL1_I | + SCTLR_EL1_UMA | + SCTLR_EL1_SA0 | + SCTLR_EL1_SA | + SCTLR_EL1_C | + SCTLR_EL1_A | + SCTLR_EL1_M, + FEAT_AA64EL1), +}; + +static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { + NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9), + NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock), + NEEDS_FEAT(MDCR_EL2_PMEE, FEAT_EBEP), + NEEDS_FEAT(MDCR_EL2_TDCC, FEAT_FGT), + NEEDS_FEAT(MDCR_EL2_MTPME, FEAT_MTPMU), + NEEDS_FEAT(MDCR_EL2_HPME | + MDCR_EL2_HPMN | + MDCR_EL2_TPMCR | + MDCR_EL2_TPM, + FEAT_PMUv3), + NEEDS_FEAT(MDCR_EL2_HPMD, feat_pmuv3p1), + NEEDS_FEAT(MDCR_EL2_HCCD | + MDCR_EL2_HLP, + feat_pmuv3p5), + NEEDS_FEAT(MDCR_EL2_HPMFZO, feat_pmuv3p7), + NEEDS_FEAT(MDCR_EL2_PMSSE, FEAT_PMUv3_SS), + NEEDS_FEAT(MDCR_EL2_E2PB | + MDCR_EL2_TPMS, + FEAT_SPE), + NEEDS_FEAT(MDCR_EL2_HPMFZS, FEAT_SPEv1p2), + NEEDS_FEAT(MDCR_EL2_EnSPM, FEAT_SPMU), + NEEDS_FEAT(MDCR_EL2_EnSTEPOP, FEAT_STEP2), + NEEDS_FEAT(MDCR_EL2_E2TB, FEAT_TRBE), + NEEDS_FEAT(MDCR_EL2_TTRF, FEAT_TRF), + NEEDS_FEAT(MDCR_EL2_TDA | + MDCR_EL2_TDE | + MDCR_EL2_TDRA, + FEAT_AA64EL1), +}; + static void __init check_feat_map(const struct reg_bits_to_feat_map *map, int map_size, u64 res0, const char *str) { @@ -863,6 +1079,14 @@ void __init check_feature_map(void) __HCRX_EL2_RES0, "HCRX_EL2"); check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map), HCR_EL2_RES0, "HCR_EL2"); + check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map), + SCTLR2_EL1_RES0, "SCTLR2_EL1"); + check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), + TCR2_EL2_RES0, "TCR2_EL2"); + check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map), + SCTLR_EL1_RES0, "SCTLR_EL1"); + check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map), + MDCR_EL2_RES0, "MDCR_EL2"); } static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map) @@ -1077,6 +1301,31 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r *res0 |= HCR_EL2_RES0 | (mask & ~fixed); *res1 = HCR_EL2_RES1 | (mask & fixed); break; + case SCTLR2_EL1: + case SCTLR2_EL2: + *res0 = compute_res0_bits(kvm, sctlr2_feat_map, + ARRAY_SIZE(sctlr2_feat_map), 0, 0); + *res0 |= SCTLR2_EL1_RES0; + *res1 = SCTLR2_EL1_RES1; + break; + case TCR2_EL2: + *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map, + ARRAY_SIZE(tcr2_el2_feat_map), 0, 0); + *res0 |= TCR2_EL2_RES0; + *res1 = TCR2_EL2_RES1; + break; + case SCTLR_EL1: + *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map, + ARRAY_SIZE(sctlr_el1_feat_map), 0, 0); + *res0 |= 
SCTLR_EL1_RES0; + *res1 = SCTLR_EL1_RES1; + break; + case MDCR_EL2: + *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map, + ARRAY_SIZE(mdcr_el2_feat_map), 0, 0); + *res0 |= MDCR_EL2_RES0; + *res1 = MDCR_EL2_RES1; + break; default: WARN_ON_ONCE(1); *res0 = *res1 = 0; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 1a7dab333f55..381382c19fe4 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -81,6 +81,10 @@ void kvm_init_host_debug_data(void) !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) host_data_set_flag(HAS_SPE); + /* Check if we have BRBE implemented and available at the host */ + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) + host_data_set_flag(HAS_BRBE); + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT)) { /* Force disable trace in protected mode in case of no TRBE */ if (is_protected_kvm_enabled()) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 3a384e9660b8..90cb4b7ae0ff 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -88,6 +88,7 @@ enum cgt_group_id { CGT_HCRX_EnFPM, CGT_HCRX_TCR2En, + CGT_HCRX_SCTLR2En, CGT_CNTHCTL_EL1TVT, CGT_CNTHCTL_EL1TVCT, @@ -108,6 +109,7 @@ enum cgt_group_id { CGT_HCR_TTLB_TTLBOS, CGT_HCR_TVM_TRVM, CGT_HCR_TVM_TRVM_HCRX_TCR2En, + CGT_HCR_TVM_TRVM_HCRX_SCTLR2En, CGT_HCR_TPU_TICAB, CGT_HCR_TPU_TOCU, CGT_HCR_NV1_nNV2_ENSCXT, @@ -398,6 +400,12 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = HCRX_EL2_TCR2En, .behaviour = BEHAVE_FORWARD_RW, }, + [CGT_HCRX_SCTLR2En] = { + .index = HCRX_EL2, + .value = 0, + .mask = HCRX_EL2_SCTLR2En, + .behaviour = BEHAVE_FORWARD_RW, + }, [CGT_CNTHCTL_EL1TVT] = { .index = CNTHCTL_EL2, .value = CNTHCTL_EL1TVT, @@ -449,6 +457,8 @@ static const enum cgt_group_id *coarse_control_combo[] = { MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM), MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En, CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En), + MCB(CGT_HCR_TVM_TRVM_HCRX_SCTLR2En, + CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_SCTLR2En), MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB), MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU), MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT), @@ -782,6 +792,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS), SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS), SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_SCTLR2_EL1, CGT_HCR_TVM_TRVM_HCRX_SCTLR2En), SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM), @@ -1354,6 +1365,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_SCXTNUM_EL0, HFGRTR, SCXTNUM_EL0, 1), SR_FGT(SYS_SCXTNUM_EL1, HFGRTR, SCXTNUM_EL1, 1), SR_FGT(SYS_SCTLR_EL1, HFGRTR, SCTLR_EL1, 1), + SR_FGT(SYS_SCTLR2_EL1, HFGRTR, SCTLR_EL1, 1), SR_FGT(SYS_REVIDR_EL1, HFGRTR, REVIDR_EL1, 1), SR_FGT(SYS_PAR_EL1, HFGRTR, PAR_EL1, 1), SR_FGT(SYS_MPIDR_EL1, HFGRTR, MPIDR_EL1, 1), @@ -2592,13 +2604,8 @@ inject: static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit) { - bool control_bit_set; - - if (!vcpu_has_nv(vcpu)) - return false; - - control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit; - if (!is_hyp_ctxt(vcpu) && control_bit_set) { + if (is_nested_ctxt(vcpu) && + (__vcpu_sys_reg(vcpu, reg) & control_bit)) { kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); return true; } @@ -2719,6 
+2726,9 @@ static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2, case except_type_irq: kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ); break; + case except_type_serror: + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR); + break; default: WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type); } @@ -2816,3 +2826,28 @@ int kvm_inject_nested_irq(struct kvm_vcpu *vcpu) /* esr_el2 value doesn't matter for exits due to irqs. */ return kvm_inject_nested(vcpu, 0, except_type_irq); } + +int kvm_inject_nested_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, + iabt ? ESR_ELx_EC_IABT_LOW : ESR_ELx_EC_DABT_LOW); + esr |= ESR_ELx_FSC_EXTABT | ESR_ELx_IL; + + vcpu_write_sys_reg(vcpu, FAR_EL2, addr); + + if (__vcpu_sys_reg(vcpu, SCTLR2_EL2) & SCTLR2_EL1_EASE) + return kvm_inject_nested(vcpu, esr, except_type_serror); + + return kvm_inject_nested_sync(vcpu, esr); +} + +int kvm_inject_nested_serror(struct kvm_vcpu *vcpu, u64 esr) +{ + /* + * Hardware sets up the EC field when propagating ESR as a result of + * vSError injection. Manually populate EC for an emulated SError + * exception. + */ + esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR); + return kvm_inject_nested(vcpu, esr, except_type_serror); +} diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. * diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2196979a24a3..16ba5e9ac86c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -818,8 +818,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN); + events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) || + vcpu_get_flag(vcpu, NESTED_SERROR_PENDING); if (events->exception.serror_pending && events->exception.serror_has_esr) events->exception.serror_esr = vcpu_get_vsesr(vcpu); @@ -833,29 +834,62 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, return 0; } +static void commit_pending_events(struct kvm_vcpu *vcpu) +{ + if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) + return; + + /* + * Reset the MMIO emulation state to avoid stepping PC after emulating + * the exception entry. 
+ */ + vcpu->mmio_needed = false; + kvm_call_hyp(__kvm_adjust_pc, vcpu); +} + int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; + u64 esr = events->exception.serror_esr; + int ret = 0; - if (serror_pending && has_esr) { - if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - return -EINVAL; - - if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) - kvm_set_sei_esr(vcpu, events->exception.serror_esr); - else - return -EINVAL; - } else if (serror_pending) { - kvm_inject_vabt(vcpu); + /* + * Immediately commit the pending SEA to the vCPU's architectural + * state which is necessary since we do not return a pending SEA + * to userspace via KVM_GET_VCPU_EVENTS. + */ + if (ext_dabt_pending) { + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + commit_pending_events(vcpu); } - if (ext_dabt_pending) - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + if (ret < 0) + return ret; - return 0; + if (!serror_pending) + return 0; + + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr) + return -EINVAL; + + if (has_esr && (esr & ~ESR_ELx_ISS_MASK)) + return -EINVAL; + + if (has_esr) + ret = kvm_inject_serror_esr(vcpu, esr); + else + ret = kvm_inject_serror(vcpu); + + /* + * We could've decided that the SError is due for immediate software + * injection; commit the exception in case userspace decides it wants + * to inject more exceptions for some strange reason. + */ + commit_pending_events(vcpu); + return (ret < 0) ? ret : 0; } u32 __attribute_const__ kvm_target_cpu(void) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 453266c96481..a598072f36d2 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -32,7 +32,7 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *); static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) { if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr)) - kvm_inject_vabt(vcpu); + kvm_inject_serror(vcpu); } static int handle_hvc(struct kvm_vcpu *vcpu) @@ -252,7 +252,7 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) return 1; } - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); return 1; } @@ -311,12 +311,11 @@ static int kvm_handle_gcs(struct kvm_vcpu *vcpu) static int handle_other(struct kvm_vcpu *vcpu) { - bool is_l2 = vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu); + bool allowed, fwd = is_nested_ctxt(vcpu); u64 hcrx = __vcpu_sys_reg(vcpu, HCRX_EL2); u64 esr = kvm_vcpu_get_esr(vcpu); u64 iss = ESR_ELx_ISS(esr); struct kvm *kvm = vcpu->kvm; - bool allowed, fwd = false; /* * We only trap for two reasons: @@ -335,28 +334,23 @@ static int handle_other(struct kvm_vcpu *vcpu) switch (iss) { case ESR_ELx_ISS_OTHER_ST64BV: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnASR); + fwd &= !(hcrx & HCRX_EL2_EnASR); break; case ESR_ELx_ISS_OTHER_ST64BV0: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnAS0); + fwd &= !(hcrx & HCRX_EL2_EnAS0); break; case ESR_ELx_ISS_OTHER_LDST64B: allowed = kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64); - if (is_l2) - fwd = !(hcrx & HCRX_EL2_EnALS); + fwd &= !(hcrx & HCRX_EL2_EnALS); break; case ESR_ELx_ISS_OTHER_TSBCSYNC: allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, 
TraceBuffer, TRBE_V1P1); - if (is_l2) - fwd = (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC); + fwd &= (__vcpu_sys_reg(vcpu, HFGITR2_EL2) & HFGITR2_EL2_TSBCSYNC); break; case ESR_ELx_ISS_OTHER_PSBCSYNC: allowed = kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P5); - if (is_l2) - fwd = (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC); + fwd &= (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_PSBCSYNC); break; default: /* Clearly, we're missing something. */ @@ -496,7 +490,7 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, disr_to_esr(disr)); } else { - kvm_inject_vabt(vcpu); + kvm_inject_serror(vcpu); } return; diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 6a2a899a344e..95d186e0bf54 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -26,7 +26,8 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) if (unlikely(vcpu_has_nv(vcpu))) return vcpu_read_sys_reg(vcpu, reg); - else if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + else if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) && + __vcpu_read_sys_reg_from_cpu(reg, &val)) return val; return __vcpu_sys_reg(vcpu, reg); @@ -36,7 +37,8 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); - else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) + else if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU) || + !__vcpu_write_sys_reg_to_cpu(val, reg)) __vcpu_assign_sys_reg(vcpu, reg, val); } @@ -339,6 +341,10 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); break; + case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_serror); + break; + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync); break; @@ -347,9 +353,13 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu) enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq); break; + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR): + enter_exception64(vcpu, PSR_MODE_EL2h, except_type_serror); + break; + default: /* - * Only EL1_SYNC and EL2_{SYNC,IRQ} makes + * Only EL1_{SYNC,SERR} and EL2_{SYNC,IRQ,SERR} makes * sense so far. Everything else gets silently * ignored. 
*/ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2ad57b117385..84ec4e100fbb 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -298,7 +298,7 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) u64 val; \ \ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \ + if (is_nested_ctxt(vcpu)) \ compute_clr_set(vcpu, reg, c, s); \ \ compute_undef_clr_set(vcpu, kvm, reg, c, s); \ @@ -436,7 +436,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_HCX)) { u64 hcrx = vcpu->arch.hcrx_el2; - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2); hcrx |= val & __HCRX_EL2_MASK; hcrx &= ~(~val & __HCRX_EL2_nMASK); @@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr) write_sysreg_hcr(hcr); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) { + u64 vsesr; + + /* + * When HCR_EL2.AMO is set, physical SErrors are taken to EL2 + * and vSError injection is enabled for EL1. Conveniently, for + * NV this means that it is never the case where a 'physical' + * SError (injected by KVM or userspace) and vSError are + * deliverable to the same context. + * + * As such, we can trivially select between the host or guest's + * VSESR_EL2. Except for the case that FEAT_RAS hasn't been + * exposed to the guest, where ESR propagation in hardware + * occurs unconditionally. + * + * Paper over the architectural wart and use an IMPLEMENTATION + * DEFINED ESR value in case FEAT_RAS is hidden from the guest. + */ + if (!vserror_state_is_nested(vcpu)) + vsesr = vcpu->arch.vsesr_el2; + else if (kvm_has_ras(kern_hyp_va(vcpu->kvm))) + vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2); + else + vsesr = ESR_ELx_ISV; + + write_sysreg_s(vsesr, SYS_VSESR_EL2); + } } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) { + u64 *hcr; + + if (vserror_state_is_nested(vcpu)) + hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2); + else + hcr = &vcpu->arch.hcr_el2; + /* * If we pended a virtual abort, preserve it until it gets * cleared. See D1.14.3 (Virtual Interrupts) for details, but * the crucial bit is "On taking a vSError interrupt, * HCR_EL2.VSE is cleared to 0." + * + * Additionally, when in a nested context we need to propagate the + * updated state to the guest hypervisor's HCR_EL2. */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + if (*hcr & HCR_VSE) { + *hcr &= ~HCR_VSE; + *hcr |= read_sysreg(hcr_el2) & HCR_VSE; } } @@ -531,7 +566,7 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) * nested guest, as the guest hypervisor could select a smaller VL. Slap * that into hardware before wrapping up. */ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + if (is_nested_ctxt(vcpu)) sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2); write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR); @@ -557,7 +592,7 @@ static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) { /* A guest hypervisor may restrict the effective max VL. 
*/ - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + if (is_nested_ctxt(vcpu)) zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); else zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4d0dbea4c56f..a17cbe7582de 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -109,6 +109,28 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt) return kvm_has_s1poe(kern_hyp_va(vcpu->kvm)); } +static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_ras(kern_hyp_va(vcpu->kvm)); +} + +static inline bool ctxt_has_sctlr2(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_SCTLR2)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_sctlr2(kern_hyp_va(vcpu->kvm)); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -147,6 +169,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); + + if (ctxt_has_sctlr2(ctxt)) + ctxt_sys_reg(ctxt, SCTLR2_EL1) = read_sysreg_el1(SYS_SCTLR2); } static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) @@ -159,8 +184,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) if (!has_vhe() && ctxt->__hyp_running_vcpu) ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return; + + if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt))) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); + else if (ctxt_has_ras(ctxt)) + ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2); } static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) @@ -252,6 +282,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); + + if (ctxt_has_sctlr2(ctxt)) + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR2_EL1), SYS_SCTLR2); } /* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. 
*/ @@ -275,6 +308,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx { u64 pstate = to_hw_pstate(ctxt); u64 mode = pstate & PSR_AA32_MODE_MASK; + u64 vdisr; /* * Safety check to ensure we're setting the CPU up to enter the guest @@ -293,8 +327,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx write_sysreg_el2(ctxt->regs.pc, SYS_ELR); write_sysreg_el2(pstate, SYS_SPSR); - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + return; + + if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt))) + vdisr = ctxt_sys_reg(ctxt, DISR_EL1); + else if (ctxt_has_ras(ctxt)) + vdisr = ctxt_sys_reg(ctxt, VDISR_EL2); + else + vdisr = 0; + + write_sysreg_s(vdisr, SYS_VDISR_EL2); } static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index a76522d63c3e..0b0a68b663d4 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -12,7 +12,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__ ccflags-y += -fno-stack-protector \ -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_KSTACK_ERASE) hostprogs := gen-hyprel HOST_EXTRACFLAGS += -I$(objtree)/include diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 2f4a4f5036bb..2a1c0f49792b 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -92,12 +92,42 @@ static void __trace_switch_to_host(void) *host_data_ptr(host_debug_state.trfcr_el1)); } +static void __debug_save_brbe(u64 *brbcr_el1) +{ + *brbcr_el1 = 0; + + /* Check if the BRBE is enabled */ + if (!(read_sysreg_el1(SYS_BRBCR) & (BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE))) + return; + + /* + * Prohibit branch record generation while we are in guest. + * Since access to BRBCR_EL1 is trapped, the guest can't + * modify the filtering set by the host. 
+ */ + *brbcr_el1 = read_sysreg_el1(SYS_BRBCR); + write_sysreg_el1(0, SYS_BRBCR); +} + +static void __debug_restore_brbe(u64 brbcr_el1) +{ + if (!brbcr_el1) + return; + + /* Restore BRBE controls */ + write_sysreg_el1(brbcr_el1, SYS_BRBCR); +} + void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (host_data_test_flag(HAS_SPE)) __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); + /* Disable BRBE branch records */ + if (host_data_test_flag(HAS_BRBE)) + __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1)); + if (__trace_needs_switch()) __trace_switch_to_guest(); } @@ -111,6 +141,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (host_data_test_flag(HAS_SPE)) __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); + if (host_data_test_flag(HAS_BRBE)) + __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1)); if (__trace_needs_switch()) __trace_switch_to_host(); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 0e752b515d0f..ccd575d5f6de 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -272,7 +272,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) * We're about to restore some new MMU state. Make sure * ongoing page-table walks that have started before we * trapped to EL2 have completed. This also synchronises the - * above disabling of SPE and TRBE. + * above disabling of BRBE, SPE and TRBE. * * See DDI0487I.a D8.1.5 "Out-of-context translation regimes", * rule R_LFHQG and subsequent information statements. diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index f162b0df5cae..d81275790e69 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -296,12 +296,19 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) } /* - * Prevent the guest from touching the ICC_SRE_EL1 system - * register. Note that this may not have any effect, as - * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation. + * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due + * to be relaxed in a future spec release, at which point this in + * condition can be dropped. 
*/ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + /* + * Prevent the guest from touching the ICC_SRE_EL1 system + * register. Note that this may not have any effect, as + * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation. + */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } /* * If we need to trap system registers, we must write @@ -322,8 +329,14 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + /* + * Can be dropped in the future when GICv5 spec is relaxed. See comment + * above. + */ + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + } if (!cpu_if->vgic_sre) { /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ @@ -423,10 +436,20 @@ void __vgic_v3_init_lrs(void) */ u64 __vgic_v3_get_gic_config(void) { - u64 val, sre = read_gicreg(ICC_SRE_EL1); + u64 val, sre; unsigned long flags = 0; /* + * In compat mode, we cannot access ICC_SRE_EL1 at any EL + * other than EL1 itself; just return the + * ICH_VTR_EL2. ICC_IDR0_EL1 is only implemented on a GICv5 + * system, so we first check if we have GICv5 support. + */ + if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) + return read_gicreg(ICH_VTR_EL2); + + sre = read_gicreg(ICC_SRE_EL1); + /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register * view. @@ -471,6 +494,16 @@ u64 __vgic_v3_get_gic_config(void) return val; } +static void __vgic_v3_compat_mode_enable(void) +{ + if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) + return; + + sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, 0, ICH_VCTLR_EL2_V3); + /* Wait for V3 to become enabled */ + isb(); +} + static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); @@ -490,6 +523,8 @@ void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) { + __vgic_v3_compat_mode_enable(); + /* * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen * is dependent on ICC_SRE_EL1.SRE, and we have to perform the @@ -1050,7 +1085,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu, { u64 ich_hcr; - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + if (!is_nested_ctxt(vcpu)) return false; ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 477f1580ffea..e482181c6632 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -48,8 +48,7 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); static u64 __compute_hcr(struct kvm_vcpu *vcpu) { - u64 guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2); - u64 hcr = vcpu->arch.hcr_el2; + u64 guest_hcr, hcr = vcpu->arch.hcr_el2; if (!vcpu_has_nv(vcpu)) return hcr; @@ -68,10 +67,21 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) if (!vcpu_el2_e2h_is_set(vcpu)) hcr |= HCR_NV1; + /* + * Nothing in HCR_EL2 should impact running in hypervisor + * context, apart from bits we have defined as RESx (E2H, + * HCD and co), or that cannot be set directly (the EXCLUDE + * bits). Given that we OR the guest's view with the host's, + * we can use the 0 value as the starting point, and only + * use the config-driven RES1 bits. 
+ */ + guest_hcr = kvm_vcpu_apply_reg_masks(vcpu, HCR_EL2, 0); + write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2); } else { host_data_clear_flag(VCPU_IN_HYP_CONTEXT); + guest_hcr = __vcpu_sys_reg(vcpu, HCR_EL2); if (guest_hcr & HCR_NV) { u64 va = __fix_to_virt(vncr_fixmap(smp_processor_id())); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 73e4bc7fde9e..f28c6cf4fe1b 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -77,6 +77,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); + + if (ctxt_has_sctlr2(&vcpu->arch.ctxt)) + __vcpu_assign_sys_reg(vcpu, SCTLR2_EL2, read_sysreg_el1(SYS_SCTLR2)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) @@ -139,6 +142,9 @@ static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1); write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR); write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR); + + if (ctxt_has_sctlr2(&vcpu->arch.ctxt)) + write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR2_EL2), SYS_SCTLR2); } /* diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a640e839848e..6745f38b64f9 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -15,13 +15,11 @@ #include <asm/kvm_nested.h> #include <asm/esr.h> -static void pend_sync_exception(struct kvm_vcpu *vcpu) +static unsigned int exception_target_el(struct kvm_vcpu *vcpu) { /* If not nesting, EL1 is the only possible exception target */ - if (likely(!vcpu_has_nv(vcpu))) { - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - return; - } + if (likely(!vcpu_has_nv(vcpu))) + return PSR_MODE_EL1h; /* * With NV, we need to pick between EL1 and EL2. Note that we @@ -32,26 +30,76 @@ static void pend_sync_exception(struct kvm_vcpu *vcpu) switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) { case PSR_MODE_EL2h: case PSR_MODE_EL2t: - kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); - break; + return PSR_MODE_EL2h; case PSR_MODE_EL1h: case PSR_MODE_EL1t: - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - break; + return PSR_MODE_EL1h; case PSR_MODE_EL0t: - if (vcpu_el2_tge_is_set(vcpu)) - kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); - else - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - break; + return vcpu_el2_tge_is_set(vcpu) ? 
PSR_MODE_EL2h : PSR_MODE_EL1h; default: BUG(); } } -static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target) +static enum vcpu_sysreg exception_esr_elx(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL2h) + return ESR_EL2; + + return ESR_EL1; +} + +static enum vcpu_sysreg exception_far_elx(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL2h) + return FAR_EL2; + + return FAR_EL1; +} + +static void pend_sync_exception(struct kvm_vcpu *vcpu) +{ + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + else + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC); +} + +static void pend_serror_exception(struct kvm_vcpu *vcpu) { - return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target); + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SERR); + else + kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SERR); +} + +static bool __effective_sctlr2_bit(struct kvm_vcpu *vcpu, unsigned int idx) +{ + u64 sctlr2; + + if (!kvm_has_sctlr2(vcpu->kvm)) + return false; + + if (is_nested_ctxt(vcpu) && + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_SCTLR2En)) + return false; + + if (exception_target_el(vcpu) == PSR_MODE_EL1h) + sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL1); + else + sctlr2 = vcpu_read_sys_reg(vcpu, SCTLR2_EL2); + + return sctlr2 & BIT(idx); +} + +static bool effective_sctlr2_ease(struct kvm_vcpu *vcpu) +{ + return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_EASE_SHIFT); +} + +static bool effective_sctlr2_nmea(struct kvm_vcpu *vcpu) +{ + return __effective_sctlr2_bit(vcpu, SCTLR2_EL1_NMEA_SHIFT); } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -60,7 +108,11 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u64 esr = 0; - pend_sync_exception(vcpu); + /* This delight is brought to you by FEAT_DoubleFault2. */ + if (effective_sctlr2_ease(vcpu)) + pend_serror_exception(vcpu); + else + pend_sync_exception(vcpu); /* * Build an {i,d}abort, depending on the level and the @@ -83,13 +135,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr esr |= ESR_ELx_FSC_EXTABT; - if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) { - vcpu_write_sys_reg(vcpu, addr, FAR_EL1); - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); - } else { - vcpu_write_sys_reg(vcpu, addr, FAR_EL2); - vcpu_write_sys_reg(vcpu, esr, ESR_EL2); - } + vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } static void inject_undef64(struct kvm_vcpu *vcpu) @@ -105,10 +152,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) if (kvm_vcpu_trap_il_is32bit(vcpu)) esr |= ESR_ELx_IL; - if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); - else - vcpu_write_sys_reg(vcpu, esr, ESR_EL2); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } #define DFSR_FSC_EXTABT_LPAE 0x10 @@ -155,36 +199,35 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) vcpu_write_sys_reg(vcpu, far, FAR_EL1); } -/** - * kvm_inject_dabt - inject a data abort into the guest - * @vcpu: The VCPU to receive the data abort - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. 
- */ -void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +static void __kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) { if (vcpu_el1_is_32bit(vcpu)) - inject_abt32(vcpu, false, addr); + inject_abt32(vcpu, iabt, addr); else - inject_abt64(vcpu, false, addr); + inject_abt64(vcpu, iabt, addr); } -/** - * kvm_inject_pabt - inject a prefetch abort into the guest - * @vcpu: The VCPU to receive the prefetch abort - * @addr: The address to report in the DFAR - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - */ -void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +static bool kvm_sea_target_is_el2(struct kvm_vcpu *vcpu) { - if (vcpu_el1_is_32bit(vcpu)) - inject_abt32(vcpu, true, addr); - else - inject_abt64(vcpu, true, addr); + if (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_TGE | HCR_TEA)) + return true; + + if (!vcpu_mode_priv(vcpu)) + return false; + + return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && + (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA); +} + +int kvm_inject_sea(struct kvm_vcpu *vcpu, bool iabt, u64 addr) +{ + lockdep_assert_held(&vcpu->mutex); + + if (is_nested_ctxt(vcpu) && kvm_sea_target_is_el2(vcpu)) + return kvm_inject_nested_sea(vcpu, iabt, addr); + + __kvm_inject_sea(vcpu, iabt, addr); + return 1; } void kvm_inject_size_fault(struct kvm_vcpu *vcpu) @@ -194,10 +237,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) addr = kvm_vcpu_get_fault_ipa(vcpu); addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - if (kvm_vcpu_trap_is_iabt(vcpu)) - kvm_inject_pabt(vcpu, addr); - else - kvm_inject_dabt(vcpu, addr); + __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr); /* * If AArch64 or LPAE, set FSC to 0 to indicate an Address @@ -210,9 +250,9 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE)) return; - esr = vcpu_read_sys_reg(vcpu, ESR_EL1); + esr = vcpu_read_sys_reg(vcpu, exception_esr_elx(vcpu)); esr &= ~GENMASK_ULL(5, 0); - vcpu_write_sys_reg(vcpu, esr, ESR_EL1); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); } /** @@ -230,25 +270,70 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) inject_undef64(vcpu); } -void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) +static bool serror_is_masked(struct kvm_vcpu *vcpu) { - vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); - *vcpu_hcr(vcpu) |= HCR_VSE; + return (*vcpu_cpsr(vcpu) & PSR_A_BIT) && !effective_sctlr2_nmea(vcpu); } -/** - * kvm_inject_vabt - inject an async abort / SError into the guest - * @vcpu: The VCPU to receive the exception - * - * It is assumed that this code is called from the VCPU thread and that the - * VCPU therefore is not currently executing guest code. - * - * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with - * the remaining ISS all-zeros so that this error is not interpreted as an - * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR - * value, so the CPU generates an imp-def value. - */ -void kvm_inject_vabt(struct kvm_vcpu *vcpu) +static bool kvm_serror_target_is_el2(struct kvm_vcpu *vcpu) +{ + if (is_hyp_ctxt(vcpu) || vcpu_el2_amo_is_set(vcpu)) + return true; + + if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TMEA)) + return false; + + /* + * In another example where FEAT_DoubleFault2 is entirely backwards, + * "masked" as it relates to the routing effects of HCRX_EL2.TMEA + * doesn't consider SCTLR2_EL1.NMEA. 
That is to say, even if EL1 asked + * for non-maskable SErrors, the EL2 bit takes priority if A is set. + */ + if (vcpu_mode_priv(vcpu)) + return *vcpu_cpsr(vcpu) & PSR_A_BIT; + + /* + * Otherwise SErrors are considered unmasked when taken from EL0 and + * NMEA is set. + */ + return serror_is_masked(vcpu); +} + +static bool kvm_serror_undeliverable_at_el2(struct kvm_vcpu *vcpu) +{ + return !(vcpu_el2_tge_is_set(vcpu) || vcpu_el2_amo_is_set(vcpu)); +} + +int kvm_inject_serror_esr(struct kvm_vcpu *vcpu, u64 esr) { - kvm_set_sei_esr(vcpu, ESR_ELx_ISV); + lockdep_assert_held(&vcpu->mutex); + + if (is_nested_ctxt(vcpu) && kvm_serror_target_is_el2(vcpu)) + return kvm_inject_nested_serror(vcpu, esr); + + if (vcpu_is_el2(vcpu) && kvm_serror_undeliverable_at_el2(vcpu)) { + vcpu_set_vsesr(vcpu, esr); + vcpu_set_flag(vcpu, NESTED_SERROR_PENDING); + return 1; + } + + /* + * Emulate the exception entry if SErrors are unmasked. This is useful if + * the vCPU is in a nested context w/ vSErrors enabled then we've already + * delegated he hardware vSError context (i.e. HCR_EL2.VSE, VSESR_EL2, + * VDISR_EL2) to the guest hypervisor. + * + * As we're emulating the SError injection we need to explicitly populate + * ESR_ELx.EC because hardware will not do it on our behalf. + */ + if (!serror_is_masked(vcpu)) { + pend_serror_exception(vcpu); + esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SERROR); + vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu)); + return 1; + } + + vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); + *vcpu_hcr(vcpu) |= HCR_VSE; + return 1; } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index ab365e839874..54f9358c9e0e 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -72,7 +72,7 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) return data; } -static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu) +static bool kvm_pending_external_abort(struct kvm_vcpu *vcpu) { if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) return false; @@ -90,6 +90,8 @@ static bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu) switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) { case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC): case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): + case unpack_vcpu_flag(EXCEPT_AA64_EL1_SERR): + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SERR): return true; default: return false; @@ -113,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * Detect if the MMIO return was already handled or if userspace aborted * the MMIO access. 
*/ - if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu))) + if (unlikely(!vcpu->mmio_needed || kvm_pending_external_abort(vcpu))) return 1; vcpu->mmio_needed = 0; @@ -169,10 +171,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); - if (vcpu_is_protected(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } + if (vcpu_is_protected(vcpu)) + return kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, &vcpu->kvm->arch.flags)) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2942ec92c5a4..1c78864767c5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, return 0; } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_is_map_memory(pfn); -} - static void *stage2_memcache_zalloc_page(void *arg) { struct kvm_mmu_memory_cache *mc = arg; @@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) return vma->vm_flags & VM_MTE_ALLOWED; } +static bool kvm_vma_is_cacheable(struct vm_area_struct *vma) +{ + switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) { + case MT_NORMAL_NC: + case MT_DEVICE_nGnRnE: + case MT_DEVICE_nGnRE: + return false; + default: + return true; + } +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, @@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, { int ret = 0; bool write_fault, writable, force_pte = false; - bool exec_fault, mte_allowed; - bool device = false, vfio_allow_any_uc = false; + bool exec_fault, mte_allowed, is_vma_cacheable; + bool s2_force_noncacheable = false, vfio_allow_any_uc = false; unsigned long mmu_seq; phys_addr_t ipa = fault_ipa; struct kvm *kvm = vcpu->kvm; @@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; struct page *page; + vm_flags_t vm_flags; enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; if (fault_is_perm) @@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; + vm_flags = vma->vm_flags; + + is_vma_cacheable = kvm_vma_is_cacheable(vma); + /* Don't use the VMA after the unlock -- it may have vanished */ vma = NULL; @@ -1642,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_noslot_pfn(pfn)) return -EFAULT; - if (kvm_is_device_pfn(pfn)) { - /* - * If the page was identified as device early by looking at - * the VMA flags, vma_pagesize is already representing the - * largest quantity we can map. If instead it was mapped - * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE - * and must not be upgraded. - * - * In both cases, we don't let transparent_hugepage_adjust() - * change things at the last minute. - */ - device = true; + /* + * Check if this is non-struct page memory PFN, and cannot support + * CMOs. It could potentially be unsafe to access as cachable. 
+ */ + if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) { + if (is_vma_cacheable) { + /* + * Whilst the VMA owner expects cacheable mapping to this + * PFN, hardware also has to support the FWB and CACHE DIC + * features. + * + * ARM64 KVM relies on kernel VA mapping to the PFN to + * perform cache maintenance as the CMO instructions work on + * virtual addresses. VM_PFNMAP region are not necessarily + * mapped to a KVA and hence the presence of hardware features + * S2FWB and CACHE DIC are mandatory to avoid the need for + * cache maintenance. + */ + if (!kvm_supports_cacheable_pfnmap()) + return -EFAULT; + } else { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ + s2_force_noncacheable = true; + } } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, writable = false; } - if (exec_fault && device) + if (exec_fault && s2_force_noncacheable) return -ENOEXEC; /* @@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) { if (fault_is_perm && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else @@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } } - if (!fault_is_perm && !device && kvm_has_mte(kvm)) { + if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (mte_allowed) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - if (device) { + if (s2_force_noncacheable) { if (vfio_allow_any_uc) prot |= KVM_PGTABLE_PROT_NORMAL_NC; else @@ -1808,7 +1841,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * There is no need to pass the error into the guest. 
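The new PFNMAP/MIXEDMAP handling above boils down to a three-way attribute decision. A standalone sketch with illustrative names, where kvm_supports_cacheable_pfnmap() is modelled as a plain flag for "the host has FWB and CACHE DIC":

#include <stdbool.h>

enum s2_attr { S2_CACHEABLE, S2_NORMAL_NC, S2_DEVICE, S2_REJECT };

static enum s2_attr pfnmap_stage2_attr(bool pfn_has_struct_page,
                                       bool vma_cacheable,
                                       bool host_fwb_and_dic,
                                       bool vfio_allow_any_uc)
{
        if (pfn_has_struct_page)
                return S2_CACHEABLE;            /* ordinary memory path */

        if (vma_cacheable)
                /* No KVA means no CMOs, so cacheable needs FWB + DIC. */
                return host_fwb_and_dic ? S2_CACHEABLE : S2_REJECT;

        /* Otherwise force a non-cacheable stage-2 mapping. */
        return vfio_allow_any_uc ? S2_NORMAL_NC : S2_DEVICE;
}

S2_REJECT corresponds to the -EFAULT return; the same condition also backs the check added to kvm_arch_prepare_memory_region() further down, which refuses a cacheable PFNMAP slot up front when the hardware cannot support it.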
*/ if (kvm_handle_guest_sea()) - kvm_inject_vabt(vcpu); + return kvm_inject_serror(vcpu); return 1; } @@ -1836,11 +1869,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - if (is_iabt) - kvm_inject_pabt(vcpu, fault_ipa); - else - kvm_inject_dabt(vcpu, fault_ipa); - return 1; + return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } } @@ -1912,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } if (kvm_vcpu_abt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); goto out_unlock; } @@ -1958,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (ret == 0) ret = 1; out: - if (ret == -ENOEXEC) { - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; - } + if (ret == -ENOEXEC) + ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu)); out_unlock: srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; @@ -2221,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ret = -EINVAL; break; } + + /* + * Cacheable PFNMAP is allowed only if the hardware + * supports it. + */ + if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) { + ret = -EINVAL; + break; + } } hva = min(reg_end, vma->vm_end); } while (hva < reg_end); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..153b3e11b115 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. 
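Read as a plain function, the has_tgran_2() macro above asks whether the host can run stage-2 translation at a given granule: the TGRANx_2 field either defers to the stage-1 TGRANx field or states support explicitly. A standalone rendering (the enum names are illustrative, not the ID register encodings):

#include <stdbool.h>

enum tgran2_field { TGRAN2_AS_STAGE1, TGRAN2_NI, TGRAN2_IMPLEMENTED };

static bool stage2_granule_supported(enum tgran2_field s2, bool s1_implemented)
{
        switch (s2) {
        case TGRAN2_NI:
                return false;           /* explicitly absent at stage 2 */
        case TGRAN2_AS_STAGE1:
                return s1_implemented;  /* defer to the TGRANx field */
        default:
                return true;            /* any explicit support value */
        }
}

This is what gates the TGRANx_2 = IMP advertisement for the nested guest a few lines below, so the bit is only set when the host really has the capability.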
For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1424,12 +1441,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) break; case SYS_ID_AA64PFR0_EL1: - /* No RME, AMU, MPAM, S-EL2, or RAS */ + /* No RME, AMU, MPAM, or S-EL2 */ val &= ~(ID_AA64PFR0_EL1_RME | ID_AA64PFR0_EL1_AMU | ID_AA64PFR0_EL1_MPAM | ID_AA64PFR0_EL1_SEL2 | - ID_AA64PFR0_EL1_RAS | ID_AA64PFR0_EL1_EL3 | ID_AA64PFR0_EL1_EL2 | ID_AA64PFR0_EL1_EL1 | @@ -1480,13 +1496,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } @@ -1663,69 +1682,21 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) set_sysreg_masks(kvm, HFGITR2_EL2, res0, res1); /* TCR2_EL2 */ - res0 = TCR2_EL2_RES0; - res1 = TCR2_EL2_RES1; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP)) - res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128); - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP)) - res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0; - if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT)) - res0 |= TCR2_EL2_HAFT; - if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) - res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH; - if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) - res0 |= TCR2_EL2_AIE; - if (!kvm_has_s1poe(kvm)) - res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE; - if (!kvm_has_s1pie(kvm)) - res0 |= TCR2_EL2_PIE; - if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) - res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 | - TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1); + get_reg_fixed_bits(kvm, TCR2_EL2, &res0, &res1); set_sysreg_masks(kvm, TCR2_EL2, res0, res1); /* SCTLR_EL1 */ - res0 = SCTLR_EL1_RES0; - res1 = SCTLR_EL1_RES1; - if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3)) - res0 |= SCTLR_EL1_EPAN; + get_reg_fixed_bits(kvm, SCTLR_EL1, &res0, &res1); set_sysreg_masks(kvm, SCTLR_EL1, res0, res1); + /* SCTLR2_ELx */ + get_reg_fixed_bits(kvm, SCTLR2_EL1, &res0, &res1); + set_sysreg_masks(kvm, SCTLR2_EL1, res0, res1); + get_reg_fixed_bits(kvm, SCTLR2_EL2, &res0, &res1); + set_sysreg_masks(kvm, SCTLR2_EL2, res0, res1); + /* MDCR_EL2 */ - res0 = MDCR_EL2_RES0; - res1 = MDCR_EL2_RES1; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) - res0 |= (MDCR_EL2_HPMN | MDCR_EL2_TPMCR | - MDCR_EL2_TPM | MDCR_EL2_HPME); - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) - res0 |= MDCR_EL2_E2PB | MDCR_EL2_TPMS; - if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, SPMU, IMP)) - res0 |= MDCR_EL2_EnSPM; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P1)) - res0 |= MDCR_EL2_HPMD; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) - res0 |= MDCR_EL2_TTRF; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P5)) - res0 |= MDCR_EL2_HCCD | MDCR_EL2_HLP; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) - res0 |= MDCR_EL2_E2TB; 
- if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP)) - res0 |= MDCR_EL2_TDCC; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, MTPMU, IMP) || - kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP)) - res0 |= MDCR_EL2_MTPME; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, V3P7)) - res0 |= MDCR_EL2_HPMFZO; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSS, IMP)) - res0 |= MDCR_EL2_PMSSE; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2)) - res0 |= MDCR_EL2_HPMFZS; - if (!kvm_has_feat(kvm, ID_AA64DFR1_EL1, EBEP, IMP)) - res0 |= MDCR_EL2_PMEE; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, V8P9)) - res0 |= MDCR_EL2_EBWE; - if (!kvm_has_feat(kvm, ID_AA64DFR2_EL1, STEP, IMP)) - res0 |= MDCR_EL2_EnSTEPOP; + get_reg_fixed_bits(kvm, MDCR_EL2, &res0, &res1); set_sysreg_masks(kvm, MDCR_EL2, res0, res1); /* CNTHCTL_EL2 */ @@ -1782,3 +1753,43 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu)) kvm_inject_nested_irq(vcpu); } + +/* + * One of the many architectural bugs in FEAT_NV2 is that the guest hypervisor + * can write to HCR_EL2 behind our back, potentially changing the exception + * routing / masking for even the host context. + * + * What follows is some slop to (1) react to exception routing / masking and (2) + * preserve the pending SError state across translation regimes. + */ +void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu) +{ + if (!vcpu_has_nv(vcpu)) + return; + + if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) + kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); +} + +void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu) +{ + unsigned long *hcr = vcpu_hcr(vcpu); + + if (!vcpu_has_nv(vcpu)) + return; + + /* + * We previously decided that an SError was deliverable to the guest. + * Reap the pending state from HCR_EL2 and... + */ + if (unlikely(__test_and_clear_bit(__ffs(HCR_VSE), hcr))) + vcpu_set_flag(vcpu, NESTED_SERROR_PENDING); + + /* + * Re-attempt SError injection in case the deliverability has changed, + * which is necessary to faithfully emulate WFI the case of a pending + * SError being a wakeup condition. 
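The flush/sync pair here is easiest to see as a tiny state machine over the HCR_EL2.VSE bit and the NESTED_SERROR_PENDING flag. A standalone sketch with illustrative types, where the injection path is modelled as a callback:

#include <stdbool.h>

struct vserror_model {
        bool hcr_vse;           /* HCR_EL2.VSE as seen by hardware */
        bool nested_pending;    /* NESTED_SERROR_PENDING flag */
};

typedef void (*inject_fn)(struct vserror_model *m);

/* flush, before entering the guest: retry a parked SError. */
static void nested_flush(struct vserror_model *m, inject_fn inject)
{
        if (m->nested_pending) {
                m->nested_pending = false;
                inject(m);      /* may re-set hcr_vse or emulate the entry */
        }
}

/* sync, after exiting the guest: reap VSE so a vEL2 write to HCR_EL2 cannot
 * lose it, then re-attempt delivery since routing/masking may have changed. */
static void nested_sync(struct vserror_model *m, inject_fn inject)
{
        if (m->hcr_vse) {
                m->hcr_vse = false;
                m->nested_pending = true;
        }
        if (m->nested_pending) {
                m->nested_pending = false;
                inject(m);
        }
}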
+ */ + if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) + kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 76c2f0da821f..82ffb3b3b3cf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -108,7 +108,6 @@ static bool get_el2_to_el1_mapping(unsigned int reg, PURE_EL2_SYSREG( HACR_EL2 ); PURE_EL2_SYSREG( VTTBR_EL2 ); PURE_EL2_SYSREG( VTCR_EL2 ); - PURE_EL2_SYSREG( RVBAR_EL2 ); PURE_EL2_SYSREG( TPIDR_EL2 ); PURE_EL2_SYSREG( HPFAR_EL2 ); PURE_EL2_SYSREG( HCRX_EL2 ); @@ -144,6 +143,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); default: return false; } @@ -533,8 +533,7 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, return ignore_write(vcpu, p); if (p->Op1 == 4) { /* ICC_SRE_EL2 */ - p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | - ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB); + p->regval = KVM_ICC_SRE_EL2; } else { /* ICC_SRE_EL1 */ p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; } @@ -773,6 +772,12 @@ static u64 reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return mpidr; } +static unsigned int hidden_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return REG_HIDDEN; +} + static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { @@ -1612,13 +1617,14 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_GCS); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_THE); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTEX); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_DF2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_PFAR); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; case SYS_ID_AA64PFR2_EL1: - /* We only expose FPMR */ - val &= ID_AA64PFR2_EL1_FPMR; + val &= ID_AA64PFR2_EL1_FPMR | + (kvm_has_mte(vcpu->kvm) ? + ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY : + 0); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1643,8 +1649,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ID_AA64MMFR2_EL1_NV; break; case SYS_ID_AA64MMFR3_EL1: - val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE | - ID_AA64MMFR3_EL1_S1PIE; + val &= ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | + ID_AA64MMFR3_EL1_S1POE | + ID_AA64MMFR3_EL1_S1PIE; break; case SYS_ID_MMFR4_EL1: val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); @@ -1811,7 +1819,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val) val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP); } - if (kvm_vgic_global_state.type == VGIC_V3) { + if (vgic_is_v3(vcpu->kvm)) { val &= ~ID_AA64PFR0_EL1_GIC_MASK; val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); } @@ -1953,6 +1961,14 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) return -EINVAL; + /* + * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then + * we support GICv3. Fail attempts to do anything but set that to IMP. 
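A minimal standalone rendering of the check described above, assuming the architected field layout (GIC occupies ID_AA64PFR0_EL1[27:24] and IMP is 0b0001); the helper name is invented:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define PFR0_GIC_SHIFT  24
#define PFR0_GIC_MASK   (0xfull << PFR0_GIC_SHIFT)
#define PFR0_GIC_IMP    1ull

static int check_pfr0_gic_write(uint64_t user_val, bool v3_compat_host)
{
        uint64_t gic = (user_val & PFR0_GIC_MASK) >> PFR0_GIC_SHIFT;

        if (v3_compat_host && gic != PFR0_GIC_IMP)
                return -EINVAL;

        return 0;       /* remaining fields go through the usual ID checks */
}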
+ */ + if (vgic_is_v3_compat(vcpu->kvm) && + FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } @@ -2325,6 +2341,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) #define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v) +#define EL2_REG_VNCR_FILT(name, vis) \ + EL2_REG_FILTERED(name, bad_vncr_trap, reset_val, 0, vis) +#define EL2_REG_VNCR_GICv3(name) \ + EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) /* @@ -2483,6 +2503,21 @@ static unsigned int vncr_el2_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static unsigned int sctlr2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (kvm_has_sctlr2(vcpu->kvm)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int sctlr2_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return __el2_visibility(vcpu, rd, sctlr2_visibility); +} + static bool access_zcr_el2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -2513,11 +2548,7 @@ static bool access_gic_vtr(struct kvm_vcpu *vcpu, if (p->is_write) return write_to_read_only(vcpu, p, r); - p->regval = kvm_vgic_global_state.ich_vtr_el2; - p->regval &= ~(ICH_VTR_EL2_DVIM | - ICH_VTR_EL2_A3V | - ICH_VTR_EL2_IDbits); - p->regval |= ICH_VTR_EL2_nV4; + p->regval = kvm_get_guest_vtr_el2(); return true; } @@ -2588,6 +2619,26 @@ static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, tcr2_visibility); } +static unsigned int fgt2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (el2_visibility(vcpu, rd) == 0 && + kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, FGT2)) + return 0; + + return REG_HIDDEN; +} + +static unsigned int fgt_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (el2_visibility(vcpu, rd) == 0 && + kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, FGT, IMP)) + return 0; + + return REG_HIDDEN; +} + static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -2624,7 +2675,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, */ if (hpmn > vcpu->kvm->arch.nr_pmu_counters) { hpmn = vcpu->kvm->arch.nr_pmu_counters; - u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); + u64p_replace_bits(&val, hpmn, MDCR_EL2_HPMN); } __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); @@ -2639,6 +2690,23 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, return true; } +static bool access_ras(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + struct kvm *kvm = vcpu->kvm; + + switch(reg_to_encoding(r)) { + default: + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + } + + return trap_raz_wi(vcpu, p, r); +} + /* * For historical (ahem ABI) reasons, KVM treated MIDR_EL1, REVIDR_EL1, and * AIDR_EL1 as "invariant" registers, meaning userspace cannot change them. 
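Behaviourally, access_ras() above gives the ERR* registers two personalities depending on the VM's configuration. A standalone sketch of that decision (illustrative names):

#include <stdbool.h>

enum ras_trap { RAS_UNDEF, RAS_RAZ_WI };

static enum ras_trap err_reg_behaviour(bool guest_has_ras)
{
        /* No FEAT_RAS in the guest's view: the accesses simply UNDEF.
         * Otherwise keep the historical RAZ/WI handling. */
        return guest_has_ras ? RAS_RAZ_WI : RAS_UNDEF;
}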
@@ -2866,7 +2934,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR0_EL1_FP)), ID_FILTERED(ID_AA64PFR1_EL1, id_aa64pfr1_el1, ~(ID_AA64PFR1_EL1_PFAR | - ID_AA64PFR1_EL1_DF2 | ID_AA64PFR1_EL1_MTEX | ID_AA64PFR1_EL1_THE | ID_AA64PFR1_EL1_GCS | @@ -2878,7 +2945,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64PFR1_EL1_MPAM_frac | ID_AA64PFR1_EL1_RAS_frac | ID_AA64PFR1_EL1_MTE)), - ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR), + ID_WRITABLE(ID_AA64PFR2_EL1, + ID_AA64PFR2_EL1_FPMR | + ID_AA64PFR2_EL1_MTEFAR | + ID_AA64PFR2_EL1_MTESTOREONLY), ID_UNALLOCATED(4,3), ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_HIDDEN(ID_AA64SMFR0_EL1), @@ -2945,6 +3015,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_AA64MMFR2_EL1_NV | ID_AA64MMFR2_EL1_CCIDX)), ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX | + ID_AA64MMFR3_EL1_SCTLRX | ID_AA64MMFR3_EL1_S1PIE | ID_AA64MMFR3_EL1_S1POE)), ID_WRITABLE(ID_AA64MMFR4_EL1, ID_AA64MMFR4_EL1_NV_frac), @@ -2955,6 +3026,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + { SYS_DESC(SYS_SCTLR2_EL1), access_vm_reg, reset_val, SCTLR2_EL1, 0, + .visibility = sctlr2_visibility }, MTE_REG(RGSR_EL1), MTE_REG(GCR_EL1), @@ -2984,14 +3057,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, - { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, - { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, + { SYS_DESC(SYS_ERRIDR_EL1), access_ras }, + { SYS_DESC(SYS_ERRSELR_EL1), access_ras }, + { SYS_DESC(SYS_ERXFR_EL1), access_ras }, + { SYS_DESC(SYS_ERXCTLR_EL1), access_ras }, + { SYS_DESC(SYS_ERXSTATUS_EL1), access_ras }, + { SYS_DESC(SYS_ERXADDR_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC0_EL1), access_ras }, + { SYS_DESC(SYS_ERXMISC1_EL1), access_ras }, MTE_REG(TFSR_EL1), MTE_REG(TFSRE0_EL1), @@ -3302,12 +3375,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0), EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1), EL2_REG(ACTLR_EL2, access_rw, reset_val, 0), + EL2_REG_FILTERED(SCTLR2_EL2, access_vm_reg, reset_val, 0, + sctlr2_el2_visibility), EL2_REG_VNCR(HCR_EL2, reset_hcr, 0), EL2_REG(MDCR_EL2, access_mdcr, reset_mdcr, 0), EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1), EL2_REG_VNCR(HSTR_EL2, reset_val, 0), - EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0), - EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0), + EL2_REG_VNCR_FILT(HFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HFGWTR_EL2, fgt_visibility), EL2_REG_VNCR(HFGITR_EL2, reset_val, 0), EL2_REG_VNCR(HACR_EL2, reset_val, 0), @@ -3327,9 +3402,14 @@ static const struct sys_reg_desc sys_reg_descs[] = { vncr_el2_visibility), { SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 }, - EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0), - EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0), - EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0), + EL2_REG_VNCR_FILT(HDFGRTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HDFGWTR2_EL2, 
fgt2_visibility), + EL2_REG_VNCR_FILT(HFGRTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HFGWTR2_EL2, fgt2_visibility), + EL2_REG_VNCR_FILT(HDFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HDFGWTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HAFGRTR_EL2, fgt_visibility), + EL2_REG_VNCR_FILT(HFGITR2_EL2, fgt2_visibility), EL2_REG_REDIR(SPSR_EL2, reset_val, 0), EL2_REG_REDIR(ELR_EL2, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, @@ -3344,6 +3424,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), EL2_REG_REDIR(ESR_EL2, reset_val, 0), + EL2_REG_VNCR(VSESR_EL2, reset_unknown, 0), { SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 }, EL2_REG_REDIR(FAR_EL2, reset_val, 0), @@ -3370,43 +3451,44 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MPAMVPM7_EL2), undef_access }, EL2_REG(VBAR_EL2, access_rw, reset_val, 0), - EL2_REG(RVBAR_EL2, access_rw, reset_val, 0), + { SYS_DESC(SYS_RVBAR_EL2), undef_access }, { SYS_DESC(SYS_RMR_EL2), undef_access }, + EL2_REG_VNCR(VDISR_EL2, reset_unknown, 0), - EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP0R2_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP0R3_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP1R0_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP1R1_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP1R2_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_AP1R3_EL2, reset_val, 0), + EL2_REG_VNCR_GICv3(ICH_AP0R0_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R1_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R2_EL2), + EL2_REG_VNCR_GICv3(ICH_AP0R3_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R0_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R1_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R2_EL2), + EL2_REG_VNCR_GICv3(ICH_AP1R3_EL2), { SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre }, - EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0), + EL2_REG_VNCR_GICv3(ICH_HCR_EL2), { SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr }, { SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr }, { SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr }, { SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr }, - EL2_REG_VNCR(ICH_VMCR_EL2, reset_val, 0), - - EL2_REG_VNCR(ICH_LR0_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR1_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR2_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR3_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR4_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR5_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR6_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR7_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR8_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR9_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR10_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR11_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR12_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR13_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR14_EL2, reset_val, 0), - EL2_REG_VNCR(ICH_LR15_EL2, reset_val, 0), + EL2_REG_VNCR_GICv3(ICH_VMCR_EL2), + + EL2_REG_VNCR_GICv3(ICH_LR0_EL2), + EL2_REG_VNCR_GICv3(ICH_LR1_EL2), + EL2_REG_VNCR_GICv3(ICH_LR2_EL2), + EL2_REG_VNCR_GICv3(ICH_LR3_EL2), + EL2_REG_VNCR_GICv3(ICH_LR4_EL2), + EL2_REG_VNCR_GICv3(ICH_LR5_EL2), + EL2_REG_VNCR_GICv3(ICH_LR6_EL2), + EL2_REG_VNCR_GICv3(ICH_LR7_EL2), + EL2_REG_VNCR_GICv3(ICH_LR8_EL2), + EL2_REG_VNCR_GICv3(ICH_LR9_EL2), + EL2_REG_VNCR_GICv3(ICH_LR10_EL2), + EL2_REG_VNCR_GICv3(ICH_LR11_EL2), + EL2_REG_VNCR_GICv3(ICH_LR12_EL2), + EL2_REG_VNCR_GICv3(ICH_LR13_EL2), + EL2_REG_VNCR_GICv3(ICH_LR14_EL2), + EL2_REG_VNCR_GICv3(ICH_LR15_EL2), EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0), EL2_REG(TPIDR_EL2, access_rw, 
reset_val, 0), @@ -4275,12 +4357,12 @@ static const struct sys_reg_desc cp15_64_regs[] = { }; static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, - bool is_32) + bool reset_check) { unsigned int i; for (i = 0; i < n; i++) { - if (!is_32 && table[i].reg && !table[i].reset) { + if (reset_check && table[i].reg && !table[i].reset) { kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n", &table[i], i, table[i].name); return false; @@ -4475,7 +4557,7 @@ static bool kvm_esr_cp10_id_to_sys64(u64 esr, struct sys_reg_params *params) return true; kvm_pr_unimpl("Unhandled cp10 register %s: %u\n", - params->is_write ? "write" : "read", reg_id); + str_write_read(params->is_write), reg_id); return false; } @@ -5269,18 +5351,22 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) int __init kvm_sys_reg_table_init(void) { + const struct sys_reg_desc *gicv3_regs; bool valid = true; - unsigned int i; + unsigned int i, sz; int ret = 0; /* Make sure tables are unique and in order. */ - valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false); - valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true); - valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true); - valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true); - valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true); + valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), true); + valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), false); + valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), false); + valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), false); + valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), false); valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false); + gicv3_regs = vgic_v3_get_sysreg_table(&sz); + valid &= check_sysreg_table(gicv3_regs, sz, false); + if (!valid) return -EINVAL; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index ef97d9fc67cc..317abc490368 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -108,7 +108,7 @@ inline void print_sys_reg_msg(const struct sys_reg_params *p, /* Look, we even formatted it for you to paste into the table! */ kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n", &(struct va_format){ fmt, &va }, - p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read"); + p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, str_write_read(p->is_write)); va_end(va); } diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index f85415db7713..a7ab9a3bbed0 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -113,7 +113,7 @@ TRACE_EVENT(kvm_sys_access, __entry->vcpu_pc, __entry->name ?: "UNKN", __entry->Op0, __entry->Op1, __entry->CRn, __entry->CRm, __entry->Op2, - __entry->is_write ? 
"write" : "read") + str_write_read(__entry->is_write)) ); TRACE_EVENT(kvm_set_guest_debug, diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 5eacb4b3250a..bdc2d57370b2 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -297,6 +297,91 @@ static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, return 0; } +static int set_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + __vcpu_assign_sys_reg(vcpu, r->reg, val); + return 0; +} + +static int get_gic_ich_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + *val = __vcpu_sys_reg(vcpu, r->reg); + return 0; +} + +static int set_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + u8 idx = r->Op2 & 3; + + if (idx > vgic_v3_max_apr_idx(vcpu)) + return -EINVAL; + + return set_gic_ich_reg(vcpu, r, val); +} + +static int get_gic_ich_apr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + u8 idx = r->Op2 & 3; + + if (idx > vgic_v3_max_apr_idx(vcpu)) + return -EINVAL; + + return get_gic_ich_reg(vcpu, r, val); +} + +static int set_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + if (val != KVM_ICC_SRE_EL2) + return -EINVAL; + return 0; +} + +static int get_gic_icc_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + *val = KVM_ICC_SRE_EL2; + return 0; +} + +static int set_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + if (val != kvm_get_guest_vtr_el2()) + return -EINVAL; + return 0; +} + +static int get_gic_ich_vtr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + *val = kvm_get_guest_vtr_el2(); + return 0; +} + +static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + return vcpu_has_nv(vcpu) ? 
0 : REG_HIDDEN; +} + +#define __EL2_REG(r, acc, i) \ + { \ + SYS_DESC(SYS_ ## r), \ + .get_user = get_gic_ ## acc, \ + .set_user = set_gic_ ## acc, \ + .reg = i, \ + .visibility = el2_visibility, \ + } + +#define EL2_REG(r, acc) __EL2_REG(r, acc, r) + +#define EL2_REG_RO(r, acc) __EL2_REG(r, acc, 0) + static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { { SYS_DESC(SYS_ICC_PMR_EL1), .set_user = set_gic_pmr, .get_user = get_gic_pmr, }, @@ -328,8 +413,42 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { .set_user = set_gic_grpen0, .get_user = get_gic_grpen0, }, { SYS_DESC(SYS_ICC_IGRPEN1_EL1), .set_user = set_gic_grpen1, .get_user = get_gic_grpen1, }, + EL2_REG(ICH_AP0R0_EL2, ich_apr), + EL2_REG(ICH_AP0R1_EL2, ich_apr), + EL2_REG(ICH_AP0R2_EL2, ich_apr), + EL2_REG(ICH_AP0R3_EL2, ich_apr), + EL2_REG(ICH_AP1R0_EL2, ich_apr), + EL2_REG(ICH_AP1R1_EL2, ich_apr), + EL2_REG(ICH_AP1R2_EL2, ich_apr), + EL2_REG(ICH_AP1R3_EL2, ich_apr), + EL2_REG_RO(ICC_SRE_EL2, icc_sre), + EL2_REG(ICH_HCR_EL2, ich_reg), + EL2_REG_RO(ICH_VTR_EL2, ich_vtr), + EL2_REG(ICH_VMCR_EL2, ich_reg), + EL2_REG(ICH_LR0_EL2, ich_reg), + EL2_REG(ICH_LR1_EL2, ich_reg), + EL2_REG(ICH_LR2_EL2, ich_reg), + EL2_REG(ICH_LR3_EL2, ich_reg), + EL2_REG(ICH_LR4_EL2, ich_reg), + EL2_REG(ICH_LR5_EL2, ich_reg), + EL2_REG(ICH_LR6_EL2, ich_reg), + EL2_REG(ICH_LR7_EL2, ich_reg), + EL2_REG(ICH_LR8_EL2, ich_reg), + EL2_REG(ICH_LR9_EL2, ich_reg), + EL2_REG(ICH_LR10_EL2, ich_reg), + EL2_REG(ICH_LR11_EL2, ich_reg), + EL2_REG(ICH_LR12_EL2, ich_reg), + EL2_REG(ICH_LR13_EL2, ich_reg), + EL2_REG(ICH_LR14_EL2, ich_reg), + EL2_REG(ICH_LR15_EL2, ich_reg), }; +const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz) +{ + *sz = ARRAY_SIZE(gic_v3_icc_reg_descs); + return gic_v3_icc_reg_descs; +} + static u64 attr_to_id(u64 attr) { return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr), @@ -341,8 +460,12 @@ static u64 attr_to_id(u64 attr) int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs, - ARRAY_SIZE(gic_v3_icc_reg_descs))) + const struct sys_reg_desc *r; + + r = get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs, + ARRAY_SIZE(gic_v3_icc_reg_descs)); + + if (r && !sysreg_hidden(vcpu, r)) return 0; return -ENXIO; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index eb1205654ac8..1e680ad6e863 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -157,6 +157,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; + kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; @@ -165,6 +166,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) else INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + if (type == KVM_DEV_TYPE_ARM_VGIC_V3) + kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); + out_unlock: mutex_unlock(&kvm->arch.config_lock); kvm_unlock_all_vcpus(kvm); @@ -391,11 +395,10 @@ int vgic_init(struct kvm *kvm) goto out; /* - * If we have GICv4.1 enabled, unconditionally request enable the - * v4 support so that we get HW-accelerated vSGIs. Otherwise, only - * enable it if we present a virtual ITS to the guest. + * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs, + * vLPIs) is supported. 
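The userspace accessors added above follow two simple contracts: APR slots are bounded by the implemented priority bits, and the fixed registers (ICC_SRE_EL2, ICH_VTR_EL2) only ever report one value and only accept that value back, so save/restore round-trips cleanly. A standalone sketch with illustrative names and types:

#include <errno.h>
#include <stdint.h>

static int set_apr(uint64_t *apr_regs, unsigned int idx,
                   unsigned int max_idx, uint64_t val)
{
        if (idx > max_idx)      /* mirrors the vgic_v3_max_apr_idx() bound */
                return -EINVAL;
        apr_regs[idx] = val;
        return 0;
}

static int get_fixed(uint64_t canonical, uint64_t *val)
{
        *val = canonical;       /* e.g. KVM_ICC_SRE_EL2 */
        return 0;
}

static int set_fixed(uint64_t canonical, uint64_t val)
{
        return (val == canonical) ? 0 : -EINVAL;
}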
*/ - if (vgic_supports_direct_msis(kvm)) { + if (vgic_supports_direct_irqs(kvm)) { ret = vgic_v4_init(kvm); if (ret) goto out; @@ -409,15 +412,7 @@ int vgic_init(struct kvm *kvm) goto out; vgic_debug_init(kvm); - - /* - * If userspace didn't set the GIC implementation revision, - * default to the latest and greatest. You know want it. - */ - if (!dist->implementation_rev) - dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST; dist->initialized = true; - out: return ret; } @@ -443,7 +438,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) dist->vgic_cpu_base = VGIC_ADDR_UNDEF; } - if (vgic_supports_direct_msis(kvm)) + if (vgic_supports_direct_irqs(kvm)) vgic_v4_teardown(kvm); xa_destroy(&dist->lpi_xa); @@ -674,10 +669,12 @@ void kvm_vgic_init_cpu_hardware(void) * We want to make sure the list registers start out clear so that we * only have the program the used registers. */ - if (kvm_vgic_global_state.type == VGIC_V2) + if (kvm_vgic_global_state.type == VGIC_V2) { vgic_v2_init_lrs(); - else + } else if (kvm_vgic_global_state.type == VGIC_V3 || + kvm_vgic_global_state.has_gcie_v3_compat) { kvm_call_hyp(__vgic_v3_init_lrs); + } } /** @@ -722,6 +719,9 @@ int kvm_vgic_hyp_init(void) kvm_info("GIC system register CPU interface enabled\n"); } break; + case GIC_V5: + ret = vgic_v5_probe(gic_kvm_info); + break; default: ret = -ENODEV; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 534049c7c94b..7368c13f16b7 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -758,7 +758,7 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite) if (irq) { scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) { if (irq->hw) - WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + its_unmap_vlpi(ite->irq->host_irq); irq->hw = false; } @@ -2694,6 +2694,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); break; + default: + ret = -ENXIO; + break; } mutex_unlock(&its->its_lock); diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index f9ae790163fb..3d1a776b716d 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -5,6 +5,7 @@ * Copyright (C) 2015 ARM Ltd. * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm_host.h> #include <kvm/arm_vgic.h> #include <linux/uaccess.h> @@ -303,12 +304,6 @@ static int vgic_get_common_attr(struct kvm_device *dev, VGIC_NR_PRIVATE_IRQS, uaddr); break; } - case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - - r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr); - break; - } } return r; @@ -510,6 +505,24 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, } /* + * Allow access to certain ID-like registers prior to VGIC initialization, + * thereby allowing the VMM to provision the features / sizing of the VGIC. 
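Taken together with the create-time defaults above (implementation_rev and nassgicap start at the host's best values), the intent is that userspace can dial these back via GICD_IIDR/GICD_TYPER2 at any point before vgic_init(). A standalone sketch of that lifecycle (illustrative types):

#include <stdbool.h>

struct vgic_model {
        unsigned int implementation_rev;
        bool nassgicap;
        bool initialized;
};

static void vgic_create_defaults(struct vgic_model *d, bool is_v3,
                                 bool host_direct_sgis, unsigned int rev_latest)
{
        d->implementation_rev = rev_latest;       /* KVM_VGIC_IMP_REV_LATEST */
        d->nassgicap = is_v3 && host_direct_sgis; /* system_supports_direct_sgis() */
        d->initialized = false;
}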
+ */ +static bool reg_allowed_pre_init(struct kvm_device_attr *attr) +{ + if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) + return false; + + switch (attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK) { + case GICD_IIDR: + case GICD_TYPER2: + return true; + default: + return false; + } +} + +/* * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state * * @dev: kvm device handle @@ -523,7 +536,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; - bool uaccess, post_init = true; + bool uaccess; u32 val; int ret; @@ -539,9 +552,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, /* Sysregs uaccess is performed by the sysreg handling code */ uaccess = false; break; - case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: - post_init = false; - fallthrough; default: uaccess = true; } @@ -561,7 +571,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->arch.config_lock); - if (post_init != vgic_initialized(dev->kvm)) { + if (!(vgic_initialized(dev->kvm) || reg_allowed_pre_init(attr))) { ret = -EBUSY; goto out; } @@ -591,19 +601,6 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, } break; } - case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: - if (!is_write) { - val = dev->kvm->arch.vgic.mi_intid; - ret = 0; - break; - } - - ret = -EINVAL; - if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) { - dev->kvm->arch.vgic.mi_intid = val; - ret = 0; - } - break; default: ret = -EINVAL; break; @@ -630,8 +627,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: - case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: return vgic_v3_attr_regs_access(dev, attr, true); + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: { + u32 __user *uaddr = (u32 __user *)attr->addr; + u32 val; + + if (get_user(val, uaddr)) + return -EFAULT; + + guard(mutex)(&dev->kvm->arch.config_lock); + if (vgic_initialized(dev->kvm)) + return -EBUSY; + + if (!irq_is_ppi(val)) + return -EINVAL; + + dev->kvm->arch.vgic.mi_intid = val; + return 0; + } default: return vgic_set_common_attr(dev, attr); } @@ -645,8 +658,13 @@ static int vgic_v3_get_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: - case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: return vgic_v3_attr_regs_access(dev, attr, false); + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + + guard(mutex)(&dev->kvm->arch.config_lock); + return put_user(dev->kvm->arch.vgic.mi_intid, uaddr); + } default: return vgic_get_common_attr(dev, attr); } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index ae4c0593d114..a3ef185209e9 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -50,8 +50,17 @@ bool vgic_has_its(struct kvm *kvm) bool vgic_supports_direct_msis(struct kvm *kvm) { - return (kvm_vgic_global_state.has_gicv4_1 || - (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm))); + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); +} + +bool system_supports_direct_sgis(void) +{ + return kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi(); +} + +bool vgic_supports_direct_sgis(struct kvm *kvm) +{ + return kvm->arch.vgic.nassgicap; } /* @@ -86,7 +95,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, } break; case GICD_TYPER2: - if 
(kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi()) + if (vgic_supports_direct_sgis(vcpu->kvm)) value = GICD_TYPER2_nASSGIcap; break; case GICD_IIDR: @@ -119,7 +128,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; /* Not a GICv4.1? No HW SGIs */ - if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi()) + if (!vgic_supports_direct_sgis(vcpu->kvm)) val &= ~GICD_CTLR_nASSGIreq; /* Dist stays enabled? nASSGIreq is RO */ @@ -133,7 +142,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, if (is_hwsgi != dist->nassgireq) vgic_v4_configure_vsgis(vcpu->kvm); - if (kvm_vgic_global_state.has_gicv4_1 && + if (vgic_supports_direct_sgis(vcpu->kvm) && was_enabled != dist->enabled) kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4); else if (!was_enabled && dist->enabled) @@ -159,8 +168,18 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, switch (addr & 0x0c) { case GICD_TYPER2: - if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) + reg = vgic_mmio_read_v3_misc(vcpu, addr, len); + + if (reg == val) + return 0; + if (vgic_initialized(vcpu->kvm)) + return -EBUSY; + if ((reg ^ val) & ~GICD_TYPER2_nASSGIcap) return -EINVAL; + if (!system_supports_direct_sgis() && val) + return -EINVAL; + + dist->nassgicap = val & GICD_TYPER2_nASSGIcap; return 0; case GICD_IIDR: reg = vgic_mmio_read_v3_misc(vcpu, addr, len); @@ -178,7 +197,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, } case GICD_CTLR: /* Not a GICv4.1? No HW SGIs */ - if (!kvm_vgic_global_state.has_gicv4_1) + if (!vgic_supports_direct_sgis(vcpu->kvm)) val &= ~GICD_CTLR_nASSGIreq; dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index a50fb7e6841f..7f1259b49c50 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -116,7 +116,7 @@ bool vgic_state_is_nested(struct kvm_vcpu *vcpu) { u64 xmo; - if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + if (is_nested_ctxt(vcpu)) { xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO); WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO), "Separate virtual IRQ/FIQ settings not supported\n"); @@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) - level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 193946108192..4d9343d2b0b1 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -356,7 +356,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu) { struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; - if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) + if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident) return 0; return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu)); @@ -367,7 +367,7 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; int err; - if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) + if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident) return 0; if (vcpu_get_flag(vcpu, IN_WFI)) @@ -527,28 +527,26 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq) return NULL; } 
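The new GICD_TYPER2 userspace write above is the only place nASSGIcap can be negotiated, and only in a narrow window. A standalone sketch of the rules it enforces, assuming the usual GICD_TYPER2.nASSGIcap position (bit 8) and an illustrative function name:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

#define GICD_TYPER2_nASSGIcap   (1u << 8)

static int write_typer2(uint32_t current, uint32_t val, bool vgic_initialized,
                        bool host_has_vsgi, bool *nassgicap)
{
        if (val == current)
                return 0;                       /* idempotent restore */
        if (vgic_initialized)
                return -EBUSY;                  /* too late to renegotiate */
        if ((current ^ val) & ~GICD_TYPER2_nASSGIcap)
                return -EINVAL;                 /* only nASSGIcap may change */
        if (!host_has_vsgi && val)
                return -EINVAL;                 /* can't promise what HW lacks */

        *nassgicap = val & GICD_TYPER2_nASSGIcap;
        return 0;
}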
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq) +void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq) { struct vgic_irq *irq; unsigned long flags; - int ret = 0; if (!vgic_supports_direct_msis(kvm)) - return 0; + return; irq = __vgic_host_irq_get_vlpi(kvm, host_irq); if (!irq) - return 0; + return; raw_spin_lock_irqsave(&irq->irq_lock, flags); WARN_ON(irq->hw && irq->host_irq != host_irq); if (irq->hw) { atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); irq->hw = false; - ret = its_unmap_vlpi(host_irq); + its_unmap_vlpi(host_irq); } raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(kvm, irq); - return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c new file mode 100644 index 000000000000..6bdbb221bcde --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <kvm/arm_vgic.h> +#include <linux/irqchip/arm-vgic-info.h> + +#include "vgic.h" + +/* + * Probe for a vGICv5 compatible interrupt controller, returning 0 on success. + * Currently only supports GICv3-based VMs on a GICv5 host, and hence only + * registers a VGIC_V3 device. + */ +int vgic_v5_probe(const struct gic_kvm_info *info) +{ + u64 ich_vtr_el2; + int ret; + + if (!info->has_gcie_v3_compat) + return -ENODEV; + + kvm_vgic_global_state.type = VGIC_V5; + kvm_vgic_global_state.has_gcie_v3_compat = true; + + /* We only support v3 compat mode - use vGICv3 limits */ + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + kvm_vgic_global_state.vcpu_base = 0; + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.has_gicv4 = false; + kvm_vgic_global_state.has_gicv4_1 = false; + + ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3-legacy KVM device.\n"); + return ret; + } + + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GCIE legacy system register CPU interface\n"); + + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 8f8096d48925..f5148b38120a 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -951,7 +951,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * can be directly injected (GICv4). 
*/ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && - !vgic_supports_direct_msis(vcpu->kvm)) + !vgic_supports_direct_irqs(vcpu->kvm)) return; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -965,7 +965,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); - if (vgic_supports_direct_msis(vcpu->kvm)) + if (vgic_supports_direct_irqs(vcpu->kvm)) vgic_v4_commit(vcpu); } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4349084cb9a6..1384a04c0784 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -64,6 +64,24 @@ KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) +#define KVM_ICC_SRE_EL2 (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | \ + ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB) +#define KVM_ICH_VTR_EL2_RES0 (ICH_VTR_EL2_DVIM | \ + ICH_VTR_EL2_A3V | \ + ICH_VTR_EL2_IDbits) +#define KVM_ICH_VTR_EL2_RES1 ICH_VTR_EL2_nV4 + +static inline u64 kvm_get_guest_vtr_el2(void) +{ + u64 vtr; + + vtr = kvm_vgic_global_state.ich_vtr_el2; + vtr &= ~KVM_ICH_VTR_EL2_RES0; + vtr |= KVM_ICH_VTR_EL2_RES1; + + return vtr; +} + /* * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, * below macros are defined for ITS table entry encoding. @@ -297,6 +315,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr, bool is_write); int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz); int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, u32 intid, u32 *val); int kvm_register_vgic_device(unsigned long type); @@ -308,6 +327,8 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); +int vgic_v5_probe(const struct gic_kvm_info *info); + static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; @@ -369,7 +390,23 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm); int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq); int vgic_its_invall(struct kvm_vcpu *vcpu); +bool system_supports_direct_sgis(void); bool vgic_supports_direct_msis(struct kvm *kvm); +bool vgic_supports_direct_sgis(struct kvm *kvm); + +static inline bool vgic_supports_direct_irqs(struct kvm *kvm) +{ + /* + * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware, + * indirectly allowing userspace to control whether or not vPEs are + * allocated for the VM. + */ + if (system_supports_direct_sgis()) + return vgic_supports_direct_sgis(kvm); + + return vgic_supports_direct_msis(kvm); +} + int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); void vgic_v4_configure_vsgis(struct kvm *kvm); @@ -389,6 +426,17 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu); void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu); +static inline bool vgic_is_v3_compat(struct kvm *kvm) +{ + return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) && + kvm_vgic_global_state.has_gcie_v3_compat; +} + +static inline bool vgic_is_v3(struct kvm *kvm) +{ + return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm); +} + int vgic_its_debug_init(struct kvm_device *dev); void vgic_its_debug_destroy(struct kvm_device *dev); |
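A quick standalone check of the kvm_get_guest_vtr_el2() helper above: the RES0 bits are cleared and the RES1 (nV4) bit is forced, so the value advertised to the guest does not leak host-only features, and the fixed-value userspace accessor can simply compare against it. The bit positions below are stand-ins, not the architectural ones:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_RES0      ((1ull << 63) | 0x7ull) /* stand-in for DVIM/A3V/IDbits */
#define MODEL_RES1      (1ull << 62)            /* stand-in for nV4 */

static uint64_t guest_vtr(uint64_t host_vtr)
{
        return (host_vtr & ~MODEL_RES0) | MODEL_RES1;
}

int main(void)
{
        uint64_t v = guest_vtr(0xdeadbeefull);

        assert(!(v & MODEL_RES0));      /* host-only bits hidden */
        assert(v & MODEL_RES1);         /* nV4 always advertised */
        printf("guest ICH_VTR_EL2 model = %#llx\n", (unsigned long long)v);
        return 0;
}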