diff options
Diffstat (limited to 'arch/arm64/kvm/nested.c')
-rw-r--r-- | arch/arm64/kvm/nested.c | 132 |
1 files changed, 104 insertions, 28 deletions
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 153b3e11b115..7a045cad6bdf 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), - ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, @@ -847,7 +847,7 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end) ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - ipa_start = vt->wr.pa & (ipa_size - 1); + ipa_start = vt->wr.pa & ~(ipa_size - 1); ipa_end = ipa_start + ipa_size; if (ipa_end <= start || ipa_start >= end) @@ -887,7 +887,7 @@ static void invalidate_vncr_va(struct kvm *kvm, va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); - va_start = vt->gva & (va_size - 1); + va_start = vt->gva & ~(va_size - 1); va_end = va_start + va_size; switch (scope->type) { @@ -1172,8 +1172,9 @@ static u64 read_vncr_el2(struct kvm_vcpu *vcpu) return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48); } -static int kvm_translate_vncr(struct kvm_vcpu *vcpu) +static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem) { + struct kvm_memory_slot *memslot; bool write_fault, writable; unsigned long mmu_seq; struct vncr_tlb *vt; @@ -1216,10 +1217,25 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu) smp_rmb(); gfn = vt->wr.pa >> PAGE_SHIFT; - pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page); - if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + memslot = gfn_to_memslot(vcpu->kvm, gfn); + if (!memslot) return -EFAULT; + *is_gmem = kvm_slot_has_gmem(memslot); + if (!*is_gmem) { + pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0, + &writable, &page); + if (is_error_noslot_pfn(pfn) || (write_fault && !writable)) + return -EFAULT; + } else { + ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL); + if (ret) { + kvm_prepare_memory_fault_exit(vcpu, vt->wr.pa, PAGE_SIZE, + write_fault, false, false); + return ret; + } + } + scoped_guard(write_lock, &vcpu->kvm->mmu_lock) { if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) return -EAGAIN; @@ -1276,7 +1292,7 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu) !(tcr & TCR_ASID16)) asid &= GENMASK(7, 0); - return asid != vt->wr.asid; + return asid == vt->wr.asid; } return true; @@ -1287,28 +1303,44 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu) struct vncr_tlb *vt = vcpu->arch.vncr_tlb; u64 esr = kvm_vcpu_get_esr(vcpu); - BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT)); + WARN_ON_ONCE(!(esr & ESR_ELx_VNCR)); + + if (kvm_vcpu_abt_issea(vcpu)) + return kvm_handle_guest_sea(vcpu); if (esr_fsc_is_permission_fault(esr)) { inject_vncr_perm(vcpu); } else if (esr_fsc_is_translation_fault(esr)) { - bool valid; + bool valid, is_gmem = false; int ret; scoped_guard(read_lock, &vcpu->kvm->mmu_lock) valid = kvm_vncr_tlb_lookup(vcpu); if (!valid) - ret = kvm_translate_vncr(vcpu); + ret = kvm_translate_vncr(vcpu, &is_gmem); else ret = -EPERM; switch (ret) { case -EAGAIN: - case -ENOMEM: /* Let's try again... */ break; + case -ENOMEM: + /* + * For guest_memfd, this indicates that it failed to + * create a folio to back the memory. Inform userspace. + */ + if (is_gmem) + return 0; + /* Otherwise, let's try again... */ + break; case -EFAULT: + case -EIO: + case -EHWPOISON: + if (is_gmem) + return 0; + fallthrough; case -EINVAL: case -ENOENT: case -EACCES: @@ -1459,9 +1491,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64PFR1_EL1: /* Only support BTI, SSBS, CSV2_frac */ - val &= (ID_AA64PFR1_EL1_BT | - ID_AA64PFR1_EL1_SSBS | - ID_AA64PFR1_EL1_CSV2_frac); + val &= ~(ID_AA64PFR1_EL1_PFAR | + ID_AA64PFR1_EL1_MTEX | + ID_AA64PFR1_EL1_THE | + ID_AA64PFR1_EL1_GCS | + ID_AA64PFR1_EL1_MTE_frac | + ID_AA64PFR1_EL1_NMI | + ID_AA64PFR1_EL1_SME | + ID_AA64PFR1_EL1_RES0 | + ID_AA64PFR1_EL1_MPAM_frac | + ID_AA64PFR1_EL1_MTE); break; case SYS_ID_AA64MMFR0_EL1: @@ -1514,12 +1553,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) break; case SYS_ID_AA64MMFR1_EL1: - val &= (ID_AA64MMFR1_EL1_HCX | - ID_AA64MMFR1_EL1_PAN | - ID_AA64MMFR1_EL1_LO | - ID_AA64MMFR1_EL1_HPDS | - ID_AA64MMFR1_EL1_VH | - ID_AA64MMFR1_EL1_VMIDBits); + val &= ~(ID_AA64MMFR1_EL1_CMOW | + ID_AA64MMFR1_EL1_nTLBPA | + ID_AA64MMFR1_EL1_ETS | + ID_AA64MMFR1_EL1_XNX | + ID_AA64MMFR1_EL1_HAFDBS); /* FEAT_E2H0 implies no VHE */ if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) val &= ~ID_AA64MMFR1_EL1_VH; @@ -1561,14 +1599,22 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) case SYS_ID_AA64DFR0_EL1: /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */ - val &= (ID_AA64DFR0_EL1_PMUVer | - ID_AA64DFR0_EL1_WRPs | - ID_AA64DFR0_EL1_BRPs | - ID_AA64DFR0_EL1_DebugVer| - ID_AA64DFR0_EL1_HPMN0); - - /* Cap Debug to ARMv8.1 */ - val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE); + val &= ~(ID_AA64DFR0_EL1_ExtTrcBuff | + ID_AA64DFR0_EL1_BRBE | + ID_AA64DFR0_EL1_MTPMU | + ID_AA64DFR0_EL1_TraceBuffer | + ID_AA64DFR0_EL1_TraceFilt | + ID_AA64DFR0_EL1_PMSVer | + ID_AA64DFR0_EL1_CTX_CMPs | + ID_AA64DFR0_EL1_SEBEP | + ID_AA64DFR0_EL1_PMSS | + ID_AA64DFR0_EL1_TraceVer); + + /* + * FEAT_Debugv8p9 requires support for extended breakpoints / + * watchpoints. + */ + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); break; } @@ -1793,3 +1839,33 @@ void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu) if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); } + +/* + * KVM unconditionally sets most of these traps anyway but use an allowlist + * to document the guest hypervisor traps that may take precedence and guard + * against future changes to the non-nested trap configuration. + */ +#define NV_MDCR_GUEST_INCLUDE (MDCR_EL2_TDE | \ + MDCR_EL2_TDA | \ + MDCR_EL2_TDRA | \ + MDCR_EL2_TTRF | \ + MDCR_EL2_TPMS | \ + MDCR_EL2_TPM | \ + MDCR_EL2_TPMCR | \ + MDCR_EL2_TDCC | \ + MDCR_EL2_TDOSA) + +void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) +{ + u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + + /* + * In yet another example where FEAT_NV2 is fscking broken, accesses + * to MDSCR_EL1 are redirected to the VNCR despite having an effect + * at EL2. Use a big hammer to apply sanity. + */ + if (is_hyp_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + else + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); +} |