diff options
39 files changed, 643 insertions, 370 deletions
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index 51e5e5762571..5817edb4e046 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -59,6 +59,13 @@ Groups: It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. + Note that to obtain reproducible results (the same VCPU being associated + with the same redistributor across a save/restore operation), VCPU creation + order, redistributor region creation order as well as the respective + interleaves of VCPU and region creation MUST be preserved. Any change in + either ordering may result in a different vcpu_id/redistributor association, + resulting in a VM that will fail to run at restore time. + Errors: ======= ============================================================= diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d6725ff0bf6..13c948a0502d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -54,6 +54,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature) +{ + return test_bit(feature, vcpu->kvm->arch.vcpu_features); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -62,7 +67,7 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) #else static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { - return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features); + return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); } #endif @@ -465,7 +470,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) @@ -565,12 +570,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) - -static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) -{ - return test_bit(feature, vcpu->arch.features); -} - static __always_inline void kvm_write_cptr_el2(u64 val) { if (has_vhe() || has_hvhe()) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e3f778f81db..846a7706e925 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -78,7 +78,7 @@ extern unsigned int __ro_after_init kvm_sve_max_vl; int __init kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); -int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +void kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); struct kvm_hyp_memcache { @@ -158,6 +158,16 @@ struct kvm_s2_mmu { phys_addr_t pgd_phys; struct kvm_pgtable *pgt; + /* + * VTCR value used on the host. For a non-NV guest (or a NV + * guest that runs in a context where its own S2 doesn't + * apply), its T0SZ value reflects that of the IPA size. + * + * For a shadow S2 MMU, T0SZ reflects the PARange exposed to + * the guest. + */ + u64 vtcr; + /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -202,12 +212,34 @@ struct kvm_protected_vm { struct kvm_hyp_memcache teardown_mc; }; +struct kvm_mpidr_data { + u64 mpidr_mask; + DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx); +}; + +static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr) +{ + unsigned long mask = data->mpidr_mask; + u64 aff = mpidr & MPIDR_HWID_BITMASK; + int nbits, bit, bit_idx = 0; + u16 index = 0; + + /* + * If this looks like RISC-V's BEXT or x86's PEXT + * instructions, it isn't by accident. + */ + nbits = fls(mask); + for_each_set_bit(bit, &mask, nbits) { + index |= (aff & BIT(bit)) >> (bit - bit_idx); + bit_idx++; + } + + return index; +} + struct kvm_arch { struct kvm_s2_mmu mmu; - /* VTCR_EL2 value for this VM */ - u64 vtcr; - /* Interrupt controller */ struct vgic_dist vgic; @@ -239,15 +271,16 @@ struct kvm_arch { #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5 /* Timer PPIs made immutable */ #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6 - /* SMCCC filter initialized for the VM */ -#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7 /* Initial ID reg values loaded */ -#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8 +#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 unsigned long flags; /* VM-wide vCPU feature set */ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES); + /* MPIDR to vcpu index mapping, optional */ + struct kvm_mpidr_data *mpidr_data; + /* * VM-wide PMU filter, implemented as a bitmap and big enough for * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). @@ -574,9 +607,6 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; - /* feature flags */ - DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); - /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; @@ -1025,7 +1055,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) @@ -1111,8 +1141,8 @@ static inline bool kvm_set_pmuserenr(u64 val) } #endif -void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); int __init kvm_set_ipa_limit(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 66efd67ea7e8..145ce73fc16c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -93,6 +93,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu); void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); #else +void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu); +void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu); void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); @@ -111,11 +113,6 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); void __sve_restore_state(void *sve_pffr, u32 *fpsr); -#ifndef __KVM_NVHE_HYPERVISOR__ -void activate_traps_vhe_load(struct kvm_vcpu *vcpu); -void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); -#endif - u64 __guest_enter(struct kvm_vcpu *vcpu); bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 96a80e8f6226..d86c8661200a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -150,9 +150,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) */ #define KVM_PHYS_SHIFT (40) -#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr) -#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) -#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) +#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr) +#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu)) +#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL)) #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> @@ -224,16 +224,41 @@ static inline void __clean_dcache_guest_page(void *va, size_t size) kvm_flush_dcache_to_poc(va, size); } +static inline size_t __invalidate_icache_max_range(void) +{ + u8 iminline; + u64 ctr; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + ARM64_ALWAYS_SYSTEM, + kvm_compute_final_ctr_el0) + : "=r" (ctr)); + + iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2; + return MAX_DVM_OPS << iminline; +} + static inline void __invalidate_icache_guest_page(void *va, size_t size) { - if (icache_is_aliasing()) { - /* any kind of VIPT cache */ + /* + * VPIPT I-cache maintenance must be done from EL2. See comment in the + * nVHE flavor of __kvm_tlb_flush_vmid_ipa(). + */ + if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2) + return; + + /* + * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the + * invalidation range exceeds our arbitrary limit on invadations by + * cache line. + */ + if (icache_is_aliasing() || size > __invalidate_icache_max_range()) icache_inval_all_pou(); - } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 || - !icache_is_vpipt()) { - /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + else icache_inval_pou((unsigned long)va, (unsigned long)va + size); - } } void kvm_set_way_flush(struct kvm_vcpu *vcpu); @@ -299,7 +324,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, struct kvm_arch *arch) { - write_sysreg(arch->vtcr, vtcr_el2); + write_sysreg(mmu->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index fa23cc9c2adc..6cec8e9c6c91 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -2,13 +2,14 @@ #ifndef __ARM64_KVM_NESTED_H #define __ARM64_KVM_NESTED_H +#include <asm/kvm_emulate.h> #include <linux/kvm_host.h> static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { return (!__is_defined(__KVM_NVHE_HYPERVISOR__) && cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) && - test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features)); + vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2)); } extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index c8dca8ae359c..23d27623e478 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -21,13 +21,13 @@ * (IPA_SHIFT - 4). */ #define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4) -#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr) +#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr) /* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. We pre-allocate the entry level page table at * the VM creation. */ -#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) +#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1) #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 38296579a4fd..5e65f51c10d2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -270,6 +270,8 @@ /* ETM */ #define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -484,6 +486,7 @@ #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) @@ -497,10 +500,15 @@ #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) +#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0) +#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1) +#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2) +#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) #define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) #define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) @@ -514,6 +522,18 @@ #define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0) #define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0) +#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) +#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) +#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0) +#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x) +#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0) +#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1) +#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2) +#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3) +#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4) +#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5) +#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6) +#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7) #define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0) #define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1) @@ -562,24 +582,49 @@ #define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) #define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) +#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) + +#define __AMEV_op2(m) (m & 0x7) +#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) +#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) +#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) #define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) +#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index b149cf9f91bc..3431d37e5054 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -333,7 +333,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLBI_OPS PTRS_PER_PTE +#define MAX_DVM_OPS PTRS_PER_PTE /* * __flush_tlb_range_op - Perform TLBI operation upon a range @@ -413,12 +413,12 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, /* * When not uses TLB range ops, we can handle up to - * (MAX_TLBI_OPS - 1) pages; + * (MAX_DVM_OPS - 1) pages; * When uses TLB range ops, we can handle up to * (MAX_TLBI_RANGE_PAGES - 1) pages. */ if ((!system_supports_tlb_range() && - (end - start) >= (MAX_TLBI_OPS * stride)) || + (end - start) >= (MAX_DVM_OPS * stride)) || pages >= MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; @@ -451,7 +451,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end { unsigned long addr; - if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { + if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6dcdae4d38cb..6615a08382f1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -458,7 +458,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); @@ -943,7 +943,7 @@ void kvm_timer_sync_user(struct kvm_vcpu *vcpu) unmask_vtimer_irq_user(vcpu); } -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct timer_map map; @@ -987,8 +987,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); - - return 0; } static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f48430ed5b8b..c6cad400490f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -205,6 +205,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (is_protected_kvm_enabled()) pkvm_destroy_hyp_vm(kvm); + kfree(kvm->arch.mpidr_data); kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -370,7 +371,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Force users to call KVM_ARM_VCPU_INIT */ vcpu_clear_flag(vcpu, VCPU_INITIALIZED); - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -441,9 +441,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. */ - if (*last_ran != vcpu->vcpu_id) { + if (*last_ran != vcpu->vcpu_idx) { kvm_call_hyp(__kvm_flush_cpu_context, mmu); - *last_ran = vcpu->vcpu_id; + *last_ran = vcpu->vcpu_idx; } vcpu->cpu = cpu; @@ -451,7 +451,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); if (has_vhe()) - kvm_vcpu_load_sysregs_vhe(vcpu); + kvm_vcpu_load_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) @@ -475,7 +475,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_arch_vcpu_put_debug_state_flags(vcpu); kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) - kvm_vcpu_put_sysregs_vhe(vcpu); + kvm_vcpu_put_vhe(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); @@ -581,6 +581,57 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) return vcpu_get_flag(vcpu, VCPU_INITIALIZED); } +static void kvm_init_mpidr_data(struct kvm *kvm) +{ + struct kvm_mpidr_data *data = NULL; + unsigned long c, mask, nr_entries; + u64 aff_set = 0, aff_clr = ~0UL; + struct kvm_vcpu *vcpu; + + mutex_lock(&kvm->arch.config_lock); + + if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1) + goto out; + + kvm_for_each_vcpu(c, vcpu, kvm) { + u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); + aff_set |= aff; + aff_clr &= aff; + } + + /* + * A significant bit can be either 0 or 1, and will only appear in + * aff_set. Use aff_clr to weed out the useless stuff. + */ + mask = aff_set ^ aff_clr; + nr_entries = BIT_ULL(hweight_long(mask)); + + /* + * Don't let userspace fool us. If we need more than a single page + * to describe the compressed MPIDR array, just fall back to the + * iterative method. Single vcpu VMs do not need this either. + */ + if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE) + data = kzalloc(struct_size(data, cmpidr_to_idx, nr_entries), + GFP_KERNEL_ACCOUNT); + + if (!data) + goto out; + + data->mpidr_mask = mask; + + kvm_for_each_vcpu(c, vcpu, kvm) { + u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); + u16 index = kvm_mpidr_index(data, aff); + + data->cmpidr_to_idx[index] = c; + } + + kvm->arch.mpidr_data = data; +out: + mutex_unlock(&kvm->arch.config_lock); +} + /* * Handle both the initialisation that is being done when the vcpu is * run for the first time, as well as the updates that must be @@ -604,6 +655,8 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (likely(vcpu_has_run_once(vcpu))) return 0; + kvm_init_mpidr_data(kvm); + kvm_arm_vcpu_init_debug(vcpu); if (likely(irqchip_in_kernel(kvm))) { @@ -953,7 +1006,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * making a thread's VMID inactive. So we need to call * kvm_arm_vmid_update() in non-premptible context. */ - kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid); + if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) && + has_vhe()) + __load_stage2(vcpu->arch.hw_mmu, + vcpu->arch.hw_mmu->arch); kvm_pmu_flush_hwstate(vcpu); @@ -1137,27 +1193,23 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status) { u32 irq = irq_level->irq; - unsigned int irq_type, vcpu_idx, irq_num; - int nrcpus = atomic_read(&kvm->online_vcpus); + unsigned int irq_type, vcpu_id, irq_num; struct kvm_vcpu *vcpu = NULL; bool level = irq_level->level; irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; - vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; - vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); + vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; - trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level); switch (irq_type) { case KVM_ARM_IRQ_TYPE_CPU: if (irqchip_in_kernel(kvm)) return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; - - vcpu = kvm_get_vcpu(kvm, vcpu_idx); + vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); if (!vcpu) return -EINVAL; @@ -1169,17 +1221,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (!irqchip_in_kernel(kvm)) return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; - - vcpu = kvm_get_vcpu(kvm, vcpu_idx); + vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); if (!vcpu) return -EINVAL; if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); + return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ -1187,12 +1236,36 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); + return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL); } return -EINVAL; } +static unsigned long system_supported_vcpu_features(void) +{ + unsigned long features = KVM_VCPU_VALID_FEATURES; + + if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) + clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); + + if (!kvm_arm_support_pmu_v3()) + clear_bit(KVM_ARM_VCPU_PMU_V3, &features); + + if (!system_supports_sve()) + clear_bit(KVM_ARM_VCPU_SVE, &features); + + if (!system_has_full_ptr_auth()) { + clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features); + clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features); + } + + if (!cpus_have_final_cap(ARM64_HAS_NESTED_VIRT)) + clear_bit(KVM_ARM_VCPU_HAS_EL2, &features); + + return features; +} + static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { @@ -1207,12 +1280,25 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, return -ENOENT; } - if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) - return 0; + if (features & ~system_supported_vcpu_features()) + return -EINVAL; + + /* + * For now make sure that both address/generic pointer authentication + * features are requested by the userspace together. + */ + if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features) != + test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features)) + return -EINVAL; - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) + /* Disallow NV+SVE for the time being */ + if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features) && + test_bit(KVM_ARM_VCPU_SVE, &features)) return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) + return 0; + /* MTE is incompatible with AArch32 */ if (kvm_has_mte(vcpu->kvm)) return -EINVAL; @@ -1229,7 +1315,8 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu, { unsigned long features = init->features[0]; - return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES); + return !bitmap_equal(vcpu->kvm->arch.vcpu_features, &features, + KVM_VCPU_MAX_FEATURES); } static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, @@ -1242,21 +1329,17 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, mutex_lock(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) && - !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES)) + kvm_vcpu_init_changed(vcpu, init)) goto out_unlock; - bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES); + bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES); /* Now we know what it is, we can reset it. */ - ret = kvm_reset_vcpu(vcpu); - if (ret) { - bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); - goto out_unlock; - } + kvm_reset_vcpu(vcpu); - bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags); vcpu_set_flag(vcpu, VCPU_INITIALIZED); + ret = 0; out_unlock: mutex_unlock(&kvm->arch.config_lock); return ret; @@ -1281,7 +1364,8 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, if (kvm_vcpu_init_changed(vcpu, init)) return -EINVAL; - return kvm_reset_vcpu(vcpu); + kvm_reset_vcpu(vcpu); + return 0; } static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, @@ -2351,6 +2435,18 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) unsigned long i; mpidr &= MPIDR_HWID_BITMASK; + + if (kvm->arch.mpidr_data) { + u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr); + + vcpu = kvm_get_vcpu(kvm, + kvm->arch.mpidr_data->cmpidr_to_idx[idx]); + if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu)) + vcpu = NULL; + + return vcpu; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) return vcpu; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 9ced1bf0c2b7..91cc851a6c75 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -648,15 +648,80 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_APGAKEYLO_EL1, CGT_HCR_APK), SR_TRAP(SYS_APGAKEYHI_EL1, CGT_HCR_APK), /* All _EL2 registers */ - SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0), - sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV), + SR_TRAP(SYS_BRBCR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VPIDR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VMPIDR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_SCTLR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ACTLR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_SCTLR2_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_HCR_EL2, + SYS_HCRX_EL2, CGT_HCR_NV), + SR_TRAP(SYS_SMPRIMAP_EL2, CGT_HCR_NV), + SR_TRAP(SYS_SMCR_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_TTBR0_EL2, + SYS_TCR2_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VTTBR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VTCR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VNCR_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_HDFGRTR_EL2, + SYS_HAFGRTR_EL2, CGT_HCR_NV), /* Skip the SP_EL1 encoding... */ SR_TRAP(SYS_SPSR_EL2, CGT_HCR_NV), SR_TRAP(SYS_ELR_EL2, CGT_HCR_NV), - SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1), - sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV), - SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0), - sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV), + /* Skip SPSR_irq, SPSR_abt, SPSR_und, SPSR_fiq */ + SR_TRAP(SYS_AFSR0_EL2, CGT_HCR_NV), + SR_TRAP(SYS_AFSR1_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ESR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VSESR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_TFSR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_FAR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_HPFAR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_PMSCR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_MAIR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_AMAIR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_MPAMHCR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_MPAMVPMV_EL2, CGT_HCR_NV), + SR_TRAP(SYS_MPAM2_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_MPAMVPM0_EL2, + SYS_MPAMVPM7_EL2, CGT_HCR_NV), + /* + * Note that the spec. describes a group of MEC registers + * whose access should not trap, therefore skip the following: + * MECID_A0_EL2, MECID_A1_EL2, MECID_P0_EL2, + * MECID_P1_EL2, MECIDR_EL2, VMECID_A_EL2, + * VMECID_P_EL2. + */ + SR_RANGE_TRAP(SYS_VBAR_EL2, + SYS_RMR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_VDISR_EL2, CGT_HCR_NV), + /* ICH_AP0R<m>_EL2 */ + SR_RANGE_TRAP(SYS_ICH_AP0R0_EL2, + SYS_ICH_AP0R3_EL2, CGT_HCR_NV), + /* ICH_AP1R<m>_EL2 */ + SR_RANGE_TRAP(SYS_ICH_AP1R0_EL2, + SYS_ICH_AP1R3_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ICC_SRE_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_ICH_HCR_EL2, + SYS_ICH_EISR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ICH_ELRSR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_ICH_VMCR_EL2, CGT_HCR_NV), + /* ICH_LR<m>_EL2 */ + SR_RANGE_TRAP(SYS_ICH_LR0_EL2, + SYS_ICH_LR15_EL2, CGT_HCR_NV), + SR_TRAP(SYS_CONTEXTIDR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_TPIDR_EL2, CGT_HCR_NV), + SR_TRAP(SYS_SCXTNUM_EL2, CGT_HCR_NV), + /* AMEVCNTVOFF0<n>_EL2, AMEVCNTVOFF1<n>_EL2 */ + SR_RANGE_TRAP(SYS_AMEVCNTVOFF0n_EL2(0), + SYS_AMEVCNTVOFF1n_EL2(15), CGT_HCR_NV), + /* CNT*_EL2 */ + SR_TRAP(SYS_CNTVOFF_EL2, CGT_HCR_NV), + SR_TRAP(SYS_CNTPOFF_EL2, CGT_HCR_NV), + SR_TRAP(SYS_CNTHCTL_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_CNTHP_TVAL_EL2, + SYS_CNTHP_CVAL_EL2, CGT_HCR_NV), + SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2, + SYS_CNTHV_CVAL_EL2, CGT_HCR_NV), /* All _EL02, _EL12 registers */ SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0), sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 9d703441278b..8d0a5834e883 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -129,8 +129,8 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, - id_aa64mmfr1_el1_sys_val, phys_shift); + host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + id_aa64mmfr1_el1_sys_val, phys_shift); } static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); @@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) unsigned long nr_pages; int ret; - nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT; ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); if (ret) return ret; @@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_mmu.arch.vtcr; + params->vtcr = mmu->vtcr; params->hcr_el2 |= HCR_VM; /* diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 8033ef353a5d..9d23a51d7f75 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -303,7 +303,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, { hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; - hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; + hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; } static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, @@ -483,7 +483,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, } vm_size = pkvm_get_hyp_vm_size(nr_vcpus); - pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr); ret = -ENOMEM; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f155b8c9e98c..66b30ef88434 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1314,7 +1314,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); - if (!ret) + if (!ret || ret == -EAGAIN) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); return ret; } @@ -1511,7 +1511,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; - u64 vtcr = mmu->arch->vtcr; + u64 vtcr = mmu->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8c..b0cafd7c5f8f 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -93,12 +93,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) NOKPROBE_SYMBOL(__deactivate_traps); /* - * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to + * Disable IRQs in __vcpu_{load,put}_{activate,deactivate}_traps() to * prevent a race condition between context switching of PMUSERENR_EL0 * in __{activate,deactivate}_traps_common() and IPIs that attempts to * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). */ -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +static void __vcpu_load_activate_traps(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -107,7 +107,7 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) local_irq_restore(flags); } -void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) +static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu) { unsigned long flags; @@ -116,6 +116,19 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) local_irq_restore(flags); } +void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu) +{ + __vcpu_load_switch_sysregs(vcpu); + __vcpu_load_activate_traps(vcpu); + __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); +} + +void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu) +{ + __vcpu_put_deactivate_traps(vcpu); + __vcpu_put_switch_sysregs(vcpu); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -170,17 +183,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs_vhe above. We must now call - * __load_stage2 before __activate_traps, because - * __load_stage2 configures stage 2 translation, and - * __activate_traps clear HCR_EL2.TGE (among other things). + * Note that ARM erratum 1165522 requires us to configure both stage 1 + * and stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. The stage 1 and stage 2 guest context has already been + * loaded on the CPU in kvm_vcpu_load_vhe(). */ - __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index b35a178e7e0d..8e1e0d5033b6 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -52,7 +52,7 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); /** - * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU * * @vcpu: The VCPU pointer * @@ -62,7 +62,7 @@ NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); * and loading system register state early avoids having to load them on * every entry to the VM. */ -void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; @@ -92,12 +92,10 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) __sysreg_restore_el1_state(guest_ctxt); vcpu_set_flag(vcpu, SYSREGS_ON_CPU); - - activate_traps_vhe_load(vcpu); } /** - * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU * * @vcpu: The VCPU pointer * @@ -107,13 +105,12 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) * and deferring saving system register state until we're no longer running the * VCPU avoids having to save them on every exit from the VM. */ -void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 46bd43f61d76..b636b4111dbf 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -11,18 +11,25 @@ #include <asm/tlbflush.h> struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; + struct kvm_s2_mmu *mmu; + unsigned long flags; + u64 tcr; + u64 sctlr; }; static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, struct tlb_inv_context *cxt) { + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); u64 val; local_irq_save(cxt->flags); + if (vcpu && mmu != vcpu->arch.hw_mmu) + cxt->mmu = vcpu->arch.hw_mmu; + else + cxt->mmu = NULL; + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* * For CPUs that are affected by ARM errata 1165522 or 1530923, @@ -66,10 +73,13 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt) * We're done with the TLB operation, let's restore the host's * view of HCR_EL2. */ - write_sysreg(0, vttbr_el2); write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); + /* ... and the stage-2 MMU context that we switched away from */ + if (cxt->mmu) + __load_stage2(cxt->mmu, cxt->mmu->arch); + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 7fb4df0456de..5763d979d8ca 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -133,12 +133,10 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) ARM_SMCCC_SMC_64, \ 0, ARM_SMCCC_FUNC_MASK) -static void init_smccc_filter(struct kvm *kvm) +static int kvm_smccc_filter_insert_reserved(struct kvm *kvm) { int r; - mt_init(&kvm->arch.smccc_filter); - /* * Prevent userspace from handling any SMCCC calls in the architecture * range, avoiding the risk of misrepresenting Spectre mitigation status @@ -148,14 +146,25 @@ static void init_smccc_filter(struct kvm *kvm) SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); - WARN_ON_ONCE(r); + if (r) + goto out_destroy; r = mtree_insert_range(&kvm->arch.smccc_filter, SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); - WARN_ON_ONCE(r); + if (r) + goto out_destroy; + return 0; +out_destroy: + mtree_destroy(&kvm->arch.smccc_filter); + return r; +} + +static bool kvm_smccc_filter_configured(struct kvm *kvm) +{ + return !mtree_empty(&kvm->arch.smccc_filter); } static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) @@ -184,13 +193,14 @@ static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user goto out_unlock; } + if (!kvm_smccc_filter_configured(kvm)) { + r = kvm_smccc_filter_insert_reserved(kvm); + if (WARN_ON_ONCE(r)) + goto out_unlock; + } + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); - if (r) - goto out_unlock; - - set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); - out_unlock: mutex_unlock(&kvm->arch.config_lock); return r; @@ -201,7 +211,7 @@ static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) unsigned long idx = func_id; void *val; - if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + if (!kvm_smccc_filter_configured(kvm)) return KVM_SMCCC_FILTER_HANDLE; /* @@ -387,7 +397,7 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; - init_smccc_filter(kvm); + mt_init(&kvm->arch.smccc_filter); } void kvm_arm_teardown_hypercalls(struct kvm *kvm) @@ -554,7 +564,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { bool wants_02; - wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); + wants_02 = vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2); switch (val) { case KVM_ARM_PSCI_0_1: diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 3dd38a151d2a..200c8019a82a 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -135,6 +135,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * volunteered to do so, and bail out otherwise. */ if (!kvm_vcpu_dabt_isvalid(vcpu)) { + trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; @@ -143,7 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) return 0; } - kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n"); return -ENOSYS; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 482280fe22d7..4e41ceed5468 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -892,7 +892,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); @@ -1067,7 +1067,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t addr; int ret = 0; struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO }; - struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct kvm_pgtable *pgt = mmu->pgt; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_R | (writable ? KVM_PGTABLE_PROT_W : 0); @@ -1080,7 +1081,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) { ret = kvm_mmu_topup_memory_cache(&cache, - kvm_mmu_cache_min_pages(kvm)); + kvm_mmu_cache_min_pages(mmu)); if (ret) break; @@ -1298,28 +1299,8 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, if (sz < PMD_SIZE) return PAGE_SIZE; - /* - * The address we faulted on is backed by a transparent huge - * page. However, because we map the compound huge page and - * not the individual tail page, we need to transfer the - * refcount to the head page. We have to be careful that the - * THP doesn't start to split while we are adjusting the - * refcounts. - * - * We are sure this doesn't happen, because mmu_invalidate_retry - * was successful and we are holding the mmu_lock, so if this - * THP is trying to split, it will be blocked in the mmu - * notifier before touching any of the pages, specifically - * before being able to call __split_huge_page_refcount(). - * - * We can therefore safely transfer the refcount from PG_tail - * to PG_head and switch the pfn from a tail page to the head - * page accordingly. - */ *ipap &= PMD_MASK; - kvm_release_pfn_clean(pfn); pfn &= ~(PTRS_PER_PMD - 1); - get_page(pfn_to_page(pfn)); *pfnp = pfn; return PMD_SIZE; @@ -1431,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (fault_status != ESR_ELx_FSC_PERM || (logging_active && write_fault)) { ret = kvm_mmu_topup_memory_cache(memcache, - kvm_mmu_cache_min_pages(kvm)); + kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); if (ret) return ret; } @@ -1747,7 +1728,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu)); if (fault_status == ESR_ELx_FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); @@ -2021,7 +2002,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. */ - if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT)) + if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT)) return -EFAULT; hva = new->userspace_addr; diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 6ff3ec18c925..8350fb8fee0b 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -123,7 +123,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) if (host_kvm->created_vcpus < 1) return -EINVAL; - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); /* * The PGD pages will be reclaimed using a hyp_memcache which implies diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 6b066e04dc5d..845993bab9d2 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -60,6 +60,23 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) return __kvm_pmu_event_mask(pmuver); } +u64 kvm_pmu_evtyper_mask(struct kvm *kvm) +{ + u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 | + kvm_pmu_event_mask(kvm); + u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1); + + if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0)) + mask |= ARMV8_PMU_INCLUDE_EL2; + + if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0)) + mask |= ARMV8_PMU_EXCLUDE_NS_EL0 | + ARMV8_PMU_EXCLUDE_NS_EL1 | + ARMV8_PMU_EXCLUDE_EL3; + + return mask; +} + /** * kvm_pmc_is_64bit - determine if counter is 64bit * @pmc: counter context @@ -348,7 +365,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) pmu->irq_level = overflow; if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, pmu->irq_num, overflow, pmu); WARN_ON(ret); } @@ -584,6 +601,7 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) struct perf_event *event; struct perf_event_attr attr; u64 eventsel, reg, data; + bool p, u, nsk, nsu; reg = counter_index_to_evtreg(pmc->idx); data = __vcpu_sys_reg(vcpu, reg); @@ -610,13 +628,18 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) !test_bit(eventsel, vcpu->kvm->arch.pmu_filter)) return; + p = data & ARMV8_PMU_EXCLUDE_EL1; + u = data & ARMV8_PMU_EXCLUDE_EL0; + nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1; + nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0; + memset(&attr, 0, sizeof(struct perf_event_attr)); attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; attr.disabled = !kvm_pmu_counter_is_enabled(pmc); - attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; - attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; + attr.exclude_user = (u != nsu); + attr.exclude_kernel = (p != nsk); attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; @@ -657,18 +680,13 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) { struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); - u64 reg, mask; + u64 reg; if (!kvm_vcpu_has_pmu(vcpu)) return; - mask = ARMV8_PMU_EVTYPE_MASK; - mask &= ~ARMV8_PMU_EVTYPE_EVENT; - mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = counter_index_to_evtreg(pmc->idx); - - __vcpu_sys_reg(vcpu, reg) = data & mask; + __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); kvm_pmu_create_perf_event(pmc); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 7a65a35ee4ac..5bb4de162cab 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -73,11 +73,8 @@ int __init kvm_arm_init_sve(void) return 0; } -static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) +static void kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) { - if (!system_supports_sve()) - return -EINVAL; - vcpu->arch.sve_max_vl = kvm_sve_max_vl; /* @@ -86,8 +83,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) * kvm_arm_vcpu_finalize(), which freezes the configuration. */ vcpu_set_flag(vcpu, GUEST_HAS_SVE); - - return 0; } /* @@ -170,20 +165,9 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu)); } -static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) +static void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) { - /* - * For now make sure that both address/generic pointer authentication - * features are requested by the userspace together and the system - * supports these capabilities. - */ - if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || - !system_has_full_ptr_auth()) - return -EINVAL; - vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH); - return 0; } /** @@ -204,10 +188,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) * disable preemption around the vcpu reset as we would otherwise race with * preempt notifiers which also call put/load. */ -int kvm_reset_vcpu(struct kvm_vcpu *vcpu) +void kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_reset_state reset_state; - int ret; bool loaded; u32 pstate; @@ -224,29 +207,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (loaded) kvm_arch_vcpu_put(vcpu); - /* Disallow NV+SVE for the time being */ - if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) { - ret = -EINVAL; - goto out; - } - if (!kvm_arm_vcpu_sve_finalized(vcpu)) { - if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) { - ret = kvm_vcpu_enable_sve(vcpu); - if (ret) - goto out; - } + if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) + kvm_vcpu_enable_sve(vcpu); } else { kvm_vcpu_reset_sve(vcpu); } - if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { - if (kvm_vcpu_enable_ptrauth(vcpu)) { - ret = -EINVAL; - goto out; - } - } + if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || + vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) + kvm_vcpu_enable_ptrauth(vcpu); if (vcpu_el1_is_32bit(vcpu)) pstate = VCPU_RESET_PSTATE_SVC; @@ -255,11 +225,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) else pstate = VCPU_RESET_PSTATE_EL1; - if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { - ret = -EINVAL; - goto out; - } - /* Reset core registers */ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs)); @@ -294,12 +259,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Reset timer */ - ret = kvm_timer_vcpu_reset(vcpu); -out: + kvm_timer_vcpu_reset(vcpu); + if (loaded) kvm_arch_vcpu_load(vcpu, smp_processor_id()); preempt_enable(); - return ret; } u32 get_kvm_ipa_limit(void) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 57c8190d5438..2c4923fce573 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -746,8 +746,12 @@ static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { + /* This thing will UNDEF, who cares about the reset value? */ + if (!kvm_vcpu_has_pmu(vcpu)) + return 0; + reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK; + __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); return __vcpu_sys_reg(vcpu, r->reg); } @@ -988,7 +992,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); kvm_vcpu_pmu_restore_guest(vcpu); } else { - p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; + p->regval = __vcpu_sys_reg(vcpu, reg); } return true; @@ -1234,7 +1238,7 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, return arm64_ftr_safe_value(&kvm_ftr, new, cur); } -/** +/* * arm64_check_features() - Check if a feature register value constitutes * a subset of features indicated by the idreg's KVM sanitised limit. * @@ -1825,8 +1829,8 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu, * HCR_EL2.E2H==1, and only in the sysreg table for convenience of * handling traps. Given that, they are always hidden from userspace. */ -static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { return REG_HIDDEN_USER; } @@ -1837,7 +1841,7 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, .reset = rst, \ .reg = name##_EL1, \ .val = v, \ - .visibility = elx2_visibility, \ + .visibility = hidden_user_visibility, \ } /* @@ -2003,7 +2007,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { // DBGDTR[TR]X_EL0 share the same encoding { SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi }, - { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 }, + { SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 }, { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, @@ -2450,18 +2454,28 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(VTTBR_EL2, access_rw, reset_val, 0), EL2_REG(VTCR_EL2, access_rw, reset_val, 0), - { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 }, EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0), EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0), EL2_REG(SPSR_EL2, access_rw, reset_val, 0), EL2_REG(ELR_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_SP_EL1), access_sp_el1}, - { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, + /* AArch32 SPSR_* are RES0 if trapped from a NV guest */ + { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi, + .visibility = hidden_user_visibility }, + { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi, + .visibility = hidden_user_visibility }, + { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi, + .visibility = hidden_user_visibility }, + { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi, + .visibility = hidden_user_visibility }, + + { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 }, EL2_REG(AFSR0_EL2, access_rw, reset_val, 0), EL2_REG(AFSR1_EL2, access_rw, reset_val, 0), EL2_REG(ESR_EL2, access_rw, reset_val, 0), - { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, + { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 }, EL2_REG(FAR_EL2, access_rw, reset_val, 0), EL2_REG(HPFAR_EL2, access_rw, reset_val, 0), diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 8ad53104934d..c18c1a95831e 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -136,6 +136,31 @@ TRACE_EVENT(kvm_mmio_emulate, __entry->vcpu_pc, __entry->instr, __entry->cpsr) ); +TRACE_EVENT(kvm_mmio_nisv, + TP_PROTO(unsigned long vcpu_pc, unsigned long esr, + unsigned long far, unsigned long ipa), + TP_ARGS(vcpu_pc, esr, far, ipa), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, esr ) + __field( unsigned long, far ) + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->esr = esr; + __entry->far = far; + __entry->ipa = ipa; + ), + + TP_printk("ipa %#016lx, esr %#016lx, far %#016lx, pc %#016lx", + __entry->ipa, __entry->esr, + __entry->far, __entry->vcpu_pc) +); + + TRACE_EVENT(kvm_set_way_flush, TP_PROTO(unsigned long vcpu_pc, bool cache), TP_ARGS(vcpu_pc, cache), diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 07aa0437125a..85606a531dc3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq, if (vcpu) { hdr = "VCPU"; - id = vcpu->vcpu_id; + id = vcpu->vcpu_idx; } seq_printf(s, "\n"); @@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, " %2d " "\n", type, irq->intid, - (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1, pending, irq->line_level, irq->active, @@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, irq->mpidr, irq->source, irq->priority, - (irq->vcpu) ? irq->vcpu->vcpu_id : -1); + (irq->vcpu) ? irq->vcpu->vcpu_idx : -1); } static int vgic_debug_show(struct seq_file *s, void *v) diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 475059bacedf..8c711deb25aa 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -23,7 +23,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); + return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL); } /** diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 5fe2365a629f..2dad2d095160 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -378,6 +378,12 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) return ret; } +static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm, + struct its_collection *col) +{ + return kvm_get_vcpu_by_id(kvm, col->target_addr); +} + /* * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI * is targeting) to the VGIC's view, which deals with target VCPUs. @@ -391,7 +397,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) if (!its_is_collection_mapped(ite->collection)) return; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); update_affinity(ite->irq, vcpu); } @@ -679,7 +685,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, if (!ite || !its_is_collection_mapped(ite->collection)) return E_ITS_INT_UNMAPPED_INTERRUPT; - vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + vcpu = collection_to_vcpu(kvm, ite->collection); if (!vcpu) return E_ITS_INT_UNMAPPED_INTERRUPT; @@ -887,7 +893,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, return E_ITS_MOVI_UNMAPPED_COLLECTION; ite->collection = collection; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invalidate_cache(kvm); @@ -1121,7 +1127,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); irq = vgic_add_lpi(kvm, lpi_nr, vcpu); if (IS_ERR(irq)) { @@ -1242,21 +1248,22 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { u16 coll_id; - u32 target_addr; struct its_collection *collection; bool valid; valid = its_cmd_get_validbit(its_cmd); coll_id = its_cmd_get_collection(its_cmd); - target_addr = its_cmd_get_target_addr(its_cmd); - - if (target_addr >= atomic_read(&kvm->online_vcpus)) - return E_ITS_MAPC_PROCNUM_OOR; if (!valid) { vgic_its_free_collection(its, coll_id); vgic_its_invalidate_cache(kvm); } else { + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + if (!vcpu) + return E_ITS_MAPC_PROCNUM_OOR; + collection = find_collection(its, coll_id); if (!collection) { @@ -1270,9 +1277,9 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, coll_id); if (ret) return ret; - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; } else { - collection->target_addr = target_addr; + collection->target_addr = vcpu->vcpu_id; update_affinity_collection(kvm, its, collection); } } @@ -1382,7 +1389,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, if (!its_is_collection_mapped(collection)) return E_ITS_INVALL_UNMAPPED_COLLECTION; - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = collection_to_vcpu(kvm, collection); vgic_its_invall(vcpu); return 0; @@ -1399,23 +1406,21 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { - u32 target1_addr = its_cmd_get_target_addr(its_cmd); - u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; u32 *intids; int irq_count, i; - if (target1_addr >= atomic_read(&kvm->online_vcpus) || - target2_addr >= atomic_read(&kvm->online_vcpus)) + /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */ + vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd)); + vcpu2 = kvm_get_vcpu_by_id(kvm, its_cmd_mask_field(its_cmd, 3, 16, 32)); + + if (!vcpu1 || !vcpu2) return E_ITS_MOVALL_PROCNUM_OOR; - if (target1_addr == target2_addr) + if (vcpu1 == vcpu2) return 0; - vcpu1 = kvm_get_vcpu(kvm, target1_addr); - vcpu2 = kvm_get_vcpu(kvm, target2_addr); - irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); if (irq_count < 0) return irq_count; @@ -2258,7 +2263,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, return PTR_ERR(ite); if (its_is_collection_mapped(collection)) - vcpu = kvm_get_vcpu(kvm, collection->target_addr); + vcpu = kvm_get_vcpu_by_id(kvm, collection->target_addr); irq = vgic_add_lpi(kvm, lpi_id, vcpu); if (IS_ERR(irq)) { @@ -2573,7 +2578,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) coll_id = val & KVM_ITS_CTE_ICID_MASK; if (target_addr != COLLECTION_NOT_MAPPED && - target_addr >= atomic_read(&kvm->online_vcpus)) + !kvm_get_vcpu_by_id(kvm, target_addr)) return -EINVAL; collection = find_collection(its, coll_id); diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 212b73a715c1..f48b8dab8b3d 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, if (addr + size < addr) return -EINVAL; - if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) + if (addr & ~kvm_phys_mask(&kvm->arch.mmu) || + (addr + size) > kvm_phys_size(&kvm->arch.mmu)) return -E2BIG; return 0; @@ -339,13 +340,9 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, { int cpuid; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; + cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) - return -EINVAL; - - reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return 0; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 188d2187eede..89117ba2528a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1013,35 +1013,6 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} /* * The ICC_SGI* registers encode the affinity differently from the MPIDR, @@ -1052,6 +1023,38 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) +static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); +} + /** * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs * @vcpu: The VCPU requesting a SGI @@ -1062,83 +1065,46 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) * This will trap in sys_regs.c and call this function. * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. + * + * If the interrupt routing mode bit is not set, we iterate over the Aff0 + * bits and signal the VCPUs matching the provided Aff{3,2,1}. + * + * If this bit is set, we signal all, but not the calling VCPU. */ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *c_vcpu; - u16 target_cpus; + unsigned long target_cpus; u64 mpidr; - int sgi; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - unsigned long c, flags; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; - mpidr = SGI_AFFINITY_LEVEL(reg, 3); - mpidr |= SGI_AFFINITY_LEVEL(reg, 2); - mpidr |= SGI_AFFINITY_LEVEL(reg, 1); - - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear its bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. - */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - struct vgic_irq *irq; - - /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) - break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) - continue; + u32 sgi, aff0; + unsigned long c; - if (!broadcast) { - int level0; + sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg); - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) + /* Broadcast */ + if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) { + kvm_for_each_vcpu(c, c_vcpu, kvm) { + /* Don't signal the calling VCPU */ + if (c_vcpu == vcpu) continue; - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } - irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - - raw_spin_lock_irqsave(&irq->irq_lock, flags); + return; + } - /* - * An access targeting Group0 SGIs can only generate - * those, while an access targeting Group1 SGIs can - * generate interrupts of either group. - */ - if (!irq->group || allow_group1) { - if (!irq->hw) { - irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq, flags); - } else { - /* HW SGI? Ask the GIC to inject it */ - int err; - err = irq_set_irqchip_state(irq->host_irq, - IRQCHIP_STATE_PENDING, - true); - WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } - } else { - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - } + /* We iterate over affinities to find the corresponding vcpus */ + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg); - vgic_put_irq(vcpu->kvm, irq); + for_each_set_bit(aff0, &target_cpus, hweight_long(ICC_SGI1R_TARGET_LIST_MASK)) { + c_vcpu = kvm_mpidr_to_vcpu(kvm, mpidr | aff0); + if (c_vcpu) + vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1); } } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 8be4c1ebdec2..db2a95762b1b 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -422,7 +422,7 @@ retry: /** * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs + * @vcpu: The CPU for PPIs or NULL for global interrupts * @intid: The INTID to inject a new state to. * @level: Edge-triggered: true: to trigger the interrupt * false: to ignore the call @@ -436,24 +436,22 @@ retry: * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level, void *owner) +int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int intid, bool level, void *owner) { - struct kvm_vcpu *vcpu; struct vgic_irq *irq; unsigned long flags; int ret; - trace_vgic_update_irq_pending(cpuid, intid, level); - ret = vgic_lazy_init(kvm); if (ret) return ret; - vcpu = kvm_get_vcpu(kvm, cpuid); if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) return -EINVAL; + trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level); + irq = vgic_get_irq(kvm, vcpu, intid); if (!irq) return -EINVAL; diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 7fe8ba1a2851..806223b7022a 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -135,10 +135,11 @@ void kvm_arm_vmid_clear_active(void) atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); } -void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) { unsigned long flags; u64 vmid, old_active_vmid; + bool updated = false; vmid = atomic64_read(&kvm_vmid->id); @@ -156,17 +157,21 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) if (old_active_vmid != 0 && vmid_gen_match(vmid) && 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), old_active_vmid, vmid)) - return; + return false; raw_spin_lock_irqsave(&cpu_vmid_lock, flags); /* Check that our VMID belongs to the current generation. */ vmid = atomic64_read(&kvm_vmid->id); - if (!vmid_gen_match(vmid)) + if (!vmid_gen_match(vmid)) { vmid = new_vmid(kvm_vmid); + updated = true; + } atomic64_set(this_cpu_ptr(&active_vmids), vmid); raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); + + return updated; } /* diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index bb3cb005873e..8adf09dbc473 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -94,7 +94,7 @@ struct arch_timer_cpu { int __init kvm_timer_hyp_init(bool has_gic); int kvm_timer_enable(struct kvm_vcpu *vcpu); -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_sync_user(struct kvm_vcpu *vcpu); bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 31029f4f7be8..4df71290b676 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -77,7 +77,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_resync_el0(void); #define kvm_vcpu_has_pmu(vcpu) \ - (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3)) /* * Updates the vcpu's view of the pmu events for this cpu. @@ -101,6 +101,7 @@ void kvm_vcpu_pmu_resync_el0(void); }) u8 kvm_arm_pmu_get_pmuver_limit(void); +u64 kvm_pmu_evtyper_mask(struct kvm *kvm); #else struct kvm_pmu { @@ -172,6 +173,10 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void) { return 0; } +static inline u64 kvm_pmu_evtyper_mask(struct kvm *kvm) +{ + return 0; +} static inline void kvm_vcpu_pmu_resync_el0(void) {} #endif diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 6e55b9283789..e8fb624013d1 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -26,7 +26,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu) * revisions. It is thus safe to return the latest, unless * userspace has instructed us otherwise. */ - if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) { + if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) { if (vcpu->kvm->arch.psci_version) return vcpu->kvm->arch.psci_version; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 5b27f94d4fad..8cc38e836f54 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -375,8 +375,8 @@ int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_hyp_init(void); void kvm_vgic_init_cpu_hardware(void); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level, void *owner); +int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + unsigned int intid, bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index e3899bd77f5c..9c226adf938a 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -234,9 +234,12 @@ /* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) /* * PMUSERENR: user enable reg diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 975d28be3cca..eb4217b7c768 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -135,8 +135,8 @@ static void guest_at(void) uint64_t par; asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); - par = read_sysreg(par_el1); isb(); + par = read_sysreg(par_el1); /* Bit 1 indicates whether the AT was successful */ GUEST_ASSERT_EQ(par & 1, 0); @@ -842,6 +842,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syndrome, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ .expected_events = { .fail_vcpu_runs = 1 }, \ @@ -865,6 +866,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .guest_test_check = { _test_check }, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ @@ -894,6 +896,7 @@ static void help(char *name) .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ .uffd_pt_handler = uffd_pt_handler, \ |