summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-v3.rst7
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h15
-rw-r--r--arch/arm64/include/asm/kvm_host.h56
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h7
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h45
-rw-r--r--arch/arm64/include/asm/kvm_nested.h3
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h45
-rw-r--r--arch/arm64/include/asm/tlbflush.h8
-rw-r--r--arch/arm64/kvm/arch_timer.c6
-rw-r--r--arch/arm64/kvm/arm.c164
-rw-r--r--arch/arm64/kvm/emulate-nested.c77
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c4
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c33
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c11
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c18
-rw-r--r--arch/arm64/kvm/hypercalls.c36
-rw-r--r--arch/arm64/kvm/mmio.c4
-rw-r--r--arch/arm64/kvm/mmu.c33
-rw-r--r--arch/arm64/kvm/pkvm.c2
-rw-r--r--arch/arm64/kvm/pmu-emul.c38
-rw-r--r--arch/arm64/kvm/reset.c56
-rw-r--r--arch/arm64/kvm/sys_regs.c34
-rw-r--r--arch/arm64/kvm/trace_arm.h25
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c49
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c11
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c150
-rw-r--r--arch/arm64/kvm/vgic/vgic.c12
-rw-r--r--arch/arm64/kvm/vmid.c11
-rw-r--r--include/kvm/arm_arch_timer.h2
-rw-r--r--include/kvm/arm_pmu.h7
-rw-r--r--include/kvm/arm_psci.h2
-rw-r--r--include/kvm/arm_vgic.h4
-rw-r--r--include/linux/perf/arm_pmuv3.h9
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c5
39 files changed, 643 insertions, 370 deletions
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
index 51e5e5762571..5817edb4e046 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
@@ -59,6 +59,13 @@ Groups:
It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
+ Note that to obtain reproducible results (the same VCPU being associated
+ with the same redistributor across a save/restore operation), VCPU creation
+ order, redistributor region creation order as well as the respective
+ interleaves of VCPU and region creation MUST be preserved. Any change in
+ either ordering may result in a different vcpu_id/redistributor association,
+ resulting in a VM that will fail to run at restore time.
+
Errors:
======= =============================================================
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3d6725ff0bf6..13c948a0502d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -54,6 +54,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature)
+{
+ return test_bit(feature, vcpu->kvm->arch.vcpu_features);
+}
+
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -62,7 +67,7 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
#else
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
- return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
}
#endif
@@ -465,7 +470,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -565,12 +570,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
-
-static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
-{
- return test_bit(feature, vcpu->arch.features);
-}
-
static __always_inline void kvm_write_cptr_el2(u64 val)
{
if (has_vhe() || has_hvhe())
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5e3f778f81db..846a7706e925 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -78,7 +78,7 @@ extern unsigned int __ro_after_init kvm_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
@@ -158,6 +158,16 @@ struct kvm_s2_mmu {
phys_addr_t pgd_phys;
struct kvm_pgtable *pgt;
+ /*
+ * VTCR value used on the host. For a non-NV guest (or a NV
+ * guest that runs in a context where its own S2 doesn't
+ * apply), its T0SZ value reflects that of the IPA size.
+ *
+ * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+ * the guest.
+ */
+ u64 vtcr;
+
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
@@ -202,12 +212,34 @@ struct kvm_protected_vm {
struct kvm_hyp_memcache teardown_mc;
};
+struct kvm_mpidr_data {
+ u64 mpidr_mask;
+ DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx);
+};
+
+static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
+{
+ unsigned long mask = data->mpidr_mask;
+ u64 aff = mpidr & MPIDR_HWID_BITMASK;
+ int nbits, bit, bit_idx = 0;
+ u16 index = 0;
+
+ /*
+ * If this looks like RISC-V's BEXT or x86's PEXT
+ * instructions, it isn't by accident.
+ */
+ nbits = fls(mask);
+ for_each_set_bit(bit, &mask, nbits) {
+ index |= (aff & BIT(bit)) >> (bit - bit_idx);
+ bit_idx++;
+ }
+
+ return index;
+}
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
- /* VTCR_EL2 value for this VM */
- u64 vtcr;
-
/* Interrupt controller */
struct vgic_dist vgic;
@@ -239,15 +271,16 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
- /* SMCCC filter initialized for the VM */
-#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
/* Initial ID reg values loaded */
-#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
unsigned long flags;
/* VM-wide vCPU feature set */
DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+ /* MPIDR to vcpu index mapping, optional */
+ struct kvm_mpidr_data *mpidr_data;
+
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
@@ -574,9 +607,6 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* feature flags */
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
-
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -1025,7 +1055,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
-void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
@@ -1111,8 +1141,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
}
#endif
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
int __init kvm_set_ipa_limit(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 66efd67ea7e8..145ce73fc16c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -93,6 +93,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu);
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
#else
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu);
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu);
void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
@@ -111,11 +113,6 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
-#ifndef __KVM_NVHE_HYPERVISOR__
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
-#endif
-
u64 __guest_enter(struct kvm_vcpu *vcpu);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 96a80e8f6226..d86c8661200a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -150,9 +150,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
*/
#define KVM_PHYS_SHIFT (40)
-#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
-#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
+#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL))
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
@@ -224,16 +224,41 @@ static inline void __clean_dcache_guest_page(void *va, size_t size)
kvm_flush_dcache_to_poc(va, size);
}
+static inline size_t __invalidate_icache_max_range(void)
+{
+ u8 iminline;
+ u64 ctr;
+
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ ARM64_ALWAYS_SYSTEM,
+ kvm_compute_final_ctr_el0)
+ : "=r" (ctr));
+
+ iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2;
+ return MAX_DVM_OPS << iminline;
+}
+
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
- if (icache_is_aliasing()) {
- /* any kind of VIPT cache */
+ /*
+ * VPIPT I-cache maintenance must be done from EL2. See comment in the
+ * nVHE flavor of __kvm_tlb_flush_vmid_ipa().
+ */
+ if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2)
+ return;
+
+ /*
+ * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
+ * invalidation range exceeds our arbitrary limit on invadations by
+ * cache line.
+ */
+ if (icache_is_aliasing() || size > __invalidate_icache_max_range())
icache_inval_all_pou();
- } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
- !icache_is_vpipt()) {
- /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
+ else
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
- }
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
@@ -299,7 +324,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
struct kvm_arch *arch)
{
- write_sysreg(arch->vtcr, vtcr_el2);
+ write_sysreg(mmu->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index fa23cc9c2adc..6cec8e9c6c91 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -2,13 +2,14 @@
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
+#include <asm/kvm_emulate.h>
#include <linux/kvm_host.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
- test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
}
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index c8dca8ae359c..23d27623e478 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -21,13 +21,13 @@
* (IPA_SHIFT - 4).
*/
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
-#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
+#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr)
/*
* kvm_mmmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
-#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
+#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1)
#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 38296579a4fd..5e65f51c10d2 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -270,6 +270,8 @@
/* ETM */
#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -484,6 +486,7 @@
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
@@ -497,10 +500,15 @@
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
+#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
@@ -514,6 +522,18 @@
#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
+#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0)
+#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1)
+#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0)
+#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x)
+#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0)
+#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1)
+#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2)
+#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3)
+#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4)
+#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5)
+#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6)
+#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7)
#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
@@ -562,24 +582,49 @@
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
+#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b149cf9f91bc..3431d37e5054 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -333,7 +333,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS PTRS_PER_PTE
+#define MAX_DVM_OPS PTRS_PER_PTE
/*
* __flush_tlb_range_op - Perform TLBI operation upon a range
@@ -413,12 +413,12 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
/*
* When not uses TLB range ops, we can handle up to
- * (MAX_TLBI_OPS - 1) pages;
+ * (MAX_DVM_OPS - 1) pages;
* When uses TLB range ops, we can handle up to
* (MAX_TLBI_RANGE_PAGES - 1) pages.
*/
if ((!system_supports_tlb_range() &&
- (end - start) >= (MAX_TLBI_OPS * stride)) ||
+ (end - start) >= (MAX_DVM_OPS * stride)) ||
pages >= MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
@@ -451,7 +451,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
{
unsigned long addr;
- if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
flush_tlb_all();
return;
}
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..6615a08382f1 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -458,7 +458,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
timer_ctx->irq.level);
if (!userspace_irqchip(vcpu->kvm)) {
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
timer_irq(timer_ctx),
timer_ctx->irq.level,
timer_ctx);
@@ -943,7 +943,7 @@ void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
unmask_vtimer_irq_user(vcpu);
}
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
@@ -987,8 +987,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
-
- return 0;
}
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f48430ed5b8b..c6cad400490f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -205,6 +205,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
+ kfree(kvm->arch.mpidr_data);
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
@@ -370,7 +371,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -441,9 +441,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness.
*/
- if (*last_ran != vcpu->vcpu_id) {
+ if (*last_ran != vcpu->vcpu_idx) {
kvm_call_hyp(__kvm_flush_cpu_context, mmu);
- *last_ran = vcpu->vcpu_id;
+ *last_ran = vcpu->vcpu_idx;
}
vcpu->cpu = cpu;
@@ -451,7 +451,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
if (has_vhe())
- kvm_vcpu_load_sysregs_vhe(vcpu);
+ kvm_vcpu_load_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
@@ -475,7 +475,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_put_debug_state_flags(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
- kvm_vcpu_put_sysregs_vhe(vcpu);
+ kvm_vcpu_put_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
@@ -581,6 +581,57 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
}
+static void kvm_init_mpidr_data(struct kvm *kvm)
+{
+ struct kvm_mpidr_data *data = NULL;
+ unsigned long c, mask, nr_entries;
+ u64 aff_set = 0, aff_clr = ~0UL;
+ struct kvm_vcpu *vcpu;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+ goto out;
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
+ aff_set |= aff;
+ aff_clr &= aff;
+ }
+
+ /*
+ * A significant bit can be either 0 or 1, and will only appear in
+ * aff_set. Use aff_clr to weed out the useless stuff.
+ */
+ mask = aff_set ^ aff_clr;
+ nr_entries = BIT_ULL(hweight_long(mask));
+
+ /*
+ * Don't let userspace fool us. If we need more than a single page
+ * to describe the compressed MPIDR array, just fall back to the
+ * iterative method. Single vcpu VMs do not need this either.
+ */
+ if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE)
+ data = kzalloc(struct_size(data, cmpidr_to_idx, nr_entries),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!data)
+ goto out;
+
+ data->mpidr_mask = mask;
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
+ u16 index = kvm_mpidr_index(data, aff);
+
+ data->cmpidr_to_idx[index] = c;
+ }
+
+ kvm->arch.mpidr_data = data;
+out:
+ mutex_unlock(&kvm->arch.config_lock);
+}
+
/*
* Handle both the initialisation that is being done when the vcpu is
* run for the first time, as well as the updates that must be
@@ -604,6 +655,8 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (likely(vcpu_has_run_once(vcpu)))
return 0;
+ kvm_init_mpidr_data(kvm);
+
kvm_arm_vcpu_init_debug(vcpu);
if (likely(irqchip_in_kernel(kvm))) {
@@ -953,7 +1006,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* making a thread's VMID inactive. So we need to call
* kvm_arm_vmid_update() in non-premptible context.
*/
- kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
+ if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
+ has_vhe())
+ __load_stage2(vcpu->arch.hw_mmu,
+ vcpu->arch.hw_mmu->arch);
kvm_pmu_flush_hwstate(vcpu);
@@ -1137,27 +1193,23 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status)
{
u32 irq = irq_level->irq;
- unsigned int irq_type, vcpu_idx, irq_num;
- int nrcpus = atomic_read(&kvm->online_vcpus);
+ unsigned int irq_type, vcpu_id, irq_num;
struct kvm_vcpu *vcpu = NULL;
bool level = irq_level->level;
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
- vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
- vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
+ vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+ vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
- trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
+ trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level);
switch (irq_type) {
case KVM_ARM_IRQ_TYPE_CPU:
if (irqchip_in_kernel(kvm))
return -ENXIO;
- if (vcpu_idx >= nrcpus)
- return -EINVAL;
-
- vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
return -EINVAL;
@@ -1169,17 +1221,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (!irqchip_in_kernel(kvm))
return -ENXIO;
- if (vcpu_idx >= nrcpus)
- return -EINVAL;
-
- vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
return -EINVAL;
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
+ return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -1187,12 +1236,36 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
+ return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL);
}
return -EINVAL;
}
+static unsigned long system_supported_vcpu_features(void)
+{
+ unsigned long features = KVM_VCPU_VALID_FEATURES;
+
+ if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
+ clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
+
+ if (!kvm_arm_support_pmu_v3())
+ clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
+
+ if (!system_supports_sve())
+ clear_bit(KVM_ARM_VCPU_SVE, &features);
+
+ if (!system_has_full_ptr_auth()) {
+ clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features);
+ clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features);
+ }
+
+ if (!cpus_have_final_cap(ARM64_HAS_NESTED_VIRT))
+ clear_bit(KVM_ARM_VCPU_HAS_EL2, &features);
+
+ return features;
+}
+
static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
@@ -1207,12 +1280,25 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
return -ENOENT;
}
- if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
- return 0;
+ if (features & ~system_supported_vcpu_features())
+ return -EINVAL;
+
+ /*
+ * For now make sure that both address/generic pointer authentication
+ * features are requested by the userspace together.
+ */
+ if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features) !=
+ test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features))
+ return -EINVAL;
- if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
+ /* Disallow NV+SVE for the time being */
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features) &&
+ test_bit(KVM_ARM_VCPU_SVE, &features))
return -EINVAL;
+ if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
+ return 0;
+
/* MTE is incompatible with AArch32 */
if (kvm_has_mte(vcpu->kvm))
return -EINVAL;
@@ -1229,7 +1315,8 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
{
unsigned long features = init->features[0];
- return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
+ return !bitmap_equal(vcpu->kvm->arch.vcpu_features, &features,
+ KVM_VCPU_MAX_FEATURES);
}
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1242,21 +1329,17 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
mutex_lock(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) &&
- !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
+ kvm_vcpu_init_changed(vcpu, init))
goto out_unlock;
- bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
+ bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
/* Now we know what it is, we can reset it. */
- ret = kvm_reset_vcpu(vcpu);
- if (ret) {
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
- goto out_unlock;
- }
+ kvm_reset_vcpu(vcpu);
- bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
vcpu_set_flag(vcpu, VCPU_INITIALIZED);
+ ret = 0;
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
@@ -1281,7 +1364,8 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
if (kvm_vcpu_init_changed(vcpu, init))
return -EINVAL;
- return kvm_reset_vcpu(vcpu);
+ kvm_reset_vcpu(vcpu);
+ return 0;
}
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
@@ -2351,6 +2435,18 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
+
+ if (kvm->arch.mpidr_data) {
+ u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+
+ vcpu = kvm_get_vcpu(kvm,
+ kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+ if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
+ vcpu = NULL;
+
+ return vcpu;
+ }
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
return vcpu;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 9ced1bf0c2b7..91cc851a6c75 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -648,15 +648,80 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_APGAKEYLO_EL1, CGT_HCR_APK),
SR_TRAP(SYS_APGAKEYHI_EL1, CGT_HCR_APK),
/* All _EL2 registers */
- SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
- sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+ SR_TRAP(SYS_BRBCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VMPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCTLR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ACTLR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCTLR2_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_HCR_EL2,
+ SYS_HCRX_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SMPRIMAP_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SMCR_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_TTBR0_EL2,
+ SYS_TCR2_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VTTBR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VTCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VNCR_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_HDFGRTR_EL2,
+ SYS_HAFGRTR_EL2, CGT_HCR_NV),
/* Skip the SP_EL1 encoding... */
SR_TRAP(SYS_SPSR_EL2, CGT_HCR_NV),
SR_TRAP(SYS_ELR_EL2, CGT_HCR_NV),
- SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
- sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
- SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
- sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+ /* Skip SPSR_irq, SPSR_abt, SPSR_und, SPSR_fiq */
+ SR_TRAP(SYS_AFSR0_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_AFSR1_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ESR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VSESR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_TFSR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_FAR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_HPFAR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_PMSCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MAIR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_AMAIR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAMHCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAMVPMV_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAM2_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_MPAMVPM0_EL2,
+ SYS_MPAMVPM7_EL2, CGT_HCR_NV),
+ /*
+ * Note that the spec. describes a group of MEC registers
+ * whose access should not trap, therefore skip the following:
+ * MECID_A0_EL2, MECID_A1_EL2, MECID_P0_EL2,
+ * MECID_P1_EL2, MECIDR_EL2, VMECID_A_EL2,
+ * VMECID_P_EL2.
+ */
+ SR_RANGE_TRAP(SYS_VBAR_EL2,
+ SYS_RMR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VDISR_EL2, CGT_HCR_NV),
+ /* ICH_AP0R<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_AP0R0_EL2,
+ SYS_ICH_AP0R3_EL2, CGT_HCR_NV),
+ /* ICH_AP1R<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_AP1R0_EL2,
+ SYS_ICH_AP1R3_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICC_SRE_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_ICH_HCR_EL2,
+ SYS_ICH_EISR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICH_ELRSR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICH_VMCR_EL2, CGT_HCR_NV),
+ /* ICH_LR<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_LR0_EL2,
+ SYS_ICH_LR15_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CONTEXTIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_TPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCXTNUM_EL2, CGT_HCR_NV),
+ /* AMEVCNTVOFF0<n>_EL2, AMEVCNTVOFF1<n>_EL2 */
+ SR_RANGE_TRAP(SYS_AMEVCNTVOFF0n_EL2(0),
+ SYS_AMEVCNTVOFF1n_EL2(15), CGT_HCR_NV),
+ /* CNT*_EL2 */
+ SR_TRAP(SYS_CNTVOFF_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CNTPOFF_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CNTHCTL_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_CNTHP_TVAL_EL2,
+ SYS_CNTHP_CVAL_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2,
+ SYS_CNTHV_CVAL_EL2, CGT_HCR_NV),
/* All _EL02, _EL12 registers */
SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 9d703441278b..8d0a5834e883 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -129,8 +129,8 @@ static void prepare_host_vtcr(void)
parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
- host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
- id_aa64mmfr1_el1_sys_val, phys_shift);
+ host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
+ id_aa64mmfr1_el1_sys_val, phys_shift);
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
@@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
unsigned long nr_pages;
int ret;
- nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+ nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
if (ret)
return ret;
@@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void)
return -EPERM;
params->vttbr = kvm_get_vttbr(mmu);
- params->vtcr = host_mmu.arch.vtcr;
+ params->vtcr = mmu->vtcr;
params->hcr_el2 |= HCR_VM;
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8033ef353a5d..9d23a51d7f75 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -303,7 +303,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
{
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
- hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+ hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -483,7 +483,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
}
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
- pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+ pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
ret = -ENOMEM;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f155b8c9e98c..66b30ef88434 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1314,7 +1314,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
- if (!ret)
+ if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
}
@@ -1511,7 +1511,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = mmu->arch->vtcr;
+ u64 vtcr = mmu->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..b0cafd7c5f8f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -93,12 +93,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
NOKPROBE_SYMBOL(__deactivate_traps);
/*
- * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * Disable IRQs in __vcpu_{load,put}_{activate,deactivate}_traps() to
* prevent a race condition between context switching of PMUSERENR_EL0
* in __{activate,deactivate}_traps_common() and IPIs that attempts to
* update PMUSERENR_EL0. See also kvm_set_pmuserenr().
*/
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+static void __vcpu_load_activate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -107,7 +107,7 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
+static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -116,6 +116,19 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_load_switch_sysregs(vcpu);
+ __vcpu_load_activate_traps(vcpu);
+ __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
+}
+
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_put_deactivate_traps(vcpu);
+ __vcpu_put_switch_sysregs(vcpu);
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -170,17 +183,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_save_host_state_vhe(host_ctxt);
/*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs_vhe above. We must now call
- * __load_stage2 before __activate_traps, because
- * __load_stage2 configures stage 2 translation, and
- * __activate_traps clear HCR_EL2.TGE (among other things).
+ * Note that ARM erratum 1165522 requires us to configure both stage 1
+ * and stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE. The stage 1 and stage 2 guest context has already been
+ * loaded on the CPU in kvm_vcpu_load_vhe().
*/
- __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
__activate_traps(vcpu);
__kvm_adjust_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index b35a178e7e0d..8e1e0d5033b6 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -52,7 +52,7 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
/**
- * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -62,7 +62,7 @@ NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
* and loading system register state early avoids having to load them on
* every entry to the VM.
*/
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
@@ -92,12 +92,10 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_el1_state(guest_ctxt);
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
-
- activate_traps_vhe_load(vcpu);
}
/**
- * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -107,13 +105,12 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
* and deferring saving system register state until we're no longer running the
* VCPU avoids having to save them on every exit from the VM.
*/
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- deactivate_traps_vhe_put(vcpu);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 46bd43f61d76..b636b4111dbf 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -11,18 +11,25 @@
#include <asm/tlbflush.h>
struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
+ struct kvm_s2_mmu *mmu;
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
};
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
local_irq_save(cxt->flags);
+ if (vcpu && mmu != vcpu->arch.hw_mmu)
+ cxt->mmu = vcpu->arch.hw_mmu;
+ else
+ cxt->mmu = NULL;
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/*
* For CPUs that are affected by ARM errata 1165522 or 1530923,
@@ -66,10 +73,13 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
* We're done with the TLB operation, let's restore the host's
* view of HCR_EL2.
*/
- write_sysreg(0, vttbr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
+ /* ... and the stage-2 MMU context that we switched away from */
+ if (cxt->mmu)
+ __load_stage2(cxt->mmu, cxt->mmu->arch);
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 7fb4df0456de..5763d979d8ca 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -133,12 +133,10 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
ARM_SMCCC_SMC_64, \
0, ARM_SMCCC_FUNC_MASK)
-static void init_smccc_filter(struct kvm *kvm)
+static int kvm_smccc_filter_insert_reserved(struct kvm *kvm)
{
int r;
- mt_init(&kvm->arch.smccc_filter);
-
/*
* Prevent userspace from handling any SMCCC calls in the architecture
* range, avoiding the risk of misrepresenting Spectre mitigation status
@@ -148,14 +146,25 @@ static void init_smccc_filter(struct kvm *kvm)
SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
GFP_KERNEL_ACCOUNT);
- WARN_ON_ONCE(r);
+ if (r)
+ goto out_destroy;
r = mtree_insert_range(&kvm->arch.smccc_filter,
SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
GFP_KERNEL_ACCOUNT);
- WARN_ON_ONCE(r);
+ if (r)
+ goto out_destroy;
+ return 0;
+out_destroy:
+ mtree_destroy(&kvm->arch.smccc_filter);
+ return r;
+}
+
+static bool kvm_smccc_filter_configured(struct kvm *kvm)
+{
+ return !mtree_empty(&kvm->arch.smccc_filter);
}
static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
@@ -184,13 +193,14 @@ static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user
goto out_unlock;
}
+ if (!kvm_smccc_filter_configured(kvm)) {
+ r = kvm_smccc_filter_insert_reserved(kvm);
+ if (WARN_ON_ONCE(r))
+ goto out_unlock;
+ }
+
r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
- if (r)
- goto out_unlock;
-
- set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
-
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return r;
@@ -201,7 +211,7 @@ static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
unsigned long idx = func_id;
void *val;
- if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ if (!kvm_smccc_filter_configured(kvm))
return KVM_SMCCC_FILTER_HANDLE;
/*
@@ -387,7 +397,7 @@ void kvm_arm_init_hypercalls(struct kvm *kvm)
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
- init_smccc_filter(kvm);
+ mt_init(&kvm->arch.smccc_filter);
}
void kvm_arm_teardown_hypercalls(struct kvm *kvm)
@@ -554,7 +564,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
bool wants_02;
- wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
+ wants_02 = vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2);
switch (val) {
case KVM_ARM_PSCI_0_1:
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 3dd38a151d2a..200c8019a82a 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -135,6 +135,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
+ trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
+ kvm_vcpu_get_hfar(vcpu), fault_ipa);
+
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
@@ -143,7 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
return 0;
}
- kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
return -ENOSYS;
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 482280fe22d7..4e41ceed5468 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -892,7 +892,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+ mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
if (mmu->pgt != NULL) {
kvm_err("kvm_arch already initialized?\n");
@@ -1067,7 +1067,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t addr;
int ret = 0;
struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
- struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgt = mmu->pgt;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
@@ -1080,7 +1081,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
ret = kvm_mmu_topup_memory_cache(&cache,
- kvm_mmu_cache_min_pages(kvm));
+ kvm_mmu_cache_min_pages(mmu));
if (ret)
break;
@@ -1298,28 +1299,8 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (sz < PMD_SIZE)
return PAGE_SIZE;
- /*
- * The address we faulted on is backed by a transparent huge
- * page. However, because we map the compound huge page and
- * not the individual tail page, we need to transfer the
- * refcount to the head page. We have to be careful that the
- * THP doesn't start to split while we are adjusting the
- * refcounts.
- *
- * We are sure this doesn't happen, because mmu_invalidate_retry
- * was successful and we are holding the mmu_lock, so if this
- * THP is trying to split, it will be blocked in the mmu
- * notifier before touching any of the pages, specifically
- * before being able to call __split_huge_page_refcount().
- *
- * We can therefore safely transfer the refcount from PG_tail
- * to PG_head and switch the pfn from a tail page to the head
- * page accordingly.
- */
*ipap &= PMD_MASK;
- kvm_release_pfn_clean(pfn);
pfn &= ~(PTRS_PER_PMD - 1);
- get_page(pfn_to_page(pfn));
*pfnp = pfn;
return PMD_SIZE;
@@ -1431,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (fault_status != ESR_ELx_FSC_PERM ||
(logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
- kvm_mmu_cache_min_pages(kvm));
+ kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
if (ret)
return ret;
}
@@ -1747,7 +1728,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
/* Userspace should not be able to register out-of-bounds IPAs */
- VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
+ VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
@@ -2021,7 +2002,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT))
return -EFAULT;
hva = new->userspace_addr;
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 6ff3ec18c925..8350fb8fee0b 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -123,7 +123,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
if (host_kvm->created_vcpus < 1)
return -EINVAL;
- pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
+ pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6b066e04dc5d..845993bab9d2 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -60,6 +60,23 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
return __kvm_pmu_event_mask(pmuver);
}
+u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
+{
+ u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
+ kvm_pmu_event_mask(kvm);
+ u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1);
+
+ if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0))
+ mask |= ARMV8_PMU_INCLUDE_EL2;
+
+ if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0))
+ mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
+ ARMV8_PMU_EXCLUDE_NS_EL1 |
+ ARMV8_PMU_EXCLUDE_EL3;
+
+ return mask;
+}
+
/**
* kvm_pmc_is_64bit - determine if counter is 64bit
* @pmc: counter context
@@ -348,7 +365,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
pmu->irq_level = overflow;
if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
pmu->irq_num, overflow, pmu);
WARN_ON(ret);
}
@@ -584,6 +601,7 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
struct perf_event *event;
struct perf_event_attr attr;
u64 eventsel, reg, data;
+ bool p, u, nsk, nsu;
reg = counter_index_to_evtreg(pmc->idx);
data = __vcpu_sys_reg(vcpu, reg);
@@ -610,13 +628,18 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
+ p = data & ARMV8_PMU_EXCLUDE_EL1;
+ u = data & ARMV8_PMU_EXCLUDE_EL0;
+ nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1;
+ nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0;
+
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
- attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
- attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
+ attr.exclude_user = (u != nsu);
+ attr.exclude_kernel = (p != nsk);
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = eventsel;
@@ -657,18 +680,13 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 select_idx)
{
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
- u64 reg, mask;
+ u64 reg;
if (!kvm_vcpu_has_pmu(vcpu))
return;
- mask = ARMV8_PMU_EVTYPE_MASK;
- mask &= ~ARMV8_PMU_EVTYPE_EVENT;
- mask |= kvm_pmu_event_mask(vcpu->kvm);
-
reg = counter_index_to_evtreg(pmc->idx);
-
- __vcpu_sys_reg(vcpu, reg) = data & mask;
+ __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);
kvm_pmu_create_perf_event(pmc);
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 7a65a35ee4ac..5bb4de162cab 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -73,11 +73,8 @@ int __init kvm_arm_init_sve(void)
return 0;
}
-static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
- if (!system_supports_sve())
- return -EINVAL;
-
vcpu->arch.sve_max_vl = kvm_sve_max_vl;
/*
@@ -86,8 +83,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
vcpu_set_flag(vcpu, GUEST_HAS_SVE);
-
- return 0;
}
/*
@@ -170,20 +165,9 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}
-static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
- /*
- * For now make sure that both address/generic pointer authentication
- * features are requested by the userspace together and the system
- * supports these capabilities.
- */
- if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
- !system_has_full_ptr_auth())
- return -EINVAL;
-
vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
- return 0;
}
/**
@@ -204,10 +188,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
* disable preemption around the vcpu reset as we would otherwise race with
* preempt notifiers which also call put/load.
*/
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_reset_state reset_state;
- int ret;
bool loaded;
u32 pstate;
@@ -224,29 +207,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
- /* Disallow NV+SVE for the time being */
- if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
- ret = -EINVAL;
- goto out;
- }
-
if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
- if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
- ret = kvm_vcpu_enable_sve(vcpu);
- if (ret)
- goto out;
- }
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
+ kvm_vcpu_enable_sve(vcpu);
} else {
kvm_vcpu_reset_sve(vcpu);
}
- if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
- if (kvm_vcpu_enable_ptrauth(vcpu)) {
- ret = -EINVAL;
- goto out;
- }
- }
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))
+ kvm_vcpu_enable_ptrauth(vcpu);
if (vcpu_el1_is_32bit(vcpu))
pstate = VCPU_RESET_PSTATE_SVC;
@@ -255,11 +225,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
else
pstate = VCPU_RESET_PSTATE_EL1;
- if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
- ret = -EINVAL;
- goto out;
- }
-
/* Reset core registers */
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
@@ -294,12 +259,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
/* Reset timer */
- ret = kvm_timer_vcpu_reset(vcpu);
-out:
+ kvm_timer_vcpu_reset(vcpu);
+
if (loaded)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
- return ret;
}
u32 get_kvm_ipa_limit(void)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 57c8190d5438..2c4923fce573 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -746,8 +746,12 @@ static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
+ /* This thing will UNDEF, who cares about the reset value? */
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
+ __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -988,7 +992,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
- p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ p->regval = __vcpu_sys_reg(vcpu, reg);
}
return true;
@@ -1234,7 +1238,7 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
return arm64_ftr_safe_value(&kvm_ftr, new, cur);
}
-/**
+/*
* arm64_check_features() - Check if a feature register value constitutes
* a subset of features indicated by the idreg's KVM sanitised limit.
*
@@ -1825,8 +1829,8 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
* handling traps. Given that, they are always hidden from userspace.
*/
-static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
{
return REG_HIDDEN_USER;
}
@@ -1837,7 +1841,7 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
.reset = rst, \
.reg = name##_EL1, \
.val = v, \
- .visibility = elx2_visibility, \
+ .visibility = hidden_user_visibility, \
}
/*
@@ -2003,7 +2007,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
// DBGDTR[TR]X_EL0 share the same encoding
{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
- { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
+ { SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
@@ -2450,18 +2454,28 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+ { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
- { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+ /* AArch32 SPSR_* are RES0 if trapped from a NV guest */
+ { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+
+ { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG(ESR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
+ { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG(FAR_EL2, access_rw, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 8ad53104934d..c18c1a95831e 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -136,6 +136,31 @@ TRACE_EVENT(kvm_mmio_emulate,
__entry->vcpu_pc, __entry->instr, __entry->cpsr)
);
+TRACE_EVENT(kvm_mmio_nisv,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long esr,
+ unsigned long far, unsigned long ipa),
+ TP_ARGS(vcpu_pc, esr, far, ipa),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( unsigned long, esr )
+ __field( unsigned long, far )
+ __field( unsigned long, ipa )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->esr = esr;
+ __entry->far = far;
+ __entry->ipa = ipa;
+ ),
+
+ TP_printk("ipa %#016lx, esr %#016lx, far %#016lx, pc %#016lx",
+ __entry->ipa, __entry->esr,
+ __entry->far, __entry->vcpu_pc)
+);
+
+
TRACE_EVENT(kvm_set_way_flush,
TP_PROTO(unsigned long vcpu_pc, bool cache),
TP_ARGS(vcpu_pc, cache),
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 07aa0437125a..85606a531dc3 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
if (vcpu) {
hdr = "VCPU";
- id = vcpu->vcpu_id;
+ id = vcpu->vcpu_idx;
}
seq_printf(s, "\n");
@@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
" %2d "
"\n",
type, irq->intid,
- (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+ (irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1,
pending,
irq->line_level,
irq->active,
@@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
irq->mpidr,
irq->source,
irq->priority,
- (irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+ (irq->vcpu) ? irq->vcpu->vcpu_idx : -1);
}
static int vgic_debug_show(struct seq_file *s, void *v)
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 475059bacedf..8c711deb25aa 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -23,7 +23,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
+ return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL);
}
/**
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 5fe2365a629f..2dad2d095160 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -378,6 +378,12 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
return ret;
}
+static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm,
+ struct its_collection *col)
+{
+ return kvm_get_vcpu_by_id(kvm, col->target_addr);
+}
+
/*
* Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
* is targeting) to the VGIC's view, which deals with target VCPUs.
@@ -391,7 +397,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
if (!its_is_collection_mapped(ite->collection))
return;
- vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, ite->collection);
update_affinity(ite->irq, vcpu);
}
@@ -679,7 +685,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
if (!ite || !its_is_collection_mapped(ite->collection))
return E_ITS_INT_UNMAPPED_INTERRUPT;
- vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, ite->collection);
if (!vcpu)
return E_ITS_INT_UNMAPPED_INTERRUPT;
@@ -887,7 +893,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
return E_ITS_MOVI_UNMAPPED_COLLECTION;
ite->collection = collection;
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
vgic_its_invalidate_cache(kvm);
@@ -1121,7 +1127,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
if (its_is_collection_mapped(collection))
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
if (IS_ERR(irq)) {
@@ -1242,21 +1248,22 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
u16 coll_id;
- u32 target_addr;
struct its_collection *collection;
bool valid;
valid = its_cmd_get_validbit(its_cmd);
coll_id = its_cmd_get_collection(its_cmd);
- target_addr = its_cmd_get_target_addr(its_cmd);
-
- if (target_addr >= atomic_read(&kvm->online_vcpus))
- return E_ITS_MAPC_PROCNUM_OOR;
if (!valid) {
vgic_its_free_collection(its, coll_id);
vgic_its_invalidate_cache(kvm);
} else {
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
+ if (!vcpu)
+ return E_ITS_MAPC_PROCNUM_OOR;
+
collection = find_collection(its, coll_id);
if (!collection) {
@@ -1270,9 +1277,9 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
coll_id);
if (ret)
return ret;
- collection->target_addr = target_addr;
+ collection->target_addr = vcpu->vcpu_id;
} else {
- collection->target_addr = target_addr;
+ collection->target_addr = vcpu->vcpu_id;
update_affinity_collection(kvm, its, collection);
}
}
@@ -1382,7 +1389,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
if (!its_is_collection_mapped(collection))
return E_ITS_INVALL_UNMAPPED_COLLECTION;
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
vgic_its_invall(vcpu);
return 0;
@@ -1399,23 +1406,21 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
- u32 target1_addr = its_cmd_get_target_addr(its_cmd);
- u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
u32 *intids;
int irq_count, i;
- if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
- target2_addr >= atomic_read(&kvm->online_vcpus))
+ /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */
+ vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
+ vcpu2 = kvm_get_vcpu_by_id(kvm, its_cmd_mask_field(its_cmd, 3, 16, 32));
+
+ if (!vcpu1 || !vcpu2)
return E_ITS_MOVALL_PROCNUM_OOR;
- if (target1_addr == target2_addr)
+ if (vcpu1 == vcpu2)
return 0;
- vcpu1 = kvm_get_vcpu(kvm, target1_addr);
- vcpu2 = kvm_get_vcpu(kvm, target2_addr);
-
irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
if (irq_count < 0)
return irq_count;
@@ -2258,7 +2263,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
return PTR_ERR(ite);
if (its_is_collection_mapped(collection))
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = kvm_get_vcpu_by_id(kvm, collection->target_addr);
irq = vgic_add_lpi(kvm, lpi_id, vcpu);
if (IS_ERR(irq)) {
@@ -2573,7 +2578,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
coll_id = val & KVM_ITS_CTE_ICID_MASK;
if (target_addr != COLLECTION_NOT_MAPPED &&
- target_addr >= atomic_read(&kvm->online_vcpus))
+ !kvm_get_vcpu_by_id(kvm, target_addr))
return -EINVAL;
collection = find_collection(its, coll_id);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 212b73a715c1..f48b8dab8b3d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
if (addr + size < addr)
return -EINVAL;
- if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
+ if (addr & ~kvm_phys_mask(&kvm->arch.mmu) ||
+ (addr + size) > kvm_phys_size(&kvm->arch.mmu))
return -E2BIG;
return 0;
@@ -339,13 +340,9 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
{
int cpuid;
- cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
- KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+ cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr);
- if (cpuid >= atomic_read(&dev->kvm->online_vcpus))
- return -EINVAL;
-
- reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid);
reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return 0;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 188d2187eede..89117ba2528a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -1013,35 +1013,6 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
return 0;
}
-/*
- * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
- * generation register ICC_SGI1R_EL1) with a given VCPU.
- * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
- * return -1.
- */
-static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
-{
- unsigned long affinity;
- int level0;
-
- /*
- * Split the current VCPU's MPIDR into affinity level 0 and the
- * rest as this is what we have to compare against.
- */
- affinity = kvm_vcpu_get_mpidr_aff(vcpu);
- level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
- affinity &= ~MPIDR_LEVEL_MASK;
-
- /* bail out if the upper three levels don't match */
- if (sgi_aff != affinity)
- return -1;
-
- /* Is this VCPU's bit set in the mask ? */
- if (!(sgi_cpu_mask & BIT(level0)))
- return -1;
-
- return level0;
-}
/*
* The ICC_SGI* registers encode the affinity differently from the MPIDR,
@@ -1052,6 +1023,38 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
>> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+ /*
+ * An access targeting Group0 SGIs can only generate
+ * those, while an access targeting Group1 SGIs can
+ * generate interrupts of either group.
+ */
+ if (!irq->group || allow_group1) {
+ if (!irq->hw) {
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ } else {
+ /* HW SGI? Ask the GIC to inject it */
+ int err;
+ err = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ true);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
+ } else {
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
+
+ vgic_put_irq(vcpu->kvm, irq);
+}
+
/**
* vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
* @vcpu: The VCPU requesting a SGI
@@ -1062,83 +1065,46 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
* This will trap in sys_regs.c and call this function.
* This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
* target processors as well as a bitmask of 16 Aff0 CPUs.
- * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
- * check for matching ones. If this bit is set, we signal all, but not the
- * calling VCPU.
+ *
+ * If the interrupt routing mode bit is not set, we iterate over the Aff0
+ * bits and signal the VCPUs matching the provided Aff{3,2,1}.
+ *
+ * If this bit is set, we signal all, but not the calling VCPU.
*/
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *c_vcpu;
- u16 target_cpus;
+ unsigned long target_cpus;
u64 mpidr;
- int sgi;
- int vcpu_id = vcpu->vcpu_id;
- bool broadcast;
- unsigned long c, flags;
-
- sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
- broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
- target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
- mpidr = SGI_AFFINITY_LEVEL(reg, 3);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
-
- /*
- * We iterate over all VCPUs to find the MPIDRs matching the request.
- * If we have handled one CPU, we clear its bit to detect early
- * if we are already finished. This avoids iterating through all
- * VCPUs when most of the times we just signal a single VCPU.
- */
- kvm_for_each_vcpu(c, c_vcpu, kvm) {
- struct vgic_irq *irq;
-
- /* Exit early if we have dealt with all requested CPUs */
- if (!broadcast && target_cpus == 0)
- break;
-
- /* Don't signal the calling VCPU */
- if (broadcast && c == vcpu_id)
- continue;
+ u32 sgi, aff0;
+ unsigned long c;
- if (!broadcast) {
- int level0;
+ sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
- level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
- if (level0 == -1)
+ /* Broadcast */
+ if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) {
+ kvm_for_each_vcpu(c, c_vcpu, kvm) {
+ /* Don't signal the calling VCPU */
+ if (c_vcpu == vcpu)
continue;
- /* remove this matching VCPU from the mask */
- target_cpus &= ~BIT(level0);
+ vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
}
- irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ return;
+ }
- /*
- * An access targeting Group0 SGIs can only generate
- * those, while an access targeting Group1 SGIs can
- * generate interrupts of either group.
- */
- if (!irq->group || allow_group1) {
- if (!irq->hw) {
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
- } else {
- /* HW SGI? Ask the GIC to inject it */
- int err;
- err = irq_set_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- true);
- WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
- } else {
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ /* We iterate over affinities to find the corresponding vcpus */
+ mpidr = SGI_AFFINITY_LEVEL(reg, 3);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+ target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg);
- vgic_put_irq(vcpu->kvm, irq);
+ for_each_set_bit(aff0, &target_cpus, hweight_long(ICC_SGI1R_TARGET_LIST_MASK)) {
+ c_vcpu = kvm_mpidr_to_vcpu(kvm, mpidr | aff0);
+ if (c_vcpu)
+ vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
}
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 8be4c1ebdec2..db2a95762b1b 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -422,7 +422,7 @@ retry:
/**
* kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
* @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
+ * @vcpu: The CPU for PPIs or NULL for global interrupts
* @intid: The INTID to inject a new state to.
* @level: Edge-triggered: true: to trigger the interrupt
* false: to ignore the call
@@ -436,24 +436,22 @@ retry:
* level-sensitive interrupts. You can think of the level parameter as 1
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level, void *owner)
+int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int intid, bool level, void *owner)
{
- struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
unsigned long flags;
int ret;
- trace_vgic_update_irq_pending(cpuid, intid, level);
-
ret = vgic_lazy_init(kvm);
if (ret)
return ret;
- vcpu = kvm_get_vcpu(kvm, cpuid);
if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
+ trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
+
irq = vgic_get_irq(kvm, vcpu, intid);
if (!irq)
return -EINVAL;
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 7fe8ba1a2851..806223b7022a 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -135,10 +135,11 @@ void kvm_arm_vmid_clear_active(void)
atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
}
-void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
{
unsigned long flags;
u64 vmid, old_active_vmid;
+ bool updated = false;
vmid = atomic64_read(&kvm_vmid->id);
@@ -156,17 +157,21 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
old_active_vmid, vmid))
- return;
+ return false;
raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
/* Check that our VMID belongs to the current generation. */
vmid = atomic64_read(&kvm_vmid->id);
- if (!vmid_gen_match(vmid))
+ if (!vmid_gen_match(vmid)) {
vmid = new_vmid(kvm_vmid);
+ updated = true;
+ }
atomic64_set(this_cpu_ptr(&active_vmids), vmid);
raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+
+ return updated;
}
/*
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bb3cb005873e..8adf09dbc473 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -94,7 +94,7 @@ struct arch_timer_cpu {
int __init kvm_timer_hyp_init(bool has_gic);
int kvm_timer_enable(struct kvm_vcpu *vcpu);
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_sync_user(struct kvm_vcpu *vcpu);
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 31029f4f7be8..4df71290b676 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -77,7 +77,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_resync_el0(void);
#define kvm_vcpu_has_pmu(vcpu) \
- (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3))
/*
* Updates the vcpu's view of the pmu events for this cpu.
@@ -101,6 +101,7 @@ void kvm_vcpu_pmu_resync_el0(void);
})
u8 kvm_arm_pmu_get_pmuver_limit(void);
+u64 kvm_pmu_evtyper_mask(struct kvm *kvm);
#else
struct kvm_pmu {
@@ -172,6 +173,10 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
{
return 0;
}
+static inline u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
+{
+ return 0;
+}
static inline void kvm_vcpu_pmu_resync_el0(void) {}
#endif
diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index 6e55b9283789..e8fb624013d1 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -26,7 +26,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu)
* revisions. It is thus safe to return the latest, unless
* userspace has instructed us otherwise.
*/
- if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features)) {
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) {
if (vcpu->kvm->arch.psci_version)
return vcpu->kvm->arch.psci_version;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 5b27f94d4fad..8cc38e836f54 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -375,8 +375,8 @@ int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level, void *owner);
+int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int intid, bool level, void *owner);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid, struct irq_ops *ops);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index e3899bd77f5c..9c226adf938a 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -234,9 +234,12 @@
/*
* Event filters for PMUv3
*/
-#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
+#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29)
+#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28)
+#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26)
/*
* PMUSERENR: user enable reg
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 975d28be3cca..eb4217b7c768 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -135,8 +135,8 @@ static void guest_at(void)
uint64_t par;
asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
- par = read_sysreg(par_el1);
isb();
+ par = read_sysreg(par_el1);
/* Bit 1 indicates whether the AT was successful */
GUEST_ASSERT_EQ(par & 1, 0);
@@ -842,6 +842,7 @@ static void help(char *name)
.name = SCAT2(ro_memslot_no_syndrome, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1 }, \
@@ -865,6 +866,7 @@ static void help(char *name)
.name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.guest_test_check = { _test_check }, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
@@ -894,6 +896,7 @@ static void help(char *name)
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.uffd_data_handler = _uffd_data_handler, \
.uffd_pt_handler = uffd_pt_handler, \