summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-10-31 16:37:07 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2023-10-31 16:37:07 -0400
commit45b890f7689eb0aba454fc5831d2d79763781677 (patch)
tree71ab007123eaedd8553d570d6411dfde1062748e /arch
parentbe47941980d56238455eb54401c7b3de4ac5e269 (diff)
parent123f42f0ad6815014f54d0cc6eb9039c46ee2907 (diff)
Merge tag 'kvmarm-6.7' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 6.7 - Generalized infrastructure for 'writable' ID registers, effectively allowing userspace to opt-out of certain vCPU features for its guest - Optimization for vSGI injection, opportunistically compressing MPIDR to vCPU mapping into a table - Improvements to KVM's PMU emulation, allowing userspace to select the number of PMCs available to a VM - Guest support for memory operation instructions (FEAT_MOPS) - Cleanups to handling feature flags in KVM_ARM_VCPU_INIT, squashing bugs and getting rid of useless code - Changes to the way the SMCCC filter is constructed, avoiding wasted memory allocations when not in use - Load the stage-2 MMU context at vcpu_load() for VHE systems, reducing the overhead of errata mitigations - Miscellaneous kernel and selftest fixes
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/kvm_arm.h4
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h15
-rw-r--r--arch/arm64/include/asm/kvm_host.h61
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h7
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h45
-rw-r--r--arch/arm64/include/asm/kvm_nested.h3
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h45
-rw-r--r--arch/arm64/include/asm/tlbflush.h8
-rw-r--r--arch/arm64/include/asm/traps.h54
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h32
-rw-r--r--arch/arm64/kernel/traps.c48
-rw-r--r--arch/arm64/kvm/arch_timer.c6
-rw-r--r--arch/arm64/kvm/arm.c196
-rw-r--r--arch/arm64/kvm/emulate-nested.c77
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h17
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c34
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c11
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c18
-rw-r--r--arch/arm64/kvm/hypercalls.c36
-rw-r--r--arch/arm64/kvm/mmio.c4
-rw-r--r--arch/arm64/kvm/mmu.c33
-rw-r--r--arch/arm64/kvm/pkvm.c2
-rw-r--r--arch/arm64/kvm/pmu-emul.c145
-rw-r--r--arch/arm64/kvm/reset.c56
-rw-r--r--arch/arm64/kvm/sys_regs.c355
-rw-r--r--arch/arm64/kvm/trace_arm.h25
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c49
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c11
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c150
-rw-r--r--arch/arm64/kvm/vgic/vgic.c12
-rw-r--r--arch/arm64/kvm/vmid.c11
39 files changed, 1103 insertions, 500 deletions
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1095c6647e96..b85f46a73e21 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -102,7 +102,9 @@
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
-#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
+#define HCRX_GUEST_FLAGS \
+ (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
+ (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
/* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 3d6725ff0bf6..13c948a0502d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -54,6 +54,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
+static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature)
+{
+ return test_bit(feature, vcpu->kvm->arch.vcpu_features);
+}
+
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -62,7 +67,7 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
#else
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
- return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
+ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
}
#endif
@@ -465,7 +470,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return __vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
@@ -565,12 +570,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
-
-static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
-{
- return test_bit(feature, vcpu->arch.features);
-}
-
static __always_inline void kvm_write_cptr_el2(u64 val)
{
if (has_vhe() || has_hvhe())
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index af06ccb7ee34..5653d3553e3e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -78,7 +78,7 @@ extern unsigned int __ro_after_init kvm_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
@@ -158,6 +158,16 @@ struct kvm_s2_mmu {
phys_addr_t pgd_phys;
struct kvm_pgtable *pgt;
+ /*
+ * VTCR value used on the host. For a non-NV guest (or a NV
+ * guest that runs in a context where its own S2 doesn't
+ * apply), its T0SZ value reflects that of the IPA size.
+ *
+ * For a shadow S2 MMU, T0SZ reflects the PARange exposed to
+ * the guest.
+ */
+ u64 vtcr;
+
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
@@ -202,12 +212,34 @@ struct kvm_protected_vm {
struct kvm_hyp_memcache teardown_mc;
};
+struct kvm_mpidr_data {
+ u64 mpidr_mask;
+ DECLARE_FLEX_ARRAY(u16, cmpidr_to_idx);
+};
+
+static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
+{
+ unsigned long mask = data->mpidr_mask;
+ u64 aff = mpidr & MPIDR_HWID_BITMASK;
+ int nbits, bit, bit_idx = 0;
+ u16 index = 0;
+
+ /*
+ * If this looks like RISC-V's BEXT or x86's PEXT
+ * instructions, it isn't by accident.
+ */
+ nbits = fls(mask);
+ for_each_set_bit(bit, &mask, nbits) {
+ index |= (aff & BIT(bit)) >> (bit - bit_idx);
+ bit_idx++;
+ }
+
+ return index;
+}
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
- /* VTCR_EL2 value for this VM */
- u64 vtcr;
-
/* Interrupt controller */
struct vgic_dist vgic;
@@ -239,15 +271,16 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 5
/* Timer PPIs made immutable */
#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 6
- /* SMCCC filter initialized for the VM */
-#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 7
/* Initial ID reg values loaded */
-#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8
+#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
unsigned long flags;
/* VM-wide vCPU feature set */
DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES);
+ /* MPIDR to vcpu index mapping, optional */
+ struct kvm_mpidr_data *mpidr_data;
+
/*
* VM-wide PMU filter, implemented as a bitmap and big enough for
* up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+).
@@ -257,6 +290,9 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
+ /* PMCR_EL0.N value for the guest */
+ u8 pmcr_n;
+
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
struct maple_tree smccc_filter;
@@ -574,9 +610,6 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* feature flags */
- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
-
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -1025,7 +1058,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
-void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
@@ -1078,6 +1111,8 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
struct kvm_arm_counter_offset *offset);
+int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
+ struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -1109,8 +1144,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
}
#endif
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
int __init kvm_set_ipa_limit(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 66efd67ea7e8..145ce73fc16c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -93,6 +93,8 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu);
void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
#else
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu);
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu);
void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
@@ -111,11 +113,6 @@ void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
-#ifndef __KVM_NVHE_HYPERVISOR__
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
-#endif
-
u64 __guest_enter(struct kvm_vcpu *vcpu);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 96a80e8f6226..d86c8661200a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -150,9 +150,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
*/
#define KVM_PHYS_SHIFT (40)
-#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
-#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
-#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
+#define kvm_phys_shift(mmu) VTCR_EL2_IPA((mmu)->vtcr)
+#define kvm_phys_size(mmu) (_AC(1, ULL) << kvm_phys_shift(mmu))
+#define kvm_phys_mask(mmu) (kvm_phys_size(mmu) - _AC(1, ULL))
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
@@ -224,16 +224,41 @@ static inline void __clean_dcache_guest_page(void *va, size_t size)
kvm_flush_dcache_to_poc(va, size);
}
+static inline size_t __invalidate_icache_max_range(void)
+{
+ u8 iminline;
+ u64 ctr;
+
+ asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+ "movk %0, #0, lsl #16\n"
+ "movk %0, #0, lsl #32\n"
+ "movk %0, #0, lsl #48\n",
+ ARM64_ALWAYS_SYSTEM,
+ kvm_compute_final_ctr_el0)
+ : "=r" (ctr));
+
+ iminline = SYS_FIELD_GET(CTR_EL0, IminLine, ctr) + 2;
+ return MAX_DVM_OPS << iminline;
+}
+
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
- if (icache_is_aliasing()) {
- /* any kind of VIPT cache */
+ /*
+ * VPIPT I-cache maintenance must be done from EL2. See comment in the
+ * nVHE flavor of __kvm_tlb_flush_vmid_ipa().
+ */
+ if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2)
+ return;
+
+ /*
+ * Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
+ * invalidation range exceeds our arbitrary limit on invadations by
+ * cache line.
+ */
+ if (icache_is_aliasing() || size > __invalidate_icache_max_range())
icache_inval_all_pou();
- } else if (read_sysreg(CurrentEL) != CurrentEL_EL1 ||
- !icache_is_vpipt()) {
- /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
+ else
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
- }
}
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
@@ -299,7 +324,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
struct kvm_arch *arch)
{
- write_sysreg(arch->vtcr, vtcr_el2);
+ write_sysreg(mmu->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index fa23cc9c2adc..6cec8e9c6c91 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -2,13 +2,14 @@
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
+#include <asm/kvm_emulate.h>
#include <linux/kvm_host.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
- test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
}
extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index c8dca8ae359c..23d27623e478 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -21,13 +21,13 @@
* (IPA_SHIFT - 4).
*/
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
-#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
+#define kvm_stage2_levels(mmu) VTCR_EL2_LVLS((mmu)->vtcr)
/*
* kvm_mmmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
-#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
+#define kvm_mmu_cache_min_pages(mmu) (kvm_stage2_levels(mmu) - 1)
#endif /* __ARM64_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 38296579a4fd..5e65f51c10d2 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -270,6 +270,8 @@
/* ETM */
#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -484,6 +486,7 @@
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
@@ -497,10 +500,15 @@
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
+#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
@@ -514,6 +522,18 @@
#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
+#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0)
+#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1)
+#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0)
+#define __SYS__MPAMVPMx_EL2(x) sys_reg(3, 4, 10, 6, x)
+#define SYS_MPAMVPM0_EL2 __SYS__MPAMVPMx_EL2(0)
+#define SYS_MPAMVPM1_EL2 __SYS__MPAMVPMx_EL2(1)
+#define SYS_MPAMVPM2_EL2 __SYS__MPAMVPMx_EL2(2)
+#define SYS_MPAMVPM3_EL2 __SYS__MPAMVPMx_EL2(3)
+#define SYS_MPAMVPM4_EL2 __SYS__MPAMVPMx_EL2(4)
+#define SYS_MPAMVPM5_EL2 __SYS__MPAMVPMx_EL2(5)
+#define SYS_MPAMVPM6_EL2 __SYS__MPAMVPMx_EL2(6)
+#define SYS_MPAMVPM7_EL2 __SYS__MPAMVPMx_EL2(7)
#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
@@ -562,24 +582,49 @@
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
+#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b149cf9f91bc..3431d37e5054 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -333,7 +333,7 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS PTRS_PER_PTE
+#define MAX_DVM_OPS PTRS_PER_PTE
/*
* __flush_tlb_range_op - Perform TLBI operation upon a range
@@ -413,12 +413,12 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
/*
* When not uses TLB range ops, we can handle up to
- * (MAX_TLBI_OPS - 1) pages;
+ * (MAX_DVM_OPS - 1) pages;
* When uses TLB range ops, we can handle up to
* (MAX_TLBI_RANGE_PAGES - 1) pages.
*/
if ((!system_supports_tlb_range() &&
- (end - start) >= (MAX_TLBI_OPS * stride)) ||
+ (end - start) >= (MAX_DVM_OPS * stride)) ||
pages >= MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
@@ -451,7 +451,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
{
unsigned long addr;
- if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
+ if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
flush_tlb_all();
return;
}
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index d66dfb3a72dd..eefe766d6161 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -9,10 +9,9 @@
#include <linux/list.h>
#include <asm/esr.h>
+#include <asm/ptrace.h>
#include <asm/sections.h>
-struct pt_regs;
-
#ifdef CONFIG_ARMV8_DEPRECATED
bool try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn);
#else
@@ -101,4 +100,55 @@ static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr)
bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr);
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr);
+
+static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned long esr)
+{
+ bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+ bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+ int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+ int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+ int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+ unsigned long dst, src, size;
+
+ dst = regs->regs[dstreg];
+ src = regs->regs[srcreg];
+ size = regs->regs[sizereg];
+
+ /*
+ * Put the registers back in the original format suitable for a
+ * prologue instruction, using the generic return routine from the
+ * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+ */
+ if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+ /* SET* instruction */
+ if (option_a ^ wrong_option) {
+ /* Format is from Option A; forward set */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[sizereg] = -size;
+ }
+ } else {
+ /* CPY* instruction */
+ if (!(option_a ^ wrong_option)) {
+ /* Format is from Option B */
+ if (regs->pstate & PSR_N_BIT) {
+ /* Backward copy */
+ regs->regs[dstreg] = dst - size;
+ regs->regs[srcreg] = src - size;
+ }
+ } else {
+ /* Format is from Option A */
+ if (size & BIT(63)) {
+ /* Forward copy */
+ regs->regs[dstreg] = dst + size;
+ regs->regs[srcreg] = src + size;
+ regs->regs[sizereg] = -size;
+ }
+ }
+ }
+
+ if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+ regs->pc -= 8;
+ else
+ regs->pc -= 4;
+}
#endif
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f7ddd73a8c0f..89d2fc872d9f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -505,6 +505,38 @@ struct kvm_smccc_filter {
#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+/*
+ * Get feature ID registers userspace writable mask.
+ *
+ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model
+ * Feature Register 2"):
+ *
+ * "The Feature ID space is defined as the System register space in
+ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7},
+ * op2=={0-7}."
+ *
+ * This covers all currently known R/O registers that indicate
+ * anything useful feature wise, including the ID registers.
+ *
+ * If we ever need to introduce a new range, it will be described as
+ * such in the range field.
+ */
+#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \
+ ({ \
+ __u64 __op1 = (op1) & 3; \
+ __op1 -= (__op1 == 3); \
+ (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \
+ })
+
+#define KVM_ARM_FEATURE_ID_RANGE 0
+#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8)
+
+struct reg_mask_range {
+ __u64 addr; /* Pointer to mask array */
+ __u32 range; /* Requested range */
+ __u32 reserved[13];
+};
+
#endif
#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8b70759cdbb9..ede65a20e7dc 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -516,53 +516,7 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
void do_el0_mops(struct pt_regs *regs, unsigned long esr)
{
- bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
- bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
- int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
- int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
- int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
- unsigned long dst, src, size;
-
- dst = pt_regs_read_reg(regs, dstreg);
- src = pt_regs_read_reg(regs, srcreg);
- size = pt_regs_read_reg(regs, sizereg);
-
- /*
- * Put the registers back in the original format suitable for a
- * prologue instruction, using the generic return routine from the
- * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
- */
- if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
- /* SET* instruction */
- if (option_a ^ wrong_option) {
- /* Format is from Option A; forward set */
- pt_regs_write_reg(regs, dstreg, dst + size);
- pt_regs_write_reg(regs, sizereg, -size);
- }
- } else {
- /* CPY* instruction */
- if (!(option_a ^ wrong_option)) {
- /* Format is from Option B */
- if (regs->pstate & PSR_N_BIT) {
- /* Backward copy */
- pt_regs_write_reg(regs, dstreg, dst - size);
- pt_regs_write_reg(regs, srcreg, src - size);
- }
- } else {
- /* Format is from Option A */
- if (size & BIT(63)) {
- /* Forward copy */
- pt_regs_write_reg(regs, dstreg, dst + size);
- pt_regs_write_reg(regs, srcreg, src + size);
- pt_regs_write_reg(regs, sizereg, -size);
- }
- }
- }
-
- if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
- regs->pc -= 8;
- else
- regs->pc -= 4;
+ arm64_mops_reset_regs(&regs->user_regs, esr);
/*
* If single stepping then finish the step before executing the
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index a1e24228aaaa..13ba691b848f 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -453,7 +453,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
timer_ctx->irq.level);
if (!userspace_irqchip(vcpu->kvm)) {
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
timer_irq(timer_ctx),
timer_ctx->irq.level,
timer_ctx);
@@ -936,7 +936,7 @@ void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
unmask_vtimer_irq_user(vcpu);
}
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
@@ -980,8 +980,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
-
- return 0;
}
static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4866b3f7b4ea..317964bad1e1 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -205,6 +205,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
+ kfree(kvm->arch.mpidr_data);
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
@@ -317,6 +318,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:
r = kvm_supported_block_sizes();
break;
+ case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
+ r = BIT(0);
+ break;
default:
r = 0;
}
@@ -367,7 +371,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -438,9 +441,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness.
*/
- if (*last_ran != vcpu->vcpu_id) {
+ if (*last_ran != vcpu->vcpu_idx) {
kvm_call_hyp(__kvm_flush_cpu_context, mmu);
- *last_ran = vcpu->vcpu_id;
+ *last_ran = vcpu->vcpu_idx;
}
vcpu->cpu = cpu;
@@ -448,7 +451,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
if (has_vhe())
- kvm_vcpu_load_sysregs_vhe(vcpu);
+ kvm_vcpu_load_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
@@ -472,7 +475,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_put_debug_state_flags(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
- kvm_vcpu_put_sysregs_vhe(vcpu);
+ kvm_vcpu_put_vhe(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
@@ -578,6 +581,57 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
}
+static void kvm_init_mpidr_data(struct kvm *kvm)
+{
+ struct kvm_mpidr_data *data = NULL;
+ unsigned long c, mask, nr_entries;
+ u64 aff_set = 0, aff_clr = ~0UL;
+ struct kvm_vcpu *vcpu;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+ goto out;
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
+ aff_set |= aff;
+ aff_clr &= aff;
+ }
+
+ /*
+ * A significant bit can be either 0 or 1, and will only appear in
+ * aff_set. Use aff_clr to weed out the useless stuff.
+ */
+ mask = aff_set ^ aff_clr;
+ nr_entries = BIT_ULL(hweight_long(mask));
+
+ /*
+ * Don't let userspace fool us. If we need more than a single page
+ * to describe the compressed MPIDR array, just fall back to the
+ * iterative method. Single vcpu VMs do not need this either.
+ */
+ if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE)
+ data = kzalloc(struct_size(data, cmpidr_to_idx, nr_entries),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!data)
+ goto out;
+
+ data->mpidr_mask = mask;
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
+ u16 index = kvm_mpidr_index(data, aff);
+
+ data->cmpidr_to_idx[index] = c;
+ }
+
+ kvm->arch.mpidr_data = data;
+out:
+ mutex_unlock(&kvm->arch.config_lock);
+}
+
/*
* Handle both the initialisation that is being done when the vcpu is
* run for the first time, as well as the updates that must be
@@ -601,6 +655,8 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (likely(vcpu_has_run_once(vcpu)))
return 0;
+ kvm_init_mpidr_data(kvm);
+
kvm_arm_vcpu_init_debug(vcpu);
if (likely(irqchip_in_kernel(kvm))) {
@@ -801,8 +857,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
- kvm_pmu_handle_pmcr(vcpu,
- __vcpu_sys_reg(vcpu, PMCR_EL0));
+ kvm_vcpu_reload_pmu(vcpu);
if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
kvm_vcpu_pmu_restore_guest(vcpu);
@@ -950,7 +1005,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* making a thread's VMID inactive. So we need to call
* kvm_arm_vmid_update() in non-premptible context.
*/
- kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
+ if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
+ has_vhe())
+ __load_stage2(vcpu->arch.hw_mmu,
+ vcpu->arch.hw_mmu->arch);
kvm_pmu_flush_hwstate(vcpu);
@@ -1134,27 +1192,23 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status)
{
u32 irq = irq_level->irq;
- unsigned int irq_type, vcpu_idx, irq_num;
- int nrcpus = atomic_read(&kvm->online_vcpus);
+ unsigned int irq_type, vcpu_id, irq_num;
struct kvm_vcpu *vcpu = NULL;
bool level = irq_level->level;
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
- vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
- vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
+ vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+ vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
- trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
+ trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level);
switch (irq_type) {
case KVM_ARM_IRQ_TYPE_CPU:
if (irqchip_in_kernel(kvm))
return -ENXIO;
- if (vcpu_idx >= nrcpus)
- return -EINVAL;
-
- vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
return -EINVAL;
@@ -1166,17 +1220,14 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (!irqchip_in_kernel(kvm))
return -ENXIO;
- if (vcpu_idx >= nrcpus)
- return -EINVAL;
-
- vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
return -EINVAL;
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
+ return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -1184,12 +1235,36 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
+ return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL);
}
return -EINVAL;
}
+static unsigned long system_supported_vcpu_features(void)
+{
+ unsigned long features = KVM_VCPU_VALID_FEATURES;
+
+ if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
+ clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
+
+ if (!kvm_arm_support_pmu_v3())
+ clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
+
+ if (!system_supports_sve())
+ clear_bit(KVM_ARM_VCPU_SVE, &features);
+
+ if (!system_has_full_ptr_auth()) {
+ clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features);
+ clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features);
+ }
+
+ if (!cpus_have_final_cap(ARM64_HAS_NESTED_VIRT))
+ clear_bit(KVM_ARM_VCPU_HAS_EL2, &features);
+
+ return features;
+}
+
static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
@@ -1204,12 +1279,25 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu,
return -ENOENT;
}
- if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
- return 0;
+ if (features & ~system_supported_vcpu_features())
+ return -EINVAL;
- if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
+ /*
+ * For now make sure that both address/generic pointer authentication
+ * features are requested by the userspace together.
+ */
+ if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features) !=
+ test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features))
return -EINVAL;
+ /* Disallow NV+SVE for the time being */
+ if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features) &&
+ test_bit(KVM_ARM_VCPU_SVE, &features))
+ return -EINVAL;
+
+ if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features))
+ return 0;
+
/* MTE is incompatible with AArch32 */
if (kvm_has_mte(vcpu->kvm))
return -EINVAL;
@@ -1226,7 +1314,23 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
{
unsigned long features = init->features[0];
- return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
+ return !bitmap_equal(vcpu->kvm->arch.vcpu_features, &features,
+ KVM_VCPU_MAX_FEATURES);
+}
+
+static int kvm_setup_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int ret = 0;
+
+ /*
+ * When the vCPU has a PMU, but no PMU is set for the guest
+ * yet, set the default one.
+ */
+ if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu)
+ ret = kvm_arm_set_default_pmu(kvm);
+
+ return ret;
}
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1239,21 +1343,21 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
mutex_lock(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) &&
- !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
+ kvm_vcpu_init_changed(vcpu, init))
goto out_unlock;
- bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
+ bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
- /* Now we know what it is, we can reset it. */
- ret = kvm_reset_vcpu(vcpu);
- if (ret) {
- bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+ ret = kvm_setup_vcpu(vcpu);
+ if (ret)
goto out_unlock;
- }
- bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
+ /* Now we know what it is, we can reset it. */
+ kvm_reset_vcpu(vcpu);
+
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
vcpu_set_flag(vcpu, VCPU_INITIALIZED);
+ ret = 0;
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
@@ -1278,7 +1382,8 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
if (kvm_vcpu_init_changed(vcpu, init))
return -EINVAL;
- return kvm_reset_vcpu(vcpu);
+ kvm_reset_vcpu(vcpu);
+ return 0;
}
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
@@ -1629,6 +1734,13 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return kvm_vm_set_attr(kvm, &attr);
}
+ case KVM_ARM_GET_REG_WRITABLE_MASKS: {
+ struct reg_mask_range range;
+
+ if (copy_from_user(&range, argp, sizeof(range)))
+ return -EFAULT;
+ return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range);
+ }
default:
return -EINVAL;
}
@@ -2341,6 +2453,18 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
+
+ if (kvm->arch.mpidr_data) {
+ u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+
+ vcpu = kvm_get_vcpu(kvm,
+ kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+ if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
+ vcpu = NULL;
+
+ return vcpu;
+ }
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
return vcpu;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index ee902ff2a50f..06185216a297 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -648,15 +648,80 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_APGAKEYLO_EL1, CGT_HCR_APK),
SR_TRAP(SYS_APGAKEYHI_EL1, CGT_HCR_APK),
/* All _EL2 registers */
- SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
- sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+ SR_TRAP(SYS_BRBCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VMPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCTLR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ACTLR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCTLR2_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_HCR_EL2,
+ SYS_HCRX_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SMPRIMAP_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SMCR_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_TTBR0_EL2,
+ SYS_TCR2_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VTTBR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VTCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VNCR_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_HDFGRTR_EL2,
+ SYS_HAFGRTR_EL2, CGT_HCR_NV),
/* Skip the SP_EL1 encoding... */
SR_TRAP(SYS_SPSR_EL2, CGT_HCR_NV),
SR_TRAP(SYS_ELR_EL2, CGT_HCR_NV),
- SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
- sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
- SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
- sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+ /* Skip SPSR_irq, SPSR_abt, SPSR_und, SPSR_fiq */
+ SR_TRAP(SYS_AFSR0_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_AFSR1_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ESR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VSESR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_TFSR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_FAR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_HPFAR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_PMSCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MAIR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_AMAIR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAMHCR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAMVPMV_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_MPAM2_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_MPAMVPM0_EL2,
+ SYS_MPAMVPM7_EL2, CGT_HCR_NV),
+ /*
+ * Note that the spec. describes a group of MEC registers
+ * whose access should not trap, therefore skip the following:
+ * MECID_A0_EL2, MECID_A1_EL2, MECID_P0_EL2,
+ * MECID_P1_EL2, MECIDR_EL2, VMECID_A_EL2,
+ * VMECID_P_EL2.
+ */
+ SR_RANGE_TRAP(SYS_VBAR_EL2,
+ SYS_RMR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_VDISR_EL2, CGT_HCR_NV),
+ /* ICH_AP0R<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_AP0R0_EL2,
+ SYS_ICH_AP0R3_EL2, CGT_HCR_NV),
+ /* ICH_AP1R<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_AP1R0_EL2,
+ SYS_ICH_AP1R3_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICC_SRE_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_ICH_HCR_EL2,
+ SYS_ICH_EISR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICH_ELRSR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ICH_VMCR_EL2, CGT_HCR_NV),
+ /* ICH_LR<m>_EL2 */
+ SR_RANGE_TRAP(SYS_ICH_LR0_EL2,
+ SYS_ICH_LR15_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CONTEXTIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_TPIDR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_SCXTNUM_EL2, CGT_HCR_NV),
+ /* AMEVCNTVOFF0<n>_EL2, AMEVCNTVOFF1<n>_EL2 */
+ SR_RANGE_TRAP(SYS_AMEVCNTVOFF0n_EL2(0),
+ SYS_AMEVCNTVOFF1n_EL2(15), CGT_HCR_NV),
+ /* CNT*_EL2 */
+ SR_TRAP(SYS_CNTVOFF_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CNTPOFF_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_CNTHCTL_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_CNTHP_TVAL_EL2,
+ SYS_CNTHP_CVAL_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2,
+ SYS_CNTHV_CVAL_EL2, CGT_HCR_NV),
/* All _EL02, _EL12 registers */
SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 9cfe6bd1dbe4..f99d8af0b9af 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -30,6 +30,7 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
+#include <asm/traps.h>
struct kvm_exception_table_entry {
int insn, fixup;
@@ -265,6 +266,22 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
+static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+
+ /*
+ * Finish potential single step before executing the prologue
+ * instruction.
+ */
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ return true;
+}
+
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index 37440e1dda93..e91922daa8ca 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -197,7 +197,8 @@
#define PVM_ID_AA64ISAR2_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
)
u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 9d703441278b..8d0a5834e883 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -129,8 +129,8 @@ static void prepare_host_vtcr(void)
parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
- host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
- id_aa64mmfr1_el1_sys_val, phys_shift);
+ host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
+ id_aa64mmfr1_el1_sys_val, phys_shift);
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
@@ -235,7 +235,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
unsigned long nr_pages;
int ret;
- nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+ nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
if (ret)
return ret;
@@ -295,7 +295,7 @@ int __pkvm_prot_finalize(void)
return -EPERM;
params->vttbr = kvm_get_vttbr(mmu);
- params->vtcr = host_mmu.arch.vtcr;
+ params->vtcr = mmu->vtcr;
params->hcr_el2 |= HCR_VM;
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 8033ef353a5d..9d23a51d7f75 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -303,7 +303,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
{
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
- hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+ hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -483,7 +483,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
}
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
- pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+ pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
ret = -ENOMEM;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c353a06ee7e6..c50f8459e4fc 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -192,6 +192,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn pvm_exit_handlers[] = {
@@ -203,6 +204,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f155b8c9e98c..66b30ef88434 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1314,7 +1314,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
- if (!ret)
+ if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
}
@@ -1511,7 +1511,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = mmu->arch->vtcr;
+ u64 vtcr = mmu->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 448b17080d36..1581df6aec87 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -137,12 +137,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
NOKPROBE_SYMBOL(__deactivate_traps);
/*
- * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * Disable IRQs in __vcpu_{load,put}_{activate,deactivate}_traps() to
* prevent a race condition between context switching of PMUSERENR_EL0
* in __{activate,deactivate}_traps_common() and IPIs that attempts to
* update PMUSERENR_EL0. See also kvm_set_pmuserenr().
*/
-void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+static void __vcpu_load_activate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -151,7 +151,7 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
-void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
+static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
{
unsigned long flags;
@@ -160,6 +160,19 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
+void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_load_switch_sysregs(vcpu);
+ __vcpu_load_activate_traps(vcpu);
+ __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
+}
+
+void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
+{
+ __vcpu_put_deactivate_traps(vcpu);
+ __vcpu_put_switch_sysregs(vcpu);
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -170,6 +183,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
@@ -214,17 +228,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_save_host_state_vhe(host_ctxt);
/*
- * ARM erratum 1165522 requires us to configure both stage 1 and
- * stage 2 translation for the guest context before we clear
- * HCR_EL2.TGE.
- *
- * We have already configured the guest's stage 1 translation in
- * kvm_vcpu_load_sysregs_vhe above. We must now call
- * __load_stage2 before __activate_traps, because
- * __load_stage2 configures stage 2 translation, and
- * __activate_traps clear HCR_EL2.TGE (among other things).
+ * Note that ARM erratum 1165522 requires us to configure both stage 1
+ * and stage 2 translation for the guest context before we clear
+ * HCR_EL2.TGE. The stage 1 and stage 2 guest context has already been
+ * loaded on the CPU in kvm_vcpu_load_vhe().
*/
- __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
__activate_traps(vcpu);
__kvm_adjust_pc(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index b35a178e7e0d..8e1e0d5033b6 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -52,7 +52,7 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
/**
- * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU
+ * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -62,7 +62,7 @@ NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe);
* and loading system register state early avoids having to load them on
* every entry to the VM.
*/
-void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
@@ -92,12 +92,10 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_restore_el1_state(guest_ctxt);
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
-
- activate_traps_vhe_load(vcpu);
}
/**
- * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -107,13 +105,12 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
* and deferring saving system register state until we're no longer running the
* VCPU avoids having to save them on every exit from the VM.
*/
-void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
+void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- deactivate_traps_vhe_put(vcpu);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 46bd43f61d76..b636b4111dbf 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -11,18 +11,25 @@
#include <asm/tlbflush.h>
struct tlb_inv_context {
- unsigned long flags;
- u64 tcr;
- u64 sctlr;
+ struct kvm_s2_mmu *mmu;
+ unsigned long flags;
+ u64 tcr;
+ u64 sctlr;
};
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
local_irq_save(cxt->flags);
+ if (vcpu && mmu != vcpu->arch.hw_mmu)
+ cxt->mmu = vcpu->arch.hw_mmu;
+ else
+ cxt->mmu = NULL;
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/*
* For CPUs that are affected by ARM errata 1165522 or 1530923,
@@ -66,10 +73,13 @@ static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
* We're done with the TLB operation, let's restore the host's
* view of HCR_EL2.
*/
- write_sysreg(0, vttbr_el2);
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
isb();
+ /* ... and the stage-2 MMU context that we switched away from */
+ if (cxt->mmu)
+ __load_stage2(cxt->mmu, cxt->mmu->arch);
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, SYS_TCR);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 7fb4df0456de..5763d979d8ca 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -133,12 +133,10 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
ARM_SMCCC_SMC_64, \
0, ARM_SMCCC_FUNC_MASK)
-static void init_smccc_filter(struct kvm *kvm)
+static int kvm_smccc_filter_insert_reserved(struct kvm *kvm)
{
int r;
- mt_init(&kvm->arch.smccc_filter);
-
/*
* Prevent userspace from handling any SMCCC calls in the architecture
* range, avoiding the risk of misrepresenting Spectre mitigation status
@@ -148,14 +146,25 @@ static void init_smccc_filter(struct kvm *kvm)
SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
GFP_KERNEL_ACCOUNT);
- WARN_ON_ONCE(r);
+ if (r)
+ goto out_destroy;
r = mtree_insert_range(&kvm->arch.smccc_filter,
SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
GFP_KERNEL_ACCOUNT);
- WARN_ON_ONCE(r);
+ if (r)
+ goto out_destroy;
+ return 0;
+out_destroy:
+ mtree_destroy(&kvm->arch.smccc_filter);
+ return r;
+}
+
+static bool kvm_smccc_filter_configured(struct kvm *kvm)
+{
+ return !mtree_empty(&kvm->arch.smccc_filter);
}
static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
@@ -184,13 +193,14 @@ static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user
goto out_unlock;
}
+ if (!kvm_smccc_filter_configured(kvm)) {
+ r = kvm_smccc_filter_insert_reserved(kvm);
+ if (WARN_ON_ONCE(r))
+ goto out_unlock;
+ }
+
r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
- if (r)
- goto out_unlock;
-
- set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
-
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return r;
@@ -201,7 +211,7 @@ static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
unsigned long idx = func_id;
void *val;
- if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ if (!kvm_smccc_filter_configured(kvm))
return KVM_SMCCC_FILTER_HANDLE;
/*
@@ -387,7 +397,7 @@ void kvm_arm_init_hypercalls(struct kvm *kvm)
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
- init_smccc_filter(kvm);
+ mt_init(&kvm->arch.smccc_filter);
}
void kvm_arm_teardown_hypercalls(struct kvm *kvm)
@@ -554,7 +564,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
bool wants_02;
- wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
+ wants_02 = vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2);
switch (val) {
case KVM_ARM_PSCI_0_1:
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 3dd38a151d2a..200c8019a82a 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -135,6 +135,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
* volunteered to do so, and bail out otherwise.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
+ trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
+ kvm_vcpu_get_hfar(vcpu), fault_ipa);
+
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
@@ -143,7 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
return 0;
}
- kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
return -ENOSYS;
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 482280fe22d7..4e41ceed5468 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -892,7 +892,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
- kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+ mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
if (mmu->pgt != NULL) {
kvm_err("kvm_arch already initialized?\n");
@@ -1067,7 +1067,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t addr;
int ret = 0;
struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
- struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgt = mmu->pgt;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_R |
(writable ? KVM_PGTABLE_PROT_W : 0);
@@ -1080,7 +1081,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
ret = kvm_mmu_topup_memory_cache(&cache,
- kvm_mmu_cache_min_pages(kvm));
+ kvm_mmu_cache_min_pages(mmu));
if (ret)
break;
@@ -1298,28 +1299,8 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (sz < PMD_SIZE)
return PAGE_SIZE;
- /*
- * The address we faulted on is backed by a transparent huge
- * page. However, because we map the compound huge page and
- * not the individual tail page, we need to transfer the
- * refcount to the head page. We have to be careful that the
- * THP doesn't start to split while we are adjusting the
- * refcounts.
- *
- * We are sure this doesn't happen, because mmu_invalidate_retry
- * was successful and we are holding the mmu_lock, so if this
- * THP is trying to split, it will be blocked in the mmu
- * notifier before touching any of the pages, specifically
- * before being able to call __split_huge_page_refcount().
- *
- * We can therefore safely transfer the refcount from PG_tail
- * to PG_head and switch the pfn from a tail page to the head
- * page accordingly.
- */
*ipap &= PMD_MASK;
- kvm_release_pfn_clean(pfn);
pfn &= ~(PTRS_PER_PMD - 1);
- get_page(pfn_to_page(pfn));
*pfnp = pfn;
return PMD_SIZE;
@@ -1431,7 +1412,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (fault_status != ESR_ELx_FSC_PERM ||
(logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
- kvm_mmu_cache_min_pages(kvm));
+ kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
if (ret)
return ret;
}
@@ -1747,7 +1728,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
/* Userspace should not be able to register out-of-bounds IPAs */
- VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
+ VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
@@ -2021,7 +2002,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space.
*/
- if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT))
return -EFAULT;
hva = new->userspace_addr;
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 6ff3ec18c925..8350fb8fee0b 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -123,7 +123,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
if (host_kvm->created_vcpus < 1)
return -EINVAL;
- pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
+ pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 6b066e04dc5d..fe99b3dab6ce 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -60,6 +60,23 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
return __kvm_pmu_event_mask(pmuver);
}
+u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
+{
+ u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
+ kvm_pmu_event_mask(kvm);
+ u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1);
+
+ if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0))
+ mask |= ARMV8_PMU_INCLUDE_EL2;
+
+ if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0))
+ mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
+ ARMV8_PMU_EXCLUDE_NS_EL1 |
+ ARMV8_PMU_EXCLUDE_EL3;
+
+ return mask;
+}
+
/**
* kvm_pmc_is_64bit - determine if counter is 64bit
* @pmc: counter context
@@ -72,7 +89,7 @@ static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
- u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
+ u64 val = kvm_vcpu_read_pmcr(kvm_pmc_to_vcpu(pmc));
return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
(pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
@@ -250,7 +267,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
- u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
+ u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT;
val &= ARMV8_PMU_PMCR_N_MASK;
if (val == 0)
@@ -272,7 +289,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
if (!kvm_vcpu_has_pmu(vcpu))
return;
- if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
+ if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
return;
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -324,7 +341,7 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
u64 reg = 0;
- if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
+ if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) {
reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
@@ -348,7 +365,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
pmu->irq_level = overflow;
if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+ int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
pmu->irq_num, overflow, pmu);
WARN_ON(ret);
}
@@ -426,7 +443,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
{
int i;
- if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
+ if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E))
return;
/* Weed out disabled counters */
@@ -569,7 +586,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
- return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
+ return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) &&
(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}
@@ -584,6 +601,7 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
struct perf_event *event;
struct perf_event_attr attr;
u64 eventsel, reg, data;
+ bool p, u, nsk, nsu;
reg = counter_index_to_evtreg(pmc->idx);
data = __vcpu_sys_reg(vcpu, reg);
@@ -610,13 +628,18 @@ static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
return;
+ p = data & ARMV8_PMU_EXCLUDE_EL1;
+ u = data & ARMV8_PMU_EXCLUDE_EL0;
+ nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1;
+ nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0;
+
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = arm_pmu->pmu.type;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
- attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
- attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
+ attr.exclude_user = (u != nsu);
+ attr.exclude_kernel = (p != nsk);
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = eventsel;
@@ -657,18 +680,13 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 select_idx)
{
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
- u64 reg, mask;
+ u64 reg;
if (!kvm_vcpu_has_pmu(vcpu))
return;
- mask = ARMV8_PMU_EVTYPE_MASK;
- mask &= ~ARMV8_PMU_EVTYPE_EVENT;
- mask |= kvm_pmu_event_mask(vcpu->kvm);
-
reg = counter_index_to_evtreg(pmc->idx);
-
- __vcpu_sys_reg(vcpu, reg) = data & mask;
+ __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);
kvm_pmu_create_perf_event(pmc);
}
@@ -717,10 +735,9 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
* It is still necessary to get a valid cpu, though, to probe for the
* default PMU instance as userspace is not required to specify a PMU
* type. In order to uphold the preexisting behavior KVM selects the
- * PMU instance for the core where the first call to the
- * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case
- * would be a user with disdain of all things big.LITTLE that affines
- * the VMM to a particular cluster of cores.
+ * PMU instance for the core during vcpu init. A dependent use
+ * case would be a user with disdain of all things big.LITTLE that
+ * affines the VMM to a particular cluster of cores.
*
* In any case, userspace should just do the sane thing and use the UAPI
* to select a PMU type directly. But, be wary of the baggage being
@@ -786,6 +803,17 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return val & mask;
}
+void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));
+
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
+}
+
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
if (!kvm_vcpu_has_pmu(vcpu))
@@ -874,6 +902,52 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
return true;
}
+/**
+ * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters.
+ * @kvm: The kvm pointer
+ */
+u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
+{
+ struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
+
+ /*
+ * The arm_pmu->num_events considers the cycle counter as well.
+ * Ignore that and return only the general-purpose counters.
+ */
+ return arm_pmu->num_events - 1;
+}
+
+static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
+{
+ lockdep_assert_held(&kvm->arch.config_lock);
+
+ kvm->arch.arm_pmu = arm_pmu;
+ kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
+}
+
+/**
+ * kvm_arm_set_default_pmu - No PMU set, get the default one.
+ * @kvm: The kvm pointer
+ *
+ * The observant among you will notice that the supported_cpus
+ * mask does not get updated for the default PMU even though it
+ * is quite possible the selected instance supports only a
+ * subset of cores in the system. This is intentional, and
+ * upholds the preexisting behavior on heterogeneous systems
+ * where vCPUs can be scheduled on any core but the guest
+ * counters could stop working.
+ */
+int kvm_arm_set_default_pmu(struct kvm *kvm)
+{
+ struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu();
+
+ if (!arm_pmu)
+ return -ENODEV;
+
+ kvm_arm_set_pmu(kvm, arm_pmu);
+ return 0;
+}
+
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
struct kvm *kvm = vcpu->kvm;
@@ -893,7 +967,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
break;
}
- kvm->arch.arm_pmu = arm_pmu;
+ kvm_arm_set_pmu(kvm, arm_pmu);
cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
ret = 0;
break;
@@ -916,23 +990,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (vcpu->arch.pmu.created)
return -EBUSY;
- if (!kvm->arch.arm_pmu) {
- /*
- * No PMU set, get the default one.
- *
- * The observant among you will notice that the supported_cpus
- * mask does not get updated for the default PMU even though it
- * is quite possible the selected instance supports only a
- * subset of cores in the system. This is intentional, and
- * upholds the preexisting behavior on heterogeneous systems
- * where vCPUs can be scheduled on any core but the guest
- * counters could stop working.
- */
- kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
- if (!kvm->arch.arm_pmu)
- return -ENODEV;
- }
-
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
int __user *uaddr = (int __user *)(long)attr->addr;
@@ -1072,3 +1129,15 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
ID_AA64DFR0_EL1_PMUVer_V3P5);
return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}
+
+/**
+ * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU
+ * @vcpu: The vcpu pointer
+ */
+u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
+{
+ u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) &
+ ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
+
+ return pmcr | ((u64)vcpu->kvm->arch.pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 7a65a35ee4ac..5bb4de162cab 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -73,11 +73,8 @@ int __init kvm_arm_init_sve(void)
return 0;
}
-static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
- if (!system_supports_sve())
- return -EINVAL;
-
vcpu->arch.sve_max_vl = kvm_sve_max_vl;
/*
@@ -86,8 +83,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
vcpu_set_flag(vcpu, GUEST_HAS_SVE);
-
- return 0;
}
/*
@@ -170,20 +165,9 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}
-static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
- /*
- * For now make sure that both address/generic pointer authentication
- * features are requested by the userspace together and the system
- * supports these capabilities.
- */
- if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) ||
- !system_has_full_ptr_auth())
- return -EINVAL;
-
vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
- return 0;
}
/**
@@ -204,10 +188,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
* disable preemption around the vcpu reset as we would otherwise race with
* preempt notifiers which also call put/load.
*/
-int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_reset_state reset_state;
- int ret;
bool loaded;
u32 pstate;
@@ -224,29 +207,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
- /* Disallow NV+SVE for the time being */
- if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
- ret = -EINVAL;
- goto out;
- }
-
if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
- if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
- ret = kvm_vcpu_enable_sve(vcpu);
- if (ret)
- goto out;
- }
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
+ kvm_vcpu_enable_sve(vcpu);
} else {
kvm_vcpu_reset_sve(vcpu);
}
- if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
- test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
- if (kvm_vcpu_enable_ptrauth(vcpu)) {
- ret = -EINVAL;
- goto out;
- }
- }
+ if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))
+ kvm_vcpu_enable_ptrauth(vcpu);
if (vcpu_el1_is_32bit(vcpu))
pstate = VCPU_RESET_PSTATE_SVC;
@@ -255,11 +225,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
else
pstate = VCPU_RESET_PSTATE_EL1;
- if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
- ret = -EINVAL;
- goto out;
- }
-
/* Reset core registers */
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
@@ -294,12 +259,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
}
/* Reset timer */
- ret = kvm_timer_vcpu_reset(vcpu);
-out:
+ kvm_timer_vcpu_reset(vcpu);
+
if (loaded)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
preempt_enable();
- return ret;
}
u32 get_kvm_ipa_limit(void)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0afd6136e275..0f90026f12a0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -379,7 +379,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ u64 val = IDREG(vcpu->kvm, SYS_ID_AA64MMFR1_EL1);
u32 sr = reg_to_encoding(r);
if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
@@ -719,14 +719,9 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu,
static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX);
+ u64 mask = BIT(ARMV8_PMU_CYCLE_IDX);
+ u8 n = vcpu->kvm->arch.pmcr_n;
- /* No PMU available, any PMU reg may UNDEF... */
- if (!kvm_arm_support_pmu_v3())
- return 0;
-
- n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT;
- n &= ARMV8_PMU_PMCR_N_MASK;
if (n)
mask |= GENMASK(n - 1, 0);
@@ -746,8 +741,12 @@ static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
+ /* This thing will UNDEF, who cares about the reset value? */
+ if (!kvm_vcpu_has_pmu(vcpu))
+ return 0;
+
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK;
+ __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm);
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -762,17 +761,15 @@ static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- u64 pmcr;
+ u64 pmcr = 0;
- /* No PMU available, PMCR_EL0 may UNDEF... */
- if (!kvm_arm_support_pmu_v3())
- return 0;
-
- /* Only preserve PMCR_EL0.N, and reset the rest to 0 */
- pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
if (!kvm_supports_32bit_el0())
pmcr |= ARMV8_PMU_PMCR_LC;
+ /*
+ * The value of PMCR.N field is included when the
+ * vCPU register is read via kvm_vcpu_read_pmcr().
+ */
__vcpu_sys_reg(vcpu, r->reg) = pmcr;
return __vcpu_sys_reg(vcpu, r->reg);
@@ -822,7 +819,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Only update writeable bits of PMCR (continuing into
* kvm_pmu_handle_pmcr() as well)
*/
- val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = kvm_vcpu_read_pmcr(vcpu);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
if (!kvm_supports_32bit_el0())
@@ -830,7 +827,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
kvm_pmu_handle_pmcr(vcpu, val);
} else {
/* PMCR.P & PMCR.C are RAZ */
- val = __vcpu_sys_reg(vcpu, PMCR_EL0)
+ val = kvm_vcpu_read_pmcr(vcpu)
& ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
p->regval = val;
}
@@ -879,7 +876,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
{
u64 pmcr, val;
- pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
+ pmcr = kvm_vcpu_read_pmcr(vcpu);
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
@@ -988,12 +985,45 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
- p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ p->regval = __vcpu_sys_reg(vcpu, reg);
}
return true;
}
+static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 val)
+{
+ bool set;
+
+ val &= kvm_pmu_valid_counter_mask(vcpu);
+
+ switch (r->reg) {
+ case PMOVSSET_EL0:
+ /* CRm[1] being set indicates a SET register, and CLR otherwise */
+ set = r->CRm & 2;
+ break;
+ default:
+ /* Op2[0] being set indicates a SET register, and CLR otherwise */
+ set = r->Op2 & 1;
+ break;
+ }
+
+ if (set)
+ __vcpu_sys_reg(vcpu, r->reg) |= val;
+ else
+ __vcpu_sys_reg(vcpu, r->reg) &= ~val;
+
+ return 0;
+}
+
+static int get_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 *val)
+{
+ u64 mask = kvm_pmu_valid_counter_mask(vcpu);
+
+ *val = __vcpu_sys_reg(vcpu, r->reg) & mask;
+ return 0;
+}
+
static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1103,6 +1133,51 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
+static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ *val = kvm_vcpu_read_pmcr(vcpu);
+ return 0;
+}
+
+static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 val)
+{
+ u8 new_n = (val >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ /*
+ * The vCPU can't have more counters than the PMU hardware
+ * implements. Ignore this error to maintain compatibility
+ * with the existing KVM behavior.
+ */
+ if (!kvm_vm_has_ran_once(kvm) &&
+ new_n <= kvm_arm_pmu_get_max_counters(kvm))
+ kvm->arch.pmcr_n = new_n;
+
+ mutex_unlock(&kvm->arch.config_lock);
+
+ /*
+ * Ignore writes to RES0 bits, read only bits that are cleared on
+ * vCPU reset, and writable bits that KVM doesn't support yet.
+ * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
+ * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
+ * But, we leave the bit as it is here, as the vCPU's PMUver might
+ * be changed later (NOTE: the bit will be cleared on first vCPU run
+ * if necessary).
+ */
+ val &= ARMV8_PMU_PMCR_MASK;
+
+ /* The LC bit is RES1 when AArch32 is not supported */
+ if (!kvm_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
+
+ __vcpu_sys_reg(vcpu, r->reg) = val;
+ return 0;
+}
+
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
{ SYS_DESC(SYS_DBGBVRn_EL1(n)), \
@@ -1216,8 +1291,14 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
/* Some features have different safe value type in KVM than host features */
switch (id) {
case SYS_ID_AA64DFR0_EL1:
- if (kvm_ftr.shift == ID_AA64DFR0_EL1_PMUVer_SHIFT)
+ switch (kvm_ftr.shift) {
+ case ID_AA64DFR0_EL1_PMUVer_SHIFT:
+ kvm_ftr.type = FTR_LOWER_SAFE;
+ break;
+ case ID_AA64DFR0_EL1_DebugVer_SHIFT:
kvm_ftr.type = FTR_LOWER_SAFE;
+ break;
+ }
break;
case SYS_ID_DFR0_EL1:
if (kvm_ftr.shift == ID_DFR0_EL1_PerfMon_SHIFT)
@@ -1228,7 +1309,7 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
return arm64_ftr_safe_value(&kvm_ftr, new, cur);
}
-/**
+/*
* arm64_check_features() - Check if a feature register value constitutes
* a subset of features indicated by the idreg's KVM sanitised limit.
*
@@ -1338,7 +1419,6 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
if (!cpus_have_final_cap(ARM64_HAS_WFXT))
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
- val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS);
break;
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
@@ -1373,6 +1453,13 @@ static inline bool is_id_reg(u32 id)
sys_reg_CRm(id) < 8);
}
+static inline bool is_aa32_id_reg(u32 id)
+{
+ return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
+ sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
+ sys_reg_CRm(id) <= 3);
+}
+
static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
@@ -1469,14 +1556,21 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
return val;
}
+#define ID_REG_LIMIT_FIELD_ENUM(val, reg, field, limit) \
+({ \
+ u64 __f_val = FIELD_GET(reg##_##field##_MASK, val); \
+ (val) &= ~reg##_##field##_MASK; \
+ (val) |= FIELD_PREP(reg##_##field##_MASK, \
+ min(__f_val, (u64)reg##_##field##_##limit)); \
+ (val); \
+})
+
static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
- /* Limit debug to ARMv8.0 */
- val &= ~ID_AA64DFR0_EL1_DebugVer_MASK;
- val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DebugVer, IMP);
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
/*
* Only initialize the PMU version if the vCPU was configured with one.
@@ -1496,6 +1590,7 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
u64 val)
{
+ u8 debugver = SYS_FIELD_GET(ID_AA64DFR0_EL1, DebugVer, val);
u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val);
/*
@@ -1515,6 +1610,13 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
val &= ~ID_AA64DFR0_EL1_PMUVer_MASK;
+ /*
+ * ID_AA64DFR0_EL1.DebugVer is one of those awkward fields with a
+ * nonzero minimum safe value.
+ */
+ if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP)
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, val);
}
@@ -1528,6 +1630,8 @@ static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu,
if (kvm_vcpu_has_pmu(vcpu))
val |= SYS_FIELD_PREP(ID_DFR0_EL1, PerfMon, perfmon);
+ val = ID_REG_LIMIT_FIELD_ENUM(val, ID_DFR0_EL1, CopDbg, Debugv8p8);
+
return val;
}
@@ -1536,6 +1640,7 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
u64 val)
{
u8 perfmon = SYS_FIELD_GET(ID_DFR0_EL1, PerfMon, val);
+ u8 copdbg = SYS_FIELD_GET(ID_DFR0_EL1, CopDbg, val);
if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) {
val &= ~ID_DFR0_EL1_PerfMon_MASK;
@@ -1551,6 +1656,9 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
if (perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3)
return -EINVAL;
+ if (copdbg < ID_DFR0_EL1_CopDbg_Armv8)
+ return -EINVAL;
+
return set_id_reg(vcpu, rd, val);
}
@@ -1791,8 +1899,8 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
* handling traps. Given that, they are always hidden from userspace.
*/
-static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
{
return REG_HIDDEN_USER;
}
@@ -1803,7 +1911,7 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
.reset = rst, \
.reg = name##_EL1, \
.val = v, \
- .visibility = elx2_visibility, \
+ .visibility = hidden_user_visibility, \
}
/*
@@ -1817,11 +1925,14 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
* from userspace.
*/
-/* sys_reg_desc initialiser for known cpufeature ID registers */
-#define ID_SANITISED(name) { \
+#define ID_DESC(name) \
SYS_DESC(SYS_##name), \
.access = access_id_reg, \
- .get_user = get_id_reg, \
+ .get_user = get_id_reg \
+
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define ID_SANITISED(name) { \
+ ID_DESC(name), \
.set_user = set_id_reg, \
.visibility = id_visibility, \
.reset = kvm_read_sanitised_id_reg, \
@@ -1830,15 +1941,22 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define AA32_ID_SANITISED(name) { \
- SYS_DESC(SYS_##name), \
- .access = access_id_reg, \
- .get_user = get_id_reg, \
+ ID_DESC(name), \
.set_user = set_id_reg, \
.visibility = aa32_id_visibility, \
.reset = kvm_read_sanitised_id_reg, \
.val = 0, \
}
+/* sys_reg_desc initialiser for writable ID registers */
+#define ID_WRITABLE(name, mask) { \
+ ID_DESC(name), \
+ .set_user = set_id_reg, \
+ .visibility = id_visibility, \
+ .reset = kvm_read_sanitised_id_reg, \
+ .val = mask, \
+}
+
/*
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
@@ -1860,9 +1978,7 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
* RAZ for the guest.
*/
#define ID_HIDDEN(name) { \
- SYS_DESC(SYS_##name), \
- .access = access_id_reg, \
- .get_user = get_id_reg, \
+ ID_DESC(name), \
.set_user = set_id_reg, \
.visibility = raz_visibility, \
.reset = kvm_read_sanitised_id_reg, \
@@ -1961,7 +2077,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
// DBGDTR[TR]X_EL0 share the same encoding
{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
- { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 },
+ { SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
@@ -1980,7 +2096,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.set_user = set_id_dfr0_el1,
.visibility = aa32_id_visibility,
.reset = read_sanitised_id_dfr0_el1,
- .val = ID_DFR0_EL1_PerfMon_MASK, },
+ .val = ID_DFR0_EL1_PerfMon_MASK |
+ ID_DFR0_EL1_CopDbg_MASK, },
ID_HIDDEN(ID_AFR0_EL1),
AA32_ID_SANITISED(ID_MMFR0_EL1),
AA32_ID_SANITISED(ID_MMFR1_EL1),
@@ -2014,11 +2131,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.get_user = get_id_reg,
.set_user = set_id_reg,
.reset = read_sanitised_id_aa64pfr0_el1,
- .val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, },
+ .val = ~(ID_AA64PFR0_EL1_AMU |
+ ID_AA64PFR0_EL1_MPAM |
+ ID_AA64PFR0_EL1_SVE |
+ ID_AA64PFR0_EL1_RAS |
+ ID_AA64PFR0_EL1_GIC |
+ ID_AA64PFR0_EL1_AdvSIMD |
+ ID_AA64PFR0_EL1_FP), },
ID_SANITISED(ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
- ID_SANITISED(ID_AA64ZFR0_EL1),
+ ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
@@ -2029,7 +2152,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.get_user = get_id_reg,
.set_user = set_id_aa64dfr0_el1,
.reset = read_sanitised_id_aa64dfr0_el1,
- .val = ID_AA64DFR0_EL1_PMUVer_MASK, },
+ .val = ID_AA64DFR0_EL1_PMUVer_MASK |
+ ID_AA64DFR0_EL1_DebugVer_MASK, },
ID_SANITISED(ID_AA64DFR1_EL1),
ID_UNALLOCATED(5,2),
ID_UNALLOCATED(5,3),
@@ -2039,9 +2163,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(5,7),
/* CRm=6 */
- ID_SANITISED(ID_AA64ISAR0_EL1),
- ID_SANITISED(ID_AA64ISAR1_EL1),
- ID_SANITISED(ID_AA64ISAR2_EL1),
+ ID_WRITABLE(ID_AA64ISAR0_EL1, ~ID_AA64ISAR0_EL1_RES0),
+ ID_WRITABLE(ID_AA64ISAR1_EL1, ~(ID_AA64ISAR1_EL1_GPI |
+ ID_AA64ISAR1_EL1_GPA |
+ ID_AA64ISAR1_EL1_API |
+ ID_AA64ISAR1_EL1_APA)),
+ ID_WRITABLE(ID_AA64ISAR2_EL1, ~(ID_AA64ISAR2_EL1_RES0 |
+ ID_AA64ISAR2_EL1_APA3 |
+ ID_AA64ISAR2_EL1_GPA3)),
ID_UNALLOCATED(6,3),
ID_UNALLOCATED(6,4),
ID_UNALLOCATED(6,5),
@@ -2049,9 +2178,23 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_UNALLOCATED(6,7),
/* CRm=7 */
- ID_SANITISED(ID_AA64MMFR0_EL1),
- ID_SANITISED(ID_AA64MMFR1_EL1),
- ID_SANITISED(ID_AA64MMFR2_EL1),
+ ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 |
+ ID_AA64MMFR0_EL1_TGRAN4_2 |
+ ID_AA64MMFR0_EL1_TGRAN64_2 |
+ ID_AA64MMFR0_EL1_TGRAN16_2)),
+ ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
+ ID_AA64MMFR1_EL1_HCX |
+ ID_AA64MMFR1_EL1_XNX |
+ ID_AA64MMFR1_EL1_TWED |
+ ID_AA64MMFR1_EL1_XNX |
+ ID_AA64MMFR1_EL1_VH |
+ ID_AA64MMFR1_EL1_VMIDBits)),
+ ID_WRITABLE(ID_AA64MMFR2_EL1, ~(ID_AA64MMFR2_EL1_RES0 |
+ ID_AA64MMFR2_EL1_EVT |
+ ID_AA64MMFR2_EL1_FWB |
+ ID_AA64MMFR2_EL1_IDS |
+ ID_AA64MMFR2_EL1_NV |
+ ID_AA64MMFR2_EL1_CCIDX)),
ID_SANITISED(ID_AA64MMFR3_EL1),
ID_UNALLOCATED(7,4),
ID_UNALLOCATED(7,5),
@@ -2116,9 +2259,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMBIDR_EL1 is not trapped */
{ PMU_SYS_REG(PMINTENSET_EL1),
- .access = access_pminten, .reg = PMINTENSET_EL1 },
+ .access = access_pminten, .reg = PMINTENSET_EL1,
+ .get_user = get_pmreg, .set_user = set_pmreg },
{ PMU_SYS_REG(PMINTENCLR_EL1),
- .access = access_pminten, .reg = PMINTENSET_EL1 },
+ .access = access_pminten, .reg = PMINTENSET_EL1,
+ .get_user = get_pmreg, .set_user = set_pmreg },
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
@@ -2166,14 +2311,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
{ SYS_DESC(SYS_SVCR), undef_access },
- { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
- .reset = reset_pmcr, .reg = PMCR_EL0 },
+ { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
+ .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
{ PMU_SYS_REG(PMCNTENSET_EL0),
- .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
+ .access = access_pmcnten, .reg = PMCNTENSET_EL0,
+ .get_user = get_pmreg, .set_user = set_pmreg },
{ PMU_SYS_REG(PMCNTENCLR_EL0),
- .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
+ .access = access_pmcnten, .reg = PMCNTENSET_EL0,
+ .get_user = get_pmreg, .set_user = set_pmreg },
{ PMU_SYS_REG(PMOVSCLR_EL0),
- .access = access_pmovs, .reg = PMOVSSET_EL0 },
+ .access = access_pmovs, .reg = PMOVSSET_EL0,
+ .get_user = get_pmreg, .set_user = set_pmreg },
/*
* PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
* previously (and pointlessly) advertised in the past...
@@ -2201,7 +2349,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(PMUSERENR_EL0), .access = access_pmuserenr,
.reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 },
{ PMU_SYS_REG(PMOVSSET_EL0),
- .access = access_pmovs, .reg = PMOVSSET_EL0 },
+ .access = access_pmovs, .reg = PMOVSSET_EL0,
+ .get_user = get_pmreg, .set_user = set_pmreg },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
@@ -2380,18 +2529,28 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+ { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
- { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+ /* AArch32 SPSR_* are RES0 if trapped from a NV guest */
+ { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+ { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi,
+ .visibility = hidden_user_visibility },
+
+ { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG(ESR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
+ { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG(FAR_EL2, access_rw, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
@@ -2438,14 +2597,15 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
- u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
- u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT);
-
- p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) |
- (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) |
- (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20)
- | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
+ u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);
+ u64 pfr = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
+ u32 el3 = !!SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr);
+
+ p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) |
+ (SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr) << 24) |
+ (SYS_FIELD_GET(ID_AA64DFR0_EL1, CTX_CMPs, dfr) << 20) |
+ (SYS_FIELD_GET(ID_AA64DFR0_EL1, DebugVer, dfr) << 16) |
+ (1 << 15) | (el3 << 14) | (el3 << 12));
return true;
}
}
@@ -3572,6 +3732,65 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
+#define KVM_ARM_FEATURE_ID_RANGE_INDEX(r) \
+ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(r), \
+ sys_reg_Op1(r), \
+ sys_reg_CRn(r), \
+ sys_reg_CRm(r), \
+ sys_reg_Op2(r))
+
+static bool is_feature_id_reg(u32 encoding)
+{
+ return (sys_reg_Op0(encoding) == 3 &&
+ (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
+ sys_reg_CRn(encoding) == 0 &&
+ sys_reg_CRm(encoding) <= 7);
+}
+
+int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range)
+{
+ const void *zero_page = page_to_virt(ZERO_PAGE(0));
+ u64 __user *masks = (u64 __user *)range->addr;
+
+ /* Only feature id range is supported, reserved[13] must be zero. */
+ if (range->range ||
+ memcmp(range->reserved, zero_page, sizeof(range->reserved)))
+ return -EINVAL;
+
+ /* Wipe the whole thing first */
+ if (clear_user(masks, KVM_ARM_FEATURE_ID_RANGE_SIZE * sizeof(__u64)))
+ return -EFAULT;
+
+ for (int i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
+ const struct sys_reg_desc *reg = &sys_reg_descs[i];
+ u32 encoding = reg_to_encoding(reg);
+ u64 val;
+
+ if (!is_feature_id_reg(encoding) || !reg->set_user)
+ continue;
+
+ /*
+ * For ID registers, we return the writable mask. Other feature
+ * registers return a full 64bit mask. That's not necessary
+ * compliant with a given revision of the architecture, but the
+ * RES0/RES1 definitions allow us to do that.
+ */
+ if (is_id_reg(encoding)) {
+ if (!reg->val ||
+ (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0()))
+ continue;
+ val = reg->val;
+ } else {
+ val = ~0UL;
+ }
+
+ if (put_user(val, (masks + KVM_ARM_FEATURE_ID_RANGE_INDEX(encoding))))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
int __init kvm_sys_reg_table_init(void)
{
struct sys_reg_params params;
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 8ad53104934d..c18c1a95831e 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -136,6 +136,31 @@ TRACE_EVENT(kvm_mmio_emulate,
__entry->vcpu_pc, __entry->instr, __entry->cpsr)
);
+TRACE_EVENT(kvm_mmio_nisv,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long esr,
+ unsigned long far, unsigned long ipa),
+ TP_ARGS(vcpu_pc, esr, far, ipa),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( unsigned long, esr )
+ __field( unsigned long, far )
+ __field( unsigned long, ipa )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->esr = esr;
+ __entry->far = far;
+ __entry->ipa = ipa;
+ ),
+
+ TP_printk("ipa %#016lx, esr %#016lx, far %#016lx, pc %#016lx",
+ __entry->ipa, __entry->esr,
+ __entry->far, __entry->vcpu_pc)
+);
+
+
TRACE_EVENT(kvm_set_way_flush,
TP_PROTO(unsigned long vcpu_pc, bool cache),
TP_ARGS(vcpu_pc, cache),
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 07aa0437125a..85606a531dc3 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -166,7 +166,7 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
if (vcpu) {
hdr = "VCPU";
- id = vcpu->vcpu_id;
+ id = vcpu->vcpu_idx;
}
seq_printf(s, "\n");
@@ -212,7 +212,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
" %2d "
"\n",
type, irq->intid,
- (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1,
+ (irq->target_vcpu) ? irq->target_vcpu->vcpu_idx : -1,
pending,
irq->line_level,
irq->active,
@@ -224,7 +224,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
irq->mpidr,
irq->source,
irq->priority,
- (irq->vcpu) ? irq->vcpu->vcpu_id : -1);
+ (irq->vcpu) ? irq->vcpu->vcpu_idx : -1);
}
static int vgic_debug_show(struct seq_file *s, void *v)
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 475059bacedf..8c711deb25aa 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -23,7 +23,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
+ return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL);
}
/**
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 5fe2365a629f..2dad2d095160 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -378,6 +378,12 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
return ret;
}
+static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm,
+ struct its_collection *col)
+{
+ return kvm_get_vcpu_by_id(kvm, col->target_addr);
+}
+
/*
* Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
* is targeting) to the VGIC's view, which deals with target VCPUs.
@@ -391,7 +397,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
if (!its_is_collection_mapped(ite->collection))
return;
- vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, ite->collection);
update_affinity(ite->irq, vcpu);
}
@@ -679,7 +685,7 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
if (!ite || !its_is_collection_mapped(ite->collection))
return E_ITS_INT_UNMAPPED_INTERRUPT;
- vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, ite->collection);
if (!vcpu)
return E_ITS_INT_UNMAPPED_INTERRUPT;
@@ -887,7 +893,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
return E_ITS_MOVI_UNMAPPED_COLLECTION;
ite->collection = collection;
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
vgic_its_invalidate_cache(kvm);
@@ -1121,7 +1127,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
if (its_is_collection_mapped(collection))
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
if (IS_ERR(irq)) {
@@ -1242,21 +1248,22 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
u16 coll_id;
- u32 target_addr;
struct its_collection *collection;
bool valid;
valid = its_cmd_get_validbit(its_cmd);
coll_id = its_cmd_get_collection(its_cmd);
- target_addr = its_cmd_get_target_addr(its_cmd);
-
- if (target_addr >= atomic_read(&kvm->online_vcpus))
- return E_ITS_MAPC_PROCNUM_OOR;
if (!valid) {
vgic_its_free_collection(its, coll_id);
vgic_its_invalidate_cache(kvm);
} else {
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
+ if (!vcpu)
+ return E_ITS_MAPC_PROCNUM_OOR;
+
collection = find_collection(its, coll_id);
if (!collection) {
@@ -1270,9 +1277,9 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
coll_id);
if (ret)
return ret;
- collection->target_addr = target_addr;
+ collection->target_addr = vcpu->vcpu_id;
} else {
- collection->target_addr = target_addr;
+ collection->target_addr = vcpu->vcpu_id;
update_affinity_collection(kvm, its, collection);
}
}
@@ -1382,7 +1389,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
if (!its_is_collection_mapped(collection))
return E_ITS_INVALL_UNMAPPED_COLLECTION;
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = collection_to_vcpu(kvm, collection);
vgic_its_invall(vcpu);
return 0;
@@ -1399,23 +1406,21 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
- u32 target1_addr = its_cmd_get_target_addr(its_cmd);
- u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
u32 *intids;
int irq_count, i;
- if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
- target2_addr >= atomic_read(&kvm->online_vcpus))
+ /* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */
+ vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
+ vcpu2 = kvm_get_vcpu_by_id(kvm, its_cmd_mask_field(its_cmd, 3, 16, 32));
+
+ if (!vcpu1 || !vcpu2)
return E_ITS_MOVALL_PROCNUM_OOR;
- if (target1_addr == target2_addr)
+ if (vcpu1 == vcpu2)
return 0;
- vcpu1 = kvm_get_vcpu(kvm, target1_addr);
- vcpu2 = kvm_get_vcpu(kvm, target2_addr);
-
irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
if (irq_count < 0)
return irq_count;
@@ -2258,7 +2263,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
return PTR_ERR(ite);
if (its_is_collection_mapped(collection))
- vcpu = kvm_get_vcpu(kvm, collection->target_addr);
+ vcpu = kvm_get_vcpu_by_id(kvm, collection->target_addr);
irq = vgic_add_lpi(kvm, lpi_id, vcpu);
if (IS_ERR(irq)) {
@@ -2573,7 +2578,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
coll_id = val & KVM_ITS_CTE_ICID_MASK;
if (target_addr != COLLECTION_NOT_MAPPED &&
- target_addr >= atomic_read(&kvm->online_vcpus))
+ !kvm_get_vcpu_by_id(kvm, target_addr))
return -EINVAL;
collection = find_collection(its, coll_id);
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 212b73a715c1..f48b8dab8b3d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -27,7 +27,8 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
if (addr + size < addr)
return -EINVAL;
- if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
+ if (addr & ~kvm_phys_mask(&kvm->arch.mmu) ||
+ (addr + size) > kvm_phys_size(&kvm->arch.mmu))
return -E2BIG;
return 0;
@@ -339,13 +340,9 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
{
int cpuid;
- cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
- KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+ cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr);
- if (cpuid >= atomic_read(&dev->kvm->online_vcpus))
- return -EINVAL;
-
- reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid);
reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return 0;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 188d2187eede..89117ba2528a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -1013,35 +1013,6 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
return 0;
}
-/*
- * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
- * generation register ICC_SGI1R_EL1) with a given VCPU.
- * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
- * return -1.
- */
-static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
-{
- unsigned long affinity;
- int level0;
-
- /*
- * Split the current VCPU's MPIDR into affinity level 0 and the
- * rest as this is what we have to compare against.
- */
- affinity = kvm_vcpu_get_mpidr_aff(vcpu);
- level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
- affinity &= ~MPIDR_LEVEL_MASK;
-
- /* bail out if the upper three levels don't match */
- if (sgi_aff != affinity)
- return -1;
-
- /* Is this VCPU's bit set in the mask ? */
- if (!(sgi_cpu_mask & BIT(level0)))
- return -1;
-
- return level0;
-}
/*
* The ICC_SGI* registers encode the affinity differently from the MPIDR,
@@ -1052,6 +1023,38 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
>> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+ /*
+ * An access targeting Group0 SGIs can only generate
+ * those, while an access targeting Group1 SGIs can
+ * generate interrupts of either group.
+ */
+ if (!irq->group || allow_group1) {
+ if (!irq->hw) {
+ irq->pending_latch = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+ } else {
+ /* HW SGI? Ask the GIC to inject it */
+ int err;
+ err = irq_set_irqchip_state(irq->host_irq,
+ IRQCHIP_STATE_PENDING,
+ true);
+ WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
+ } else {
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ }
+
+ vgic_put_irq(vcpu->kvm, irq);
+}
+
/**
* vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
* @vcpu: The VCPU requesting a SGI
@@ -1062,83 +1065,46 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
* This will trap in sys_regs.c and call this function.
* This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
* target processors as well as a bitmask of 16 Aff0 CPUs.
- * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
- * check for matching ones. If this bit is set, we signal all, but not the
- * calling VCPU.
+ *
+ * If the interrupt routing mode bit is not set, we iterate over the Aff0
+ * bits and signal the VCPUs matching the provided Aff{3,2,1}.
+ *
+ * If this bit is set, we signal all, but not the calling VCPU.
*/
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *c_vcpu;
- u16 target_cpus;
+ unsigned long target_cpus;
u64 mpidr;
- int sgi;
- int vcpu_id = vcpu->vcpu_id;
- bool broadcast;
- unsigned long c, flags;
-
- sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
- broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
- target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
- mpidr = SGI_AFFINITY_LEVEL(reg, 3);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
- mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
-
- /*
- * We iterate over all VCPUs to find the MPIDRs matching the request.
- * If we have handled one CPU, we clear its bit to detect early
- * if we are already finished. This avoids iterating through all
- * VCPUs when most of the times we just signal a single VCPU.
- */
- kvm_for_each_vcpu(c, c_vcpu, kvm) {
- struct vgic_irq *irq;
-
- /* Exit early if we have dealt with all requested CPUs */
- if (!broadcast && target_cpus == 0)
- break;
-
- /* Don't signal the calling VCPU */
- if (broadcast && c == vcpu_id)
- continue;
+ u32 sgi, aff0;
+ unsigned long c;
- if (!broadcast) {
- int level0;
+ sgi = FIELD_GET(ICC_SGI1R_SGI_ID_MASK, reg);
- level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
- if (level0 == -1)
+ /* Broadcast */
+ if (unlikely(reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT))) {
+ kvm_for_each_vcpu(c, c_vcpu, kvm) {
+ /* Don't signal the calling VCPU */
+ if (c_vcpu == vcpu)
continue;
- /* remove this matching VCPU from the mask */
- target_cpus &= ~BIT(level0);
+ vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
}
- irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ return;
+ }
- /*
- * An access targeting Group0 SGIs can only generate
- * those, while an access targeting Group1 SGIs can
- * generate interrupts of either group.
- */
- if (!irq->group || allow_group1) {
- if (!irq->hw) {
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
- } else {
- /* HW SGI? Ask the GIC to inject it */
- int err;
- err = irq_set_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- true);
- WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
- } else {
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ /* We iterate over affinities to find the corresponding vcpus */
+ mpidr = SGI_AFFINITY_LEVEL(reg, 3);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+ target_cpus = FIELD_GET(ICC_SGI1R_TARGET_LIST_MASK, reg);
- vgic_put_irq(vcpu->kvm, irq);
+ for_each_set_bit(aff0, &target_cpus, hweight_long(ICC_SGI1R_TARGET_LIST_MASK)) {
+ c_vcpu = kvm_mpidr_to_vcpu(kvm, mpidr | aff0);
+ if (c_vcpu)
+ vgic_v3_queue_sgi(c_vcpu, sgi, allow_group1);
}
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 8be4c1ebdec2..db2a95762b1b 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -422,7 +422,7 @@ retry:
/**
* kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
* @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
+ * @vcpu: The CPU for PPIs or NULL for global interrupts
* @intid: The INTID to inject a new state to.
* @level: Edge-triggered: true: to trigger the interrupt
* false: to ignore the call
@@ -436,24 +436,22 @@ retry:
* level-sensitive interrupts. You can think of the level parameter as 1
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
- bool level, void *owner)
+int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ unsigned int intid, bool level, void *owner)
{
- struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
unsigned long flags;
int ret;
- trace_vgic_update_irq_pending(cpuid, intid, level);
-
ret = vgic_lazy_init(kvm);
if (ret)
return ret;
- vcpu = kvm_get_vcpu(kvm, cpuid);
if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
+ trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
+
irq = vgic_get_irq(kvm, vcpu, intid);
if (!irq)
return -EINVAL;
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 7fe8ba1a2851..806223b7022a 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -135,10 +135,11 @@ void kvm_arm_vmid_clear_active(void)
atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
}
-void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
{
unsigned long flags;
u64 vmid, old_active_vmid;
+ bool updated = false;
vmid = atomic64_read(&kvm_vmid->id);
@@ -156,17 +157,21 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
old_active_vmid, vmid))
- return;
+ return false;
raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
/* Check that our VMID belongs to the current generation. */
vmid = atomic64_read(&kvm_vmid->id);
- if (!vmid_gen_match(vmid))
+ if (!vmid_gen_match(vmid)) {
vmid = new_vmid(kvm_vmid);
+ updated = true;
+ }
atomic64_set(this_cpu_ptr(&active_vmids), vmid);
raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+
+ return updated;
}
/*