summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 11:24:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 11:24:14 -0700
commitd7e0a795bf37a13554c80cfc5ba97abedf53f391 (patch)
tree26f107fbe530b1bd0912a748b808cbe476bfbf49 /arch/arm64/kvm
parent44261f8e287d1b02a2e4bfbd7399fb8d37d1ee24 (diff)
parent52cf891d8dbd7592261fa30f373410b97f22b76c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - More progress on the protected VM front, now with the full fixed feature set as well as the limitation of some hypercalls after initialisation. - Cleanup of the RAZ/WI sysreg handling, which was pointlessly complicated - Fixes for the vgic placement in the IPA space, together with a bunch of selftests - More memcg accounting of the memory allocated on behalf of a guest - Timer and vgic selftests - Workarounds for the Apple M1 broken vgic implementation - KConfig cleanups - New kvmarm.mode=none option, for those who really dislike us RISC-V: - New KVM port. x86: - New API to control TSC offset from userspace - TSC scaling for nested hypervisors on SVM - Switch masterclock protection from raw_spin_lock to seqcount - Clean up function prototypes in the page fault code and avoid repeated memslot lookups - Convey the exit reason to userspace on emulation failure - Configure time between NX page recovery iterations - Expose Predictive Store Forwarding Disable CPUID leaf - Allocate page tracking data structures lazily (if the i915 KVM-GT functionality is not compiled in) - Cleanups, fixes and optimizations for the shadow MMU code s390: - SIGP Fixes - initial preparations for lazy destroy of secure VMs - storage key improvements/fixes - Log the guest CPNC Starting from this release, KVM-PPC patches will come from Michael Ellerman's PPC tree" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits) RISC-V: KVM: fix boolreturn.cocci warnings RISC-V: KVM: remove unneeded semicolon RISC-V: KVM: Fix GPA passed to __kvm_riscv_hfence_gvma_xyz() functions RISC-V: KVM: Factor-out FP virtualization into separate sources KVM: s390: add debug statement for diag 318 CPNC data KVM: s390: pv: properly handle page flags for protected guests KVM: s390: Fix handle_sske page fault handling KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol KVM: x86: On emulation failure, convey the exit reason, etc. to userspace KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info KVM: x86: Clarify the kvm_run.emulation_failure structure layout KVM: s390: Add a routine for setting userspace CPU state KVM: s390: Simplify SIGP Set Arch handling KVM: s390: pv: avoid stalls when making pages secure KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm KVM: s390: pv: avoid double free of sida page KVM: s390: pv: add macros for UVC CC values s390/mm: optimize reset_guest_reference_bit() s390/mm: optimize set_guest_storage_key() s390/mm: no need for pte_alloc_map_lock() if we know the pmd is present ...
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig10
-rw-r--r--arch/arm64/kvm/arm.c102
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h75
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h235
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h200
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/trap_handler.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S26
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c48
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c185
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c99
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c487
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c22
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c16
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/kvm/pmu-emul.c2
-rw-r--r--arch/arm64/kvm/reset.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c41
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c18
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c25
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c27
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic.h5
28 files changed, 1390 insertions, 269 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index d7eec0b43744..8ffcbe29395e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/lib/Kconfig"
+source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -19,7 +20,7 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
- depends on OF
+ depends on HAVE_KVM
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -43,12 +44,9 @@ menuconfig KVM
If unsure, say N.
-if KVM
-
-source "virt/kvm/Kconfig"
-
config NVHE_EL2_DEBUG
bool "Debug mode for non-VHE EL2 object"
+ depends on KVM
help
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
Failure reports will BUG() in the hypervisor. This is intended for
@@ -56,6 +54,4 @@ config NVHE_EL2_DEBUG
If unsure, say N.
-endif # KVM
-
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe102cd2e518..f5490afe1ebf 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -291,18 +291,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
struct kvm *kvm_arch_alloc_vm(void)
{
- if (!has_vhe())
- return kzalloc(sizeof(struct kvm), GFP_KERNEL);
-
- return vzalloc(sizeof(struct kvm));
-}
+ size_t sz = sizeof(struct kvm);
-void kvm_arch_free_vm(struct kvm *kvm)
-{
if (!has_vhe())
- kfree(kvm);
- else
- vfree(kvm);
+ return kzalloc(sz, GFP_KERNEL_ACCOUNT);
+
+ return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -620,6 +614,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_arm_pmu_v3_enable(vcpu);
+ /*
+ * Initialize traps for protected VMs.
+ * NOTE: Move to run in EL2 directly, rather than via a hypercall, once
+ * the code is in place for first run initialization at EL2.
+ */
+ if (kvm_vm_is_protected(kvm))
+ kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
+
return ret;
}
@@ -1579,25 +1581,33 @@ static void cpu_set_hyp_vector(void)
kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
}
-static void cpu_hyp_reinit(void)
+static void cpu_hyp_init_context(void)
{
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
- cpu_hyp_reset();
-
- if (is_kernel_in_hyp_mode())
- kvm_timer_init_vhe();
- else
+ if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
+}
+static void cpu_hyp_init_features(void)
+{
cpu_set_hyp_vector();
-
kvm_arm_init_debug();
+ if (is_kernel_in_hyp_mode())
+ kvm_timer_init_vhe();
+
if (vgic_present)
kvm_vgic_init_cpu_hardware();
}
+static void cpu_hyp_reinit(void)
+{
+ cpu_hyp_reset();
+ cpu_hyp_init_context();
+ cpu_hyp_init_features();
+}
+
static void _kvm_arch_hardware_enable(void *discard)
{
if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
@@ -1788,10 +1798,17 @@ static int do_pkvm_init(u32 hyp_va_bits)
int ret;
preempt_disable();
- hyp_install_host_vector();
+ cpu_hyp_init_context();
ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
num_possible_cpus(), kern_hyp_va(per_cpu_base),
hyp_va_bits);
+ cpu_hyp_init_features();
+
+ /*
+ * The stub hypercalls are now disabled, so set our local flag to
+ * prevent a later re-init attempt in kvm_arch_hardware_enable().
+ */
+ __this_cpu_write(kvm_arm_hardware_enabled, 1);
preempt_enable();
return ret;
@@ -1802,8 +1819,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
void *addr = phys_to_virt(hyp_mem_base);
int ret;
+ kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
+ kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
if (ret)
@@ -1971,9 +1993,25 @@ out_err:
return err;
}
-static void _kvm_host_prot_finalize(void *discard)
+static void _kvm_host_prot_finalize(void *arg)
{
- WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
+ int *err = arg;
+
+ if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
+ WRITE_ONCE(*err, -EINVAL);
+}
+
+static int pkvm_drop_host_privileges(void)
+{
+ int ret = 0;
+
+ /*
+ * Flip the static key upfront as that may no longer be possible
+ * once the host stage 2 is installed.
+ */
+ static_branch_enable(&kvm_protected_mode_initialized);
+ on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
+ return ret;
}
static int finalize_hyp_mode(void)
@@ -1987,15 +2025,7 @@ static int finalize_hyp_mode(void)
* None of other sections should ever be introspected.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
-
- /*
- * Flip the static key upfront as that may no longer be possible
- * once the host stage 2 is installed.
- */
- static_branch_enable(&kvm_protected_mode_initialized);
- on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
-
- return 0;
+ return pkvm_drop_host_privileges();
}
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
@@ -2064,6 +2094,11 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ if (kvm_get_mode() == KVM_MODE_NONE) {
+ kvm_info("KVM disabled from command line\n");
+ return -ENODEV;
+ }
+
in_hyp_mode = is_kernel_in_hyp_mode();
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
@@ -2137,8 +2172,15 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
- if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
+ if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
+ kvm_mode = KVM_MODE_DEFAULT;
return 0;
+ }
+
+ if (strcmp(arg, "none") == 0) {
+ kvm_mode = KVM_MODE_NONE;
+ return 0;
+ }
return -EINVAL;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
new file mode 100644
index 000000000000..1b8a2dcd712f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_FAULT_H__
+#define __ARM64_KVM_HYP_FAULT_H__
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg_par();
+ if (!__kvm_at("s1e1r", far))
+ tmp = read_sysreg_par();
+ else
+ tmp = SYS_PAR_EL1_F; /* back to the guest */
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+ u64 hpfar, far;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ fault->far_el2 = far;
+ fault->hpfar_el2 = hpfar;
+ return true;
+}
+
+#endif
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index d5a47b93ef9b..7a0af1d39303 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -8,6 +8,7 @@
#define __ARM64_KVM_HYP_SWITCH_H__
#include <hyp/adjust_pc.h>
+#include <hyp/fault.h>
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
@@ -137,78 +138,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
-static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
- tmp = read_sysreg_par();
- else
- tmp = SYS_PAR_EL1_F; /* back to the guest */
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
-{
- u64 hpfar, far;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- fault->far_el2 = far;
- fault->hpfar_el2 = hpfar;
- return true;
-}
-
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
- u8 ec;
- u64 esr;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- return __get_fault_info(esr, &vcpu->arch.fault);
+ return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
@@ -229,8 +161,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+/*
+ * We trap the first access to the FP/SIMD to save the host context and
+ * restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an undefined
+ * instruction exception to the guest. Similarly for trapped SVE accesses.
+ */
+static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest, sve_host;
u8 esr_ec;
@@ -248,9 +185,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
}
esr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
- esr_ec != ESR_ELx_EC_SVE)
- return false;
/* Don't handle SVE traps for non-SVE vcpus here: */
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
@@ -352,14 +286,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
static inline bool esr_is_ptrauth_trap(u32 esr)
{
- u32 ec = ESR_ELx_EC(esr);
-
- if (ec == ESR_ELx_EC_PAC)
- return true;
-
- if (ec != ESR_ELx_EC_SYS64)
- return false;
-
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
case SYS_APIAKEYHI_EL1:
@@ -388,13 +314,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr)
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm_cpu_context *ctxt;
u64 val;
- if (!vcpu_has_ptrauth(vcpu) ||
- !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ if (!vcpu_has_ptrauth(vcpu))
return false;
ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
@@ -413,6 +338,90 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
return true;
}
+static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return kvm_hyp_handle_ptrauth(vcpu, exit_code);
+
+ return false;
+}
+
+static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_abt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+ }
+ }
+
+ return false;
+}
+
+typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+
+/*
+ * Allow the hypervisor to handle the exit with an exit handler if it has one.
+ *
+ * Returns true if the hypervisor handled the exit, and control should go back
+ * to the guest, or false if it hasn't.
+ */
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+ exit_handler_fn fn;
+
+ fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
+
+ if (fn)
+ return fn(vcpu, exit_code);
+
+ return false;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -447,59 +456,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
+ /* Check if there's an exit handler and allow it to handle the exit. */
+ if (kvm_hyp_handle_exit(vcpu, exit_code))
goto guest;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- goto guest;
-
- if (__hyp_handle_ptrauth(vcpu))
- goto guest;
-
- if (!__populate_fault_info(vcpu))
- goto guest;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_abt_issea(vcpu) &&
- !kvm_vcpu_abt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
- }
-
exit:
/* Return to the host kernel and handle the exit */
return false;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
new file mode 100644
index 000000000000..eea1f6a53723
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#ifndef __ARM64_KVM_FIXED_CONFIG_H__
+#define __ARM64_KVM_FIXED_CONFIG_H__
+
+#include <asm/sysreg.h>
+
+/*
+ * This file contains definitions for features to be allowed or restricted for
+ * guest virtual machines, depending on the mode KVM is running in and on the
+ * type of guest that is running.
+ *
+ * The ALLOW masks represent a bitmask of feature fields that are allowed
+ * without any restrictions as long as they are supported by the system.
+ *
+ * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
+ * features that are restricted to support at most the specified feature.
+ *
+ * If a feature field is not present in either, than it is not supported.
+ *
+ * The approach taken for protected VMs is to allow features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Cannot be trapped or prevent the guest from using anyway
+ */
+
+/*
+ * Allow for protected VMs:
+ * - Floating-point and Advanced SIMD
+ * - Data Independent Timing
+ */
+#define PVM_ID_AA64PFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - AArch64 guests only (no support for AArch32 guests):
+ * AArch32 adds complexity in trap handling, emulation, condition codes,
+ * etc...
+ * - RAS (v1)
+ * Supported by KVM
+ */
+#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Branch Target Identification
+ * - Speculative Store Bypassing
+ */
+#define PVM_ID_AA64PFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Mixed-endian
+ * - Distinction between Secure and Non-secure Memory
+ * - Mixed-endian at EL0 only
+ * - Non-context synchronizing exception entry and exit
+ */
+#define PVM_ID_AA64MMFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - 40-bit IPA
+ * - 16-bit ASID
+ */
+#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Hardware translation table updates to Access flag and Dirty state
+ * - Number of VMID bits from CPU
+ * - Hierarchical Permission Disables
+ * - Privileged Access Never
+ * - SError interrupt exceptions from speculative reads
+ * - Enhanced Translation Synchronization
+ */
+#define PVM_ID_AA64MMFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Common not Private translations
+ * - User Access Override
+ * - IESB bit in the SCTLR_ELx registers
+ * - Unaligned single-copy atomicity and atomic functions
+ * - ESR_ELx.EC value on an exception by read access to feature ID space
+ * - TTL field in address operations.
+ * - Break-before-make sequences when changing translation block size
+ * - E0PDx mechanism
+ */
+#define PVM_ID_AA64MMFR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+ )
+
+/*
+ * No support for Scalable Vectors for protected VMs:
+ * Requires additional support from KVM, e.g., context-switching and
+ * trapping at EL2
+ */
+#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
+
+/*
+ * No support for debug, including breakpoints, and watchpoints for protected
+ * VMs:
+ * The Arm architecture mandates support for at least the Armv8 debug
+ * architecture, which would include at least 2 hardware breakpoints and
+ * watchpoints. Providing that support to protected guests adds
+ * considerable state and complexity. Therefore, the reserved value of 0 is
+ * used for debug-related fields.
+ */
+#define PVM_ID_AA64DFR0_ALLOW (0ULL)
+#define PVM_ID_AA64DFR1_ALLOW (0ULL)
+
+/*
+ * No support for implementation defined features.
+ */
+#define PVM_ID_AA64AFR0_ALLOW (0ULL)
+#define PVM_ID_AA64AFR1_ALLOW (0ULL)
+
+/*
+ * No restrictions on instructions implemented in AArch64.
+ */
+#define PVM_ID_AA64ISAR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+ )
+
+#define PVM_ID_AA64ISAR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+ )
+
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+int kvm_check_pvm_sysreg_table(void);
+
+#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 1e6d995968a1..45a84f0ade04 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -15,4 +15,6 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 8d741f71377f..c3c11974fa3b 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
- cache.o setup.o mm.o mem_protect.o
+ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 4b652ffb591d..0c6116d34e18 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic)
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
-.macro host_el1_sync_vect
- .align 7
-.L__vect_start\@:
- stp x0, x1, [sp, #-16]!
- mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
- cmp x0, #ESR_ELx_EC_HVC64
- b.ne __host_exit
-
+SYM_FUNC_START(__host_hvc)
ldp x0, x1, [sp] // Don't fixup the stack yet
+ /* No stub for you, sonny Jim */
+alternative_if ARM64_KVM_PROTECTED_MODE
+ b __host_exit
+alternative_else_nop_endif
+
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs __host_exit
@@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic)
ldr x5, =__kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
+SYM_FUNC_END(__host_hvc)
+
+.macro host_el1_sync_vect
+ .align 7
+.L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.eq __host_hvc
+ b __host_exit
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
.error "host_el1_sync_vect larger than vector entry"
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2da6aa8da868..b096bf009144 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -4,7 +4,7 @@
* Author: Andrew Scull <ascull@google.com>
*/
-#include <hyp/switch.h>
+#include <hyp/adjust_pc.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -160,41 +160,65 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
+
+static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+ __pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
static const hcall_t host_hcall[] = {
- HANDLE_FUNC(__kvm_vcpu_run),
+ /* ___kvm_hyp_init */
+ HANDLE_FUNC(__kvm_get_mdcr_el2),
+ HANDLE_FUNC(__pkvm_init),
+ HANDLE_FUNC(__pkvm_create_private_mapping),
+ HANDLE_FUNC(__pkvm_cpu_set_vector),
+ HANDLE_FUNC(__kvm_enable_ssbs),
+ HANDLE_FUNC(__vgic_v3_init_lrs),
+ HANDLE_FUNC(__vgic_v3_get_gic_config),
+ HANDLE_FUNC(__pkvm_prot_finalize),
+
+ HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
+ HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__kvm_enable_ssbs),
- HANDLE_FUNC(__vgic_v3_get_gic_config),
HANDLE_FUNC(__vgic_v3_read_vmcr),
HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_init_lrs),
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
- HANDLE_FUNC(__pkvm_init),
- HANDLE_FUNC(__pkvm_cpu_set_vector),
- HANDLE_FUNC(__pkvm_host_share_hyp),
- HANDLE_FUNC(__pkvm_create_private_mapping),
- HANDLE_FUNC(__pkvm_prot_finalize),
+ HANDLE_FUNC(__pkvm_vcpu_init_traps),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
+ unsigned long hcall_min = 0;
hcall_t hfn;
+ /*
+ * If pKVM has been initialised then reject any calls to the
+ * early "privileged" hypercalls. Note that we cannot reject
+ * calls to __pkvm_prot_finalize for two reasons: (1) The static
+ * key used to determine initialisation must be toggled prior to
+ * finalisation and (2) finalisation is performed on a per-CPU
+ * basis. This is all fine, however, since __pkvm_prot_finalize
+ * returns -EPERM after the first call for a given CPU.
+ */
+ if (static_branch_unlikely(&kvm_protected_mode_initialized))
+ hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
+
id -= KVM_HOST_SMCCC_ID(0);
- if (unlikely(id >= ARRAY_SIZE(host_hcall)))
+ if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
goto inval;
hfn = host_hcall[id];
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 34eeb524b686..c1a90dd022b8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -11,7 +11,7 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-#include <hyp/switch.h>
+#include <hyp/fault.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
@@ -25,12 +25,6 @@ struct host_kvm host_kvm;
static struct hyp_pool host_s2_pool;
-/*
- * Copies of the host's CPU features registers holding sanitized values.
- */
-u64 id_aa64mmfr0_el1_sys_val;
-u64 id_aa64mmfr1_el1_sys_val;
-
const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
@@ -134,6 +128,9 @@ int __pkvm_prot_finalize(void)
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ if (params->hcr_el2 & HCR_VM)
+ return -EPERM;
+
params->vttbr = kvm_get_vttbr(mmu);
params->vtcr = host_kvm.arch.vtcr;
params->hcr_el2 |= HCR_VM;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
new file mode 100644
index 000000000000..99c8d8b73e70
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <nvhe/fixed_config.h>
+#include <nvhe/trap_handler.h>
+
+/*
+ * Set trap register values based on features in ID_AA64PFR0.
+ */
+static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
+ u64 hcr_set = HCR_RW;
+ u64 hcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Protected KVM does not support AArch32 guests. */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ /*
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
+ */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ /* Trap RAS unless all current versions are supported */
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
+ ID_AA64PFR0_RAS_V1P1) {
+ hcr_set |= HCR_TERR | HCR_TEA;
+ hcr_clear |= HCR_FIEN;
+ }
+
+ /* Trap AMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+ hcr_clear |= HCR_AMVOFFEN;
+ cptr_set |= CPTR_EL2_TAM;
+ }
+
+ /* Trap SVE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+ cptr_set |= CPTR_EL2_TZ;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64PFR1.
+ */
+static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
+ u64 hcr_set = 0;
+ u64 hcr_clear = 0;
+
+ /* Memory Tagging: Trap and Treat as Untagged if not supported. */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+ hcr_set |= HCR_TID5;
+ hcr_clear |= HCR_DCT | HCR_ATA;
+ }
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64DFR0.
+ */
+static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
+ u64 mdcr_set = 0;
+ u64 mdcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Trap/constrain PMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
+ MDCR_EL2_HPMN_MASK;
+ }
+
+ /* Trap Debug */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+ mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+
+ /* Trap OS Double Lock */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+ mdcr_set |= MDCR_EL2_TDOSA;
+
+ /* Trap SPE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPMS;
+ mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+ }
+
+ /* Trap Trace Filter */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+ mdcr_set |= MDCR_EL2_TTRF;
+
+ /* Trap Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+ cptr_set |= CPTR_EL2_TTA;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 &= ~mdcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR0.
+ */
+static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
+ u64 mdcr_set = 0;
+
+ /* Trap Debug Communications Channel registers */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+ mdcr_set |= MDCR_EL2_TDCC;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR1.
+ */
+static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
+ u64 hcr_set = 0;
+
+ /* Trap LOR */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+ hcr_set |= HCR_TLOR;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+}
+
+/*
+ * Set baseline trap register values.
+ */
+static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
+{
+ const u64 hcr_trap_feat_regs = HCR_TID3;
+ const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
+
+ /* Clear res0 and set res1 bits to trap potential new features. */
+ vcpu->arch.hcr_el2 &= ~(HCR_RES0);
+ vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
+ vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
+ vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
+}
+
+/*
+ * Initialize trap register values for protected VMs.
+ */
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+{
+ pvm_init_trap_regs(vcpu);
+ pvm_init_traps_aa64pfr0(vcpu);
+ pvm_init_traps_aa64pfr1(vcpu);
+ pvm_init_traps_aa64dfr0(vcpu);
+ pvm_init_traps_aa64mmfr0(vcpu);
+ pvm_init_traps_aa64mmfr1(vcpu);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 57c27846320f..862c7b514e20 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -10,6 +10,7 @@
#include <asm/kvm_pgtable.h>
#include <nvhe/early_alloc.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
int ret;
+ BUG_ON(kvm_check_pvm_sysreg_table());
+
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
return -EINVAL;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index a34b01cc8ab9..c0e3fed26d93 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -27,6 +27,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -158,6 +159,101 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+/**
+ * Handler for protected VM MSR, MRS or System instruction execution in AArch64.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Make sure we handle the exit for workarounds and ptrauth
+ * before the pKVM handling, as the latter could decide to
+ * UNDEF.
+ */
+ return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
+ kvm_handle_pvm_sysreg(vcpu, exit_code));
+}
+
+/**
+ * Handler for protected floating-point and Advanced SIMD accesses.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /* Linux guests assume support for floating-point and Advanced SIMD. */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ return kvm_hyp_handle_fpsimd(vcpu, exit_code);
+}
+
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn pvm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
+ [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ return pvm_exit_handlers;
+
+ return hyp_exit_handlers;
+}
+
+/*
+ * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
+ * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
+ * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
+ * hypervisor spots a guest in such a state ensure it is handled, and don't
+ * trust the host to spot or fix it. The check below is based on the one in
+ * kvm_arch_vcpu_ioctl_run().
+ *
+ * Returns false if the guest ran in AArch32 when it shouldn't have, and
+ * thus should exit to the host, or true if a the guest run loop can continue.
+ */
+static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+
+ if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ /*
+ * As we have caught the guest red-handed, decide that it isn't
+ * fit for purpose anymore by making the vcpu invalid. The VMM
+ * can try and fix it by re-initializing the vcpu with
+ * KVM_ARM_VCPU_INIT, however, this is likely not possible for
+ * protected VMs.
+ */
+ vcpu->arch.target = -1;
+ *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
+ *exit_code |= ARM_EXCEPTION_IL;
+ return false;
+ }
+
+ return true;
+}
+
/* Switch to the guest for legacy non-VHE systems */
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -220,6 +316,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
+ if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
+ break;
+
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
new file mode 100644
index 000000000000..3787ee6fb1a2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include <hyp/adjust_pc.h>
+
+#include <nvhe/fixed_config.h>
+
+#include "../../sys_regs.h"
+
+/*
+ * Copies of the host's CPU features registers holding sanitized values at hyp.
+ */
+u64 id_aa64pfr0_el1_sys_val;
+u64 id_aa64pfr1_el1_sys_val;
+u64 id_aa64isar0_el1_sys_val;
+u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64mmfr0_el1_sys_val;
+u64 id_aa64mmfr1_el1_sys_val;
+u64 id_aa64mmfr2_el1_sys_val;
+
+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
+ KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_PENDING_EXCEPTION);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
+/*
+ * Returns the restricted features values of the feature register based on the
+ * limitations in restrict_fields.
+ * A feature id field value of 0b0000 does not impose any restrictions.
+ * Note: Use only for unsigned feature field values.
+ */
+static u64 get_restricted_features_unsigned(u64 sys_reg_val,
+ u64 restrict_fields)
+{
+ u64 value = 0UL;
+ u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ /*
+ * According to the Arm Architecture Reference Manual, feature fields
+ * use increasing values to indicate increases in functionality.
+ * Iterate over the restricted feature fields and calculate the minimum
+ * unsigned value between the one supported by the system, and what the
+ * value is being restricted to.
+ */
+ while (sys_reg_val && restrict_fields) {
+ value |= min(sys_reg_val & mask, restrict_fields & mask);
+ sys_reg_val &= ~mask;
+ restrict_fields &= ~mask;
+ mask <<= ARM64_FEATURE_FIELD_BITS;
+ }
+
+ return value;
+}
+
+/*
+ * Functions that return the value of feature id registers for protected VMs
+ * based on allowed features, system features, and KVM support.
+ */
+
+static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 set_mask = 0;
+ u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+
+ if (!vcpu_has_sve(vcpu))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
+
+ set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+
+ /* Spectre and Meltdown mitigation in KVM */
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+ (u64)kvm->arch.pfr0_csv2);
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+ (u64)kvm->arch.pfr0_csv3);
+
+ return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
+}
+
+static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
+
+ if (!kvm_has_mte(kvm))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+
+ return id_aa64pfr1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for Scalable Vectors, therefore, hyp has no sanitized
+ * copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, including breakpoints, and watchpoints,
+ * therefore, pKVM has no sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, therefore, hyp has no sanitized copy of the
+ * feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
+}
+
+static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
+{
+ u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+
+ if (!vcpu_has_ptrauth(vcpu))
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+
+ return id_aa64isar1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
+{
+ u64 set_mask;
+
+ set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
+ PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+
+ return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
+}
+
+static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+}
+
+/* Read a sanitized cpufeature ID register by its encoding */
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+{
+ switch (id) {
+ case SYS_ID_AA64PFR0_EL1:
+ return get_pvm_id_aa64pfr0(vcpu);
+ case SYS_ID_AA64PFR1_EL1:
+ return get_pvm_id_aa64pfr1(vcpu);
+ case SYS_ID_AA64ZFR0_EL1:
+ return get_pvm_id_aa64zfr0(vcpu);
+ case SYS_ID_AA64DFR0_EL1:
+ return get_pvm_id_aa64dfr0(vcpu);
+ case SYS_ID_AA64DFR1_EL1:
+ return get_pvm_id_aa64dfr1(vcpu);
+ case SYS_ID_AA64AFR0_EL1:
+ return get_pvm_id_aa64afr0(vcpu);
+ case SYS_ID_AA64AFR1_EL1:
+ return get_pvm_id_aa64afr1(vcpu);
+ case SYS_ID_AA64ISAR0_EL1:
+ return get_pvm_id_aa64isar0(vcpu);
+ case SYS_ID_AA64ISAR1_EL1:
+ return get_pvm_id_aa64isar1(vcpu);
+ case SYS_ID_AA64MMFR0_EL1:
+ return get_pvm_id_aa64mmfr0(vcpu);
+ case SYS_ID_AA64MMFR1_EL1:
+ return get_pvm_id_aa64mmfr1(vcpu);
+ case SYS_ID_AA64MMFR2_EL1:
+ return get_pvm_id_aa64mmfr2(vcpu);
+ default:
+ /*
+ * Should never happen because all cases are covered in
+ * pvm_sys_reg_descs[].
+ */
+ WARN_ON(1);
+ break;
+ }
+
+ return 0;
+}
+
+static u64 read_id_reg(const struct kvm_vcpu *vcpu,
+ struct sys_reg_desc const *r)
+{
+ return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+}
+
+/* Handler to RAZ/WI sysregs */
+static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!p->is_write)
+ p->regval = 0;
+
+ return true;
+}
+
+/*
+ * Accessor for AArch32 feature id registers.
+ *
+ * The value of these registers is "unknown" according to the spec if AArch32
+ * isn't supported.
+ */
+static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ /*
+ * No support for AArch32 guests, therefore, pKVM has no sanitized copy
+ * of AArch32 feature id registers.
+ */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ return pvm_access_raz_wi(vcpu, p, r);
+}
+
+/*
+ * Accessor for AArch64 feature id registers.
+ *
+ * If access is allowed, set the regval to the protected VM's view of the
+ * register and return true.
+ * Otherwise, inject an undefined exception and return false.
+ */
+static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ p->regval = read_id_reg(vcpu, r);
+ return true;
+}
+
+static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /* pVMs only support GICv3. 'nuf said. */
+ if (!p->is_write)
+ p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE;
+
+ return true;
+}
+
+/* Mark the specified system register as an AArch32 feature id register. */
+#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }
+
+/* Mark the specified system register as an AArch64 feature id register. */
+#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+
+/* Mark the specified system register as Read-As-Zero/Write-Ignored */
+#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
+
+/* Mark the specified system register as not being handled in hyp. */
+#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL }
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * NOTE: Anything not explicitly listed here is *restricted by default*, i.e.,
+ * it will lead to injecting an exception into the guest.
+ */
+static const struct sys_reg_desc pvm_sys_reg_descs[] = {
+ /* Cache maintenance by set/way operations are restricted. */
+
+ /* Debug and Trace Registers are restricted. */
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+ AARCH32(SYS_ID_PFR0_EL1),
+ AARCH32(SYS_ID_PFR1_EL1),
+ AARCH32(SYS_ID_DFR0_EL1),
+ AARCH32(SYS_ID_AFR0_EL1),
+ AARCH32(SYS_ID_MMFR0_EL1),
+ AARCH32(SYS_ID_MMFR1_EL1),
+ AARCH32(SYS_ID_MMFR2_EL1),
+ AARCH32(SYS_ID_MMFR3_EL1),
+
+ /* CRm=2 */
+ AARCH32(SYS_ID_ISAR0_EL1),
+ AARCH32(SYS_ID_ISAR1_EL1),
+ AARCH32(SYS_ID_ISAR2_EL1),
+ AARCH32(SYS_ID_ISAR3_EL1),
+ AARCH32(SYS_ID_ISAR4_EL1),
+ AARCH32(SYS_ID_ISAR5_EL1),
+ AARCH32(SYS_ID_MMFR4_EL1),
+ AARCH32(SYS_ID_ISAR6_EL1),
+
+ /* CRm=3 */
+ AARCH32(SYS_MVFR0_EL1),
+ AARCH32(SYS_MVFR1_EL1),
+ AARCH32(SYS_MVFR2_EL1),
+ AARCH32(SYS_ID_PFR2_EL1),
+ AARCH32(SYS_ID_DFR1_EL1),
+ AARCH32(SYS_ID_MMFR5_EL1),
+
+ /* AArch64 ID registers */
+ /* CRm=4 */
+ AARCH64(SYS_ID_AA64PFR0_EL1),
+ AARCH64(SYS_ID_AA64PFR1_EL1),
+ AARCH64(SYS_ID_AA64ZFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR1_EL1),
+ AARCH64(SYS_ID_AA64AFR0_EL1),
+ AARCH64(SYS_ID_AA64AFR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR0_EL1),
+ AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR0_EL1),
+ AARCH64(SYS_ID_AA64MMFR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR2_EL1),
+
+ /* Scalable Vector Registers are restricted. */
+
+ RAZ_WI(SYS_ERRIDR_EL1),
+ RAZ_WI(SYS_ERRSELR_EL1),
+ RAZ_WI(SYS_ERXFR_EL1),
+ RAZ_WI(SYS_ERXCTLR_EL1),
+ RAZ_WI(SYS_ERXSTATUS_EL1),
+ RAZ_WI(SYS_ERXADDR_EL1),
+ RAZ_WI(SYS_ERXMISC0_EL1),
+ RAZ_WI(SYS_ERXMISC1_EL1),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Limited Ordering Regions Registers are restricted. */
+
+ HOST_HANDLED(SYS_ICC_SGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_SGI0R_EL1),
+ { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
+
+ HOST_HANDLED(SYS_CCSIDR_EL1),
+ HOST_HANDLED(SYS_CLIDR_EL1),
+ HOST_HANDLED(SYS_CSSELR_EL1),
+ HOST_HANDLED(SYS_CTR_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Activity Monitoring Registers are restricted. */
+
+ HOST_HANDLED(SYS_CNTP_TVAL_EL0),
+ HOST_HANDLED(SYS_CNTP_CTL_EL0),
+ HOST_HANDLED(SYS_CNTP_CVAL_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+};
+
+/*
+ * Checks that the sysreg table is unique and in-order.
+ *
+ * Returns 0 if the table is consistent, or 1 otherwise.
+ */
+int kvm_check_pvm_sysreg_table(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) {
+ if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Handler for protected VM MSR, MRS or System instruction execution.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't, to be handled by the host.
+ */
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const struct sys_reg_desc *r;
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ params = esr_sys64_to_params(esr);
+ params.regval = vcpu_get_reg(vcpu, Rt);
+
+ r = find_reg(&params, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs));
+
+ /* Undefined (RESTRICTED). */
+ if (r == NULL) {
+ inject_undef64(vcpu);
+ return true;
+ }
+
+ /* Handled by the host (HOST_HANDLED) */
+ if (r->access == NULL)
+ return false;
+
+ /* Handled by hyp: skip instruction if instructed to do so. */
+ if (r->access(vcpu, &params, r))
+ __kvm_skip_instr(vcpu);
+
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+
+ return true;
+}
+
+/**
+ * Handler for protected VM restricted exceptions.
+ *
+ * Inject an undefined exception into the guest and return true to indicate that
+ * the hypervisor has handled the exit, and control should go back to the guest.
+ */
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ inject_undef64(vcpu);
+ return true;
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 39f8f7f9227c..20db2f281cf2 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
goto spurious;
lr_val &= ~ICH_LR_STATE;
- /* No active state for LPIs */
- if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
- lr_val |= ICH_LR_ACTIVE_BIT;
+ lr_val |= ICH_LR_ACTIVE_BIT;
__gic_v3_set_lr(lr_val, lr);
__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
@@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
/* Drop priority in any case */
act_prio = __vgic_v3_clear_highest_active_priority();
- /* If EOIing an LPI, no deactivate to be performed */
- if (vid >= VGIC_MIN_LPI)
- return;
-
- /* EOImode == 1, nothing to be done here */
- if (vmcr & ICH_VMCR_EOIM_MASK)
- return;
-
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
if (lr == -1) {
- __vgic_v3_bump_eoicount();
+ /* Do not bump EOIcount for LPIs that aren't in the LRs */
+ if (!(vid >= VGIC_MIN_LPI))
+ __vgic_v3_bump_eoicount();
return;
}
+ /* EOImode == 1 and not an LPI, nothing to be done here */
+ if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI))
+ return;
+
lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
/* If priorities or group do not match, the guest has fscked-up. */
@@ -987,8 +983,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
- /* SEIS */
- val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index ded2c66675f0..5a2cb5d9bc4b 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
}
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ return hyp_exit_handlers;
+}
+
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 69bd1732a299..326cdfec74a1 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -512,7 +512,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return -EINVAL;
}
- pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
+ pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
if (!pgt)
return -ENOMEM;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 2af3c37445e0..a5e4bbf5e68f 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -978,7 +978,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
mutex_lock(&vcpu->kvm->lock);
if (!vcpu->kvm->arch.pmu_filter) {
- vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
+ vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!vcpu->kvm->arch.pmu_filter) {
mutex_unlock(&vcpu->kvm->lock);
return -ENOMEM;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 09cd30a9aafb..426bd7fbc3fd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -106,7 +106,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
vl > SVE_VL_ARCH_MAX))
return -EIO;
- buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
+ buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1d46e185f31e..e3ec1a44f94d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1064,7 +1064,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r, bool raz)
{
u32 id = reg_to_encoding(r);
- u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
+ u64 val;
+
+ if (raz)
+ return 0;
+
+ val = read_sanitised_ftr_reg(id);
switch (id) {
case SYS_ID_AA64PFR0_EL1:
@@ -1075,16 +1080,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+ if (irqchip_in_kernel(vcpu->kvm) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+ }
break;
case SYS_ID_AA64PFR1_EL1:
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
- if (kvm_has_mte(vcpu->kvm)) {
- u64 pfr, mte;
-
- pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
- mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
- }
+ if (!kvm_has_mte(vcpu->kvm))
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1268,16 +1272,19 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return __set_id_reg(vcpu, rd, uaddr, raz);
}
-static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __get_id_reg(vcpu, rd, uaddr, true);
+ return __set_id_reg(vcpu, rd, uaddr, true);
}
-static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __set_id_reg(vcpu, rd, uaddr, true);
+ const u64 id = sys_reg_to_index(rd);
+ const u64 val = 0;
+
+ return reg_to_user(uaddr, &val, id);
}
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1388,7 +1395,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
.access = access_raz_id_reg, \
- .get_user = get_raz_id_reg, \
+ .get_user = get_raz_reg, \
.set_user = set_raz_id_reg, \
}
@@ -1400,7 +1407,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
#define ID_HIDDEN(name) { \
SYS_DESC(SYS_##name), \
.access = access_raz_id_reg, \
- .get_user = get_raz_id_reg, \
+ .get_user = get_raz_reg, \
.set_user = set_raz_id_reg, \
}
@@ -1642,7 +1649,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* previously (and pointlessly) advertised in the past...
*/
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
- .get_user = get_raz_id_reg, .set_user = set_wi_reg,
+ .get_user = get_raz_reg, .set_user = set_wi_reg,
.access = access_pmswinc, .reset = NULL },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 340c51d87677..0a06d0648970 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
- dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
+ dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!dist->spis)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 79f8899b234c..475059bacedf 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
u32 nr = dist->nr_spis;
int i, ret;
- entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+ entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT);
if (!entries)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 61728c543eb9..089fc2ffcb43 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (irq)
return irq;
- irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
+ irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!irq)
return ERR_PTR(-ENOMEM);
@@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
* we must be careful not to overrun the array.
*/
irq_count = READ_ONCE(dist->lpi_list_count);
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
+ intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
if (!intids)
return -ENOMEM;
@@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
return E_ITS_MAPC_COLLECTION_OOR;
- collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+ collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
if (!collection)
return -ENOMEM;
@@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
{
struct its_ite *ite;
- ite = kzalloc(sizeof(*ite), GFP_KERNEL);
+ ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT);
if (!ite)
return ERR_PTR(-ENOMEM);
@@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
{
struct its_device *device;
- device = kzalloc(sizeof(*device), GFP_KERNEL);
+ device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT);
if (!device)
return ERR_PTR(-ENOMEM);
@@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm)
struct vgic_translation_cache_entry *cte;
/* An allocation failure is not fatal */
- cte = kzalloc(sizeof(*cte), GFP_KERNEL);
+ cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
if (WARN_ON(!cte))
break;
@@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
return -ENODEV;
- its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
+ its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT);
if (!its)
return -ENOMEM;
@@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev,
if (copy_from_user(&addr, uaddr, sizeof(addr)))
return -EFAULT;
- ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
- addr, SZ_64K);
+ ret = vgic_check_iorange(dev->kvm, its->vgic_its_base,
+ addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE);
if (ret)
return ret;
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 7740995de982..0d000d2fe8d2 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -14,17 +14,21 @@
/* common helpers */
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment)
+int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
+ phys_addr_t addr, phys_addr_t alignment,
+ phys_addr_t size)
{
- if (addr & ~kvm_phys_mask(kvm))
- return -E2BIG;
+ if (!IS_VGIC_ADDR_UNDEF(ioaddr))
+ return -EEXIST;
- if (!IS_ALIGNED(addr, alignment))
+ if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment))
return -EINVAL;
- if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
- return -EEXIST;
+ if (addr + size < addr)
+ return -EINVAL;
+
+ if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
+ return -E2BIG;
return 0;
}
@@ -57,7 +61,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
{
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
- phys_addr_t *addr_ptr, alignment;
+ phys_addr_t *addr_ptr, alignment, size;
u64 undef_value = VGIC_ADDR_UNDEF;
mutex_lock(&kvm->lock);
@@ -66,16 +70,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_4K;
+ size = KVM_VGIC_V2_DIST_SIZE;
break;
case KVM_VGIC_V2_ADDR_TYPE_CPU:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
addr_ptr = &vgic->vgic_cpu_base;
alignment = SZ_4K;
+ size = KVM_VGIC_V2_CPU_SIZE;
break;
case KVM_VGIC_V3_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_64K;
+ size = KVM_VGIC_V3_DIST_SIZE;
break;
case KVM_VGIC_V3_ADDR_TYPE_REDIST: {
struct vgic_redist_region *rdreg;
@@ -140,7 +147,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
goto out;
if (write) {
- r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
+ r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size);
if (!r)
*addr_ptr = *addr;
} else {
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a09cdc0b953c..bf7ec4a78497 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -796,7 +796,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
struct vgic_dist *d = &kvm->arch.vgic;
struct vgic_redist_region *rdreg;
struct list_head *rd_regions = &d->rd_regions;
- size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
+ int nr_vcpus = atomic_read(&kvm->online_vcpus);
+ size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE
+ : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE;
int ret;
/* cross the end of memory ? */
@@ -834,13 +836,13 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
if (vgic_v3_rdist_overlap(kvm, base, size))
return -EINVAL;
- rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
+ rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT);
if (!rdreg)
return -ENOMEM;
rdreg->base = VGIC_ADDR_UNDEF;
- ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
+ ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size);
if (ret)
goto free;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 21a6207fb2ee..04f62c4b07fb 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -15,6 +15,7 @@
static bool group0_trap;
static bool group1_trap;
static bool common_trap;
+static bool dir_trap;
static bool gicv4_enable;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
@@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
if (common_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TC;
+ if (dir_trap)
+ vgic_v3->vgic_hcr |= ICH_HCR_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -483,8 +486,10 @@ bool vgic_v3_check_base(struct kvm *kvm)
return false;
list_for_each_entry(rdreg, &d->rd_regions, list) {
- if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) <
- rdreg->base)
+ size_t sz = vgic_v3_rd_region_size(kvm, rdreg);
+
+ if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
+ rdreg->base, SZ_64K, sz))
return false;
}
@@ -671,11 +676,23 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (group0_trap || group1_trap || common_trap) {
- kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
+ kvm_info("GICv3 with locally generated SEI\n");
+
+ group0_trap = true;
+ group1_trap = true;
+ if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
+ dir_trap = true;
+ else
+ common_trap = true;
+ }
+
+ if (group0_trap || group1_trap || common_trap | dir_trap) {
+ kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
group0_trap ? "G0" : "",
group1_trap ? "G1" : "",
- common_trap ? "C" : "");
+ common_trap ? "C" : "",
+ dir_trap ? "D" : "");
static_branch_enable(&vgic_v3_cpuif_trap);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c1845d8f5f7e..772dd15a22c7 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm)
nr_vcpus = atomic_read(&kvm->online_vcpus);
dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!dist->its_vm.vpes)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 14a9218641f5..3fd6c86a7ef3 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -172,8 +172,9 @@ void vgic_kick_vcpus(struct kvm *kvm);
void vgic_irq_handle_resampling(struct vgic_irq *irq,
bool lr_deactivated, bool lr_pending);
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment);
+int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
+ phys_addr_t addr, phys_addr_t alignment,
+ phys_addr_t size);
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);