summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S8
-rw-r--r--arch/arm64/kvm/hyp/hyp-constants.c10
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h75
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h289
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h7
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h200
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h1
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h6
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mm.h59
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/trap_handler.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/early_alloc.c5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S26
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c56
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c529
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c17
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c185
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c42
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c94
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c487
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c108
-rw-r--r--arch/arm64/kvm/hyp/reserved_mem.c109
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c22
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c21
27 files changed, 1869 insertions, 500 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index b726332eec49..687598e41b21 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 3c635929771a..61e6f3ba7b7d 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -21,11 +21,7 @@ SYM_FUNC_START(__fpsimd_restore_state)
SYM_FUNC_END(__fpsimd_restore_state)
SYM_FUNC_START(__sve_restore_state)
- __sve_load 0, x1, 2
+ mov x2, #1
+ sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
- sve_save 0, x1, 2
- ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
new file mode 100644
index 000000000000..b3742a6691e8
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/kbuild.h>
+#include <nvhe/memory.h>
+
+int main(void)
+{
+ DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page));
+ return 0;
+}
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9aa9b73475c9..b6b6801d96d5 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -44,7 +44,7 @@
el1_sync: // Guest trapped into EL2
mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
new file mode 100644
index 000000000000..1b8a2dcd712f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_FAULT_H__
+#define __ARM64_KVM_HYP_FAULT_H__
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg_par();
+ if (!__kvm_at("s1e1r", far))
+ tmp = read_sysreg_par();
+ else
+ tmp = SYS_PAR_EL1_F; /* back to the guest */
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+ u64 hpfar, far;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ fault->far_el2 = far;
+ fault->hpfar_el2 = hpfar;
+ return true;
+}
+
+#endif
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a0e78a6027be..58e14f8ead23 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -8,6 +8,7 @@
#define __ARM64_KVM_HYP_SWITCH_H__
#include <hyp/adjust_pc.h>
+#include <hyp/fault.h>
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
@@ -28,10 +29,13 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
-extern struct exception_table_entry __start___kvm_ex_table;
-extern struct exception_table_entry __stop___kvm_ex_table;
+struct kvm_exception_table_entry {
+ int insn, fixup;
+};
+
+extern struct kvm_exception_table_entry __start___kvm_ex_table;
+extern struct kvm_exception_table_entry __stop___kvm_ex_table;
/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
@@ -44,7 +48,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
* trap the accesses.
*/
if (!system_supports_fpsimd() ||
- vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
KVM_ARM64_FP_HOST);
@@ -133,88 +137,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
-static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
- tmp = read_sysreg_par();
- else
- tmp = SYS_PAR_EL1_F; /* back to the guest */
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
-{
- u64 hpfar, far;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- fault->far_el2 = far;
- fault->hpfar_el2 = hpfar;
- return true;
-}
-
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
- u8 ec;
- u64 esr;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- return __get_fault_info(esr, &vcpu->arch.fault);
-}
-
-static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
-{
- struct thread_struct *thread;
-
- thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
- uw.fpsimd_state);
-
- __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
+ return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
@@ -225,28 +150,23 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+/*
+ * We trap the first access to the FP/SIMD to save the host context and
+ * restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an undefined
+ * instruction exception to the guest. Similarly for trapped SVE accesses.
+ */
+static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- bool sve_guest, sve_host;
+ bool sve_guest;
u8 esr_ec;
u64 reg;
if (!system_supports_fpsimd())
return false;
- if (system_supports_sve()) {
- sve_guest = vcpu_has_sve(vcpu);
- sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
- } else {
- sve_guest = false;
- sve_host = false;
- }
-
+ sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
- esr_ec != ESR_ELx_EC_SVE)
- return false;
/* Don't handle SVE traps for non-SVE vcpus here: */
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
@@ -269,11 +189,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
isb();
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
- if (sve_host)
- __hyp_sve_save_host(vcpu);
- else
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
@@ -348,14 +264,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
static inline bool esr_is_ptrauth_trap(u32 esr)
{
- u32 ec = ESR_ELx_EC(esr);
-
- if (ec == ESR_ELx_EC_PAC)
- return true;
-
- if (ec != ESR_ELx_EC_SYS64)
- return false;
-
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
case SYS_APIAKEYHI_EL1:
@@ -384,13 +292,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr)
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm_cpu_context *ctxt;
u64 val;
- if (!vcpu_has_ptrauth(vcpu) ||
- !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ if (!vcpu_has_ptrauth(vcpu))
return false;
ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
@@ -409,6 +316,92 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
return true;
}
+static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return kvm_hyp_handle_ptrauth(vcpu, exit_code);
+
+ return false;
+}
+
+static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_abt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+ }
+ }
+
+ return false;
+}
+
+typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
+/*
+ * Allow the hypervisor to handle the exit with an exit handler if it has one.
+ *
+ * Returns true if the hypervisor handled the exit, and control should go back
+ * to the guest, or false if it hasn't.
+ */
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+ exit_handler_fn fn;
+
+ fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
+
+ if (fn)
+ return fn(vcpu, exit_code);
+
+ return false;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -416,6 +409,18 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
*/
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
+ /*
+ * Save PSTATE early so that we can evaluate the vcpu mode
+ * early on.
+ */
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+ /*
+ * Check whether we want to repaint the state one way or
+ * another.
+ */
+ early_exit_filter(vcpu, exit_code);
+
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
@@ -443,59 +448,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
- goto guest;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- goto guest;
-
- if (__hyp_handle_ptrauth(vcpu))
+ /* Check if there's an exit handler and allow it to handle the exit. */
+ if (kvm_hyp_handle_exit(vcpu, exit_code))
goto guest;
-
- if (!__populate_fault_info(vcpu))
- goto guest;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_abt_issea(vcpu) &&
- !kvm_vcpu_abt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
- }
-
exit:
/* Return to the host kernel and handle the exit */
return false;
@@ -510,7 +465,7 @@ static inline void __kvm_unexpected_el2_exception(void)
{
extern char __guest_exit_panic[];
unsigned long addr, fixup;
- struct exception_table_entry *entry, *end;
+ struct kvm_exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
entry = &__start___kvm_ex_table;
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index de7e14c862e6..7ecca8b07851 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->regs.pc = read_sysreg_el2(SYS_ELR);
- ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
+ /*
+ * Guest PSTATE gets saved at guest fixup time in all
+ * cases. We still need to handle the nVHE host side here.
+ */
+ if (!has_vhe() && ctxt->__hyp_running_vcpu)
+ ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
new file mode 100644
index 000000000000..eea1f6a53723
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#ifndef __ARM64_KVM_FIXED_CONFIG_H__
+#define __ARM64_KVM_FIXED_CONFIG_H__
+
+#include <asm/sysreg.h>
+
+/*
+ * This file contains definitions for features to be allowed or restricted for
+ * guest virtual machines, depending on the mode KVM is running in and on the
+ * type of guest that is running.
+ *
+ * The ALLOW masks represent a bitmask of feature fields that are allowed
+ * without any restrictions as long as they are supported by the system.
+ *
+ * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
+ * features that are restricted to support at most the specified feature.
+ *
+ * If a feature field is not present in either, than it is not supported.
+ *
+ * The approach taken for protected VMs is to allow features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Cannot be trapped or prevent the guest from using anyway
+ */
+
+/*
+ * Allow for protected VMs:
+ * - Floating-point and Advanced SIMD
+ * - Data Independent Timing
+ */
+#define PVM_ID_AA64PFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - AArch64 guests only (no support for AArch32 guests):
+ * AArch32 adds complexity in trap handling, emulation, condition codes,
+ * etc...
+ * - RAS (v1)
+ * Supported by KVM
+ */
+#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Branch Target Identification
+ * - Speculative Store Bypassing
+ */
+#define PVM_ID_AA64PFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Mixed-endian
+ * - Distinction between Secure and Non-secure Memory
+ * - Mixed-endian at EL0 only
+ * - Non-context synchronizing exception entry and exit
+ */
+#define PVM_ID_AA64MMFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - 40-bit IPA
+ * - 16-bit ASID
+ */
+#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Hardware translation table updates to Access flag and Dirty state
+ * - Number of VMID bits from CPU
+ * - Hierarchical Permission Disables
+ * - Privileged Access Never
+ * - SError interrupt exceptions from speculative reads
+ * - Enhanced Translation Synchronization
+ */
+#define PVM_ID_AA64MMFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Common not Private translations
+ * - User Access Override
+ * - IESB bit in the SCTLR_ELx registers
+ * - Unaligned single-copy atomicity and atomic functions
+ * - ESR_ELx.EC value on an exception by read access to feature ID space
+ * - TTL field in address operations.
+ * - Break-before-make sequences when changing translation block size
+ * - E0PDx mechanism
+ */
+#define PVM_ID_AA64MMFR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+ )
+
+/*
+ * No support for Scalable Vectors for protected VMs:
+ * Requires additional support from KVM, e.g., context-switching and
+ * trapping at EL2
+ */
+#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
+
+/*
+ * No support for debug, including breakpoints, and watchpoints for protected
+ * VMs:
+ * The Arm architecture mandates support for at least the Armv8 debug
+ * architecture, which would include at least 2 hardware breakpoints and
+ * watchpoints. Providing that support to protected guests adds
+ * considerable state and complexity. Therefore, the reserved value of 0 is
+ * used for debug-related fields.
+ */
+#define PVM_ID_AA64DFR0_ALLOW (0ULL)
+#define PVM_ID_AA64DFR1_ALLOW (0ULL)
+
+/*
+ * No support for implementation defined features.
+ */
+#define PVM_ID_AA64AFR0_ALLOW (0ULL)
+#define PVM_ID_AA64AFR1_ALLOW (0ULL)
+
+/*
+ * No restrictions on instructions implemented in AArch64.
+ */
+#define PVM_ID_AA64ISAR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+ )
+
+#define PVM_ID_AA64ISAR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+ )
+
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+int kvm_check_pvm_sysreg_table(void);
+
+#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fb0f523d1492..0a048dc06a7d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -24,6 +24,7 @@ struct hyp_pool {
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index b58c910babaf..80e99836eac7 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -24,6 +24,11 @@ enum pkvm_page_state {
PKVM_PAGE_OWNED = 0ULL,
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
+ __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
+ KVM_PGTABLE_PROT_SW1,
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE,
};
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
@@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id;
int __pkvm_prot_finalize(void);
int __pkvm_host_share_hyp(u64 pfn);
+int __pkvm_host_unshare_hyp(u64 pfn);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index c9a8f535212e..2d08510c6cc1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -10,13 +10,8 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_MEMBLOCK_REGIONS 128
-extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
-extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-extern struct hyp_pool hpool;
-extern u64 __io_map_base;
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
@@ -39,58 +34,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
*end = ALIGN(*end, PAGE_SIZE);
}
-static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
-{
- unsigned long total = 0, i;
-
- /* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
- nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
- total += nr_pages;
- }
-
- return total;
-}
-
-static inline unsigned long __hyp_pgtable_total_pages(void)
-{
- unsigned long res = 0, i;
-
- /* Cover all of memory with page-granularity */
- for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
- struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
- res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
- }
-
- return res;
-}
-
-static inline unsigned long hyp_s1_pgtable_pages(void)
-{
- unsigned long res;
-
- res = __hyp_pgtable_total_pages();
-
- /* Allow 1 GiB for private mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
-static inline unsigned long host_s2_pgtable_pages(void)
-{
- unsigned long res;
-
- /*
- * Include an extra 16 pages to safely upper-bound the worst case of
- * concatenated pgds.
- */
- res = __hyp_pgtable_total_pages() + 16;
-
- /* Allow 1 GiB for MMIO mappings */
- res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
-
- return res;
-}
-
#endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 1e6d995968a1..45a84f0ade04 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -15,4 +15,6 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 8d741f71377f..24b2c2425b38 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
- cache.o setup.o mm.o mem_protect.o
+ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-y += $(lib-objs)
@@ -89,6 +89,7 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI)
# cause crashes. Just disable it.
GCOV_PROFILE := n
KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
index 1306c430ab87..00de04153cc6 100644
--- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c
@@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg)
return hyp_early_alloc_contig(1);
}
+static void hyp_early_alloc_get_page(void *addr) { }
+static void hyp_early_alloc_put_page(void *addr) { }
+
void hyp_early_alloc_init(void *virt, unsigned long size)
{
base = cur = (unsigned long)virt;
@@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size)
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
+ hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page;
+ hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 4b652ffb591d..3d613e721a75 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic)
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
-.macro host_el1_sync_vect
- .align 7
-.L__vect_start\@:
- stp x0, x1, [sp, #-16]!
- mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
- cmp x0, #ESR_ELx_EC_HVC64
- b.ne __host_exit
-
+SYM_FUNC_START(__host_hvc)
ldp x0, x1, [sp] // Don't fixup the stack yet
+ /* No stub for you, sonny Jim */
+alternative_if ARM64_KVM_PROTECTED_MODE
+ b __host_exit
+alternative_else_nop_endif
+
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs __host_exit
@@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic)
ldr x5, =__kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
+SYM_FUNC_END(__host_hvc)
+
+.macro host_el1_sync_vect
+ .align 7
+.L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+ ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.eq __host_hvc
+ b __host_exit
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
.error "host_el1_sync_vect larger than vector entry"
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2da6aa8da868..5e2197db0d32 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -4,7 +4,7 @@
* Author: Andrew Scull <ascull@google.com>
*/
-#include <hyp/switch.h>
+#include <hyp/adjust_pc.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
}
+static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+
+ cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
+}
+
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -160,41 +167,66 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
+
+static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+ __pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
static const hcall_t host_hcall[] = {
- HANDLE_FUNC(__kvm_vcpu_run),
+ /* ___kvm_hyp_init */
+ HANDLE_FUNC(__kvm_get_mdcr_el2),
+ HANDLE_FUNC(__pkvm_init),
+ HANDLE_FUNC(__pkvm_create_private_mapping),
+ HANDLE_FUNC(__pkvm_cpu_set_vector),
+ HANDLE_FUNC(__kvm_enable_ssbs),
+ HANDLE_FUNC(__vgic_v3_init_lrs),
+ HANDLE_FUNC(__vgic_v3_get_gic_config),
+ HANDLE_FUNC(__pkvm_prot_finalize),
+
+ HANDLE_FUNC(__pkvm_host_share_hyp),
+ HANDLE_FUNC(__pkvm_host_unshare_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
+ HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__kvm_enable_ssbs),
- HANDLE_FUNC(__vgic_v3_get_gic_config),
HANDLE_FUNC(__vgic_v3_read_vmcr),
HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_init_lrs),
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
- HANDLE_FUNC(__pkvm_init),
- HANDLE_FUNC(__pkvm_cpu_set_vector),
- HANDLE_FUNC(__pkvm_host_share_hyp),
- HANDLE_FUNC(__pkvm_create_private_mapping),
- HANDLE_FUNC(__pkvm_prot_finalize),
+ HANDLE_FUNC(__pkvm_vcpu_init_traps),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
+ unsigned long hcall_min = 0;
hcall_t hfn;
+ /*
+ * If pKVM has been initialised then reject any calls to the
+ * early "privileged" hypercalls. Note that we cannot reject
+ * calls to __pkvm_prot_finalize for two reasons: (1) The static
+ * key used to determine initialisation must be toggled prior to
+ * finalisation and (2) finalisation is performed on a per-CPU
+ * basis. This is all fine, however, since __pkvm_prot_finalize
+ * returns -EPERM after the first call for a given CPU.
+ */
+ if (static_branch_unlikely(&kvm_protected_mode_initialized))
+ hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
+
id -= KVM_HOST_SMCCC_ID(0);
- if (unlikely(id >= ARRAY_SIZE(host_hcall)))
+ if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
goto inval;
hfn = host_hcall[id];
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index bacd493a4eac..674f10564373 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -9,9 +9,10 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>
-#include <hyp/switch.h>
+#include <hyp/fault.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
@@ -25,17 +26,42 @@ struct host_kvm host_kvm;
static struct hyp_pool host_s2_pool;
-/*
- * Copies of the host's CPU features registers holding sanitized values.
- */
-u64 id_aa64mmfr0_el1_sys_val;
-u64 id_aa64mmfr1_el1_sys_val;
-
const u8 pkvm_hyp_id = 1;
+static void host_lock_component(void)
+{
+ hyp_spin_lock(&host_kvm.lock);
+}
+
+static void host_unlock_component(void)
+{
+ hyp_spin_unlock(&host_kvm.lock);
+}
+
+static void hyp_lock_component(void)
+{
+ hyp_spin_lock(&pkvm_pgd_lock);
+}
+
+static void hyp_unlock_component(void)
+{
+ hyp_spin_unlock(&pkvm_pgd_lock);
+}
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_pool, get_order(size));
+ void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
+
+ hyp_split_page(hyp_virt_to_page(addr));
+
+ /*
+ * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
+ * so there should be no need to free any of the tail pages to make the
+ * allocation exact.
+ */
+ WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+ return addr;
}
static void *host_s2_zalloc_page(void *pool)
@@ -98,19 +124,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
prepare_host_vtcr();
hyp_spin_lock_init(&host_kvm.lock);
+ mmu->arch = &host_kvm.arch;
ret = prepare_s2_pool(pgt_pool_base);
if (ret)
return ret;
- ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch,
+ ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
if (ret)
return ret;
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
- mmu->arch = &host_kvm.arch;
mmu->pgt = &host_kvm.pgt;
WRITE_ONCE(mmu->vmid.vmid_gen, 0);
WRITE_ONCE(mmu->vmid.vmid, 0);
@@ -123,6 +149,9 @@ int __pkvm_prot_finalize(void)
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ if (params->hcr_el2 & HCR_VM)
+ return -EPERM;
+
params->vttbr = kvm_get_vttbr(mmu);
params->vtcr = host_kvm.arch.vtcr;
params->hcr_el2 |= HCR_VM;
@@ -330,116 +359,446 @@ static int host_stage2_idmap(u64 addr)
prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
- hyp_spin_lock(&host_kvm.lock);
+ host_lock_component();
ret = host_stage2_adjust_range(addr, &range);
if (ret)
goto unlock;
ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
- hyp_spin_unlock(&host_kvm.lock);
+ host_unlock_component();
return ret;
}
-static inline bool check_prot(enum kvm_pgtable_prot prot,
- enum kvm_pgtable_prot required,
- enum kvm_pgtable_prot denied)
+void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
- return (prot & (required | denied)) == required;
+ struct kvm_vcpu_fault_info fault;
+ u64 esr, addr;
+ int ret = 0;
+
+ esr = read_sysreg_el2(SYS_ESR);
+ BUG_ON(!__get_fault_info(esr, &fault));
+
+ addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+ ret = host_stage2_idmap(addr);
+ BUG_ON(ret && ret != -EAGAIN);
}
-int __pkvm_host_share_hyp(u64 pfn)
+/* This corresponds to locking order */
+enum pkvm_component_id {
+ PKVM_ID_HOST,
+ PKVM_ID_HYP,
+};
+
+struct pkvm_mem_transition {
+ u64 nr_pages;
+
+ struct {
+ enum pkvm_component_id id;
+ /* Address in the initiator's address space */
+ u64 addr;
+
+ union {
+ struct {
+ /* Address in the completer's address space */
+ u64 completer_addr;
+ } host;
+ };
+ } initiator;
+
+ struct {
+ enum pkvm_component_id id;
+ } completer;
+};
+
+struct pkvm_mem_share {
+ const struct pkvm_mem_transition tx;
+ const enum kvm_pgtable_prot completer_prot;
+};
+
+struct check_walk_data {
+ enum pkvm_page_state desired;
+ enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
+};
+
+static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag,
+ void * const arg)
{
- phys_addr_t addr = hyp_pfn_to_phys(pfn);
- enum kvm_pgtable_prot prot, cur;
- void *virt = __hyp_va(addr);
- enum pkvm_page_state state;
- kvm_pte_t pte;
- int ret;
+ struct check_walk_data *d = arg;
+ kvm_pte_t pte = *ptep;
- if (!addr_is_memory(addr))
+ if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
return -EINVAL;
- hyp_spin_lock(&host_kvm.lock);
- hyp_spin_lock(&pkvm_pgd_lock);
+ return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+}
+
+static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct check_walk_data *data)
+{
+ struct kvm_pgtable_walker walker = {
+ .cb = __check_page_state_visitor,
+ .arg = data,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte) && pte)
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __host_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = host_get_page_state,
+ };
+
+ hyp_assert_lock_held(&host_kvm.lock);
+ return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+}
+
+static int __host_set_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
+
+ return host_stage2_idmap_locked(addr, size, prot);
+}
+
+static int host_request_owned_transition(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int host_request_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_share(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
+}
+
+static int host_initiate_unshare(u64 *completer_addr,
+ const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 addr = tx->initiator.addr;
+
+ *completer_addr = tx->initiator.host.completer_addr;
+ return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+{
+ if (!kvm_pte_valid(pte))
+ return PKVM_NOPAGE;
+
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+}
+
+static int __hyp_check_page_state_range(u64 addr, u64 size,
+ enum pkvm_page_state state)
+{
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = hyp_get_page_state,
+ };
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+ return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+}
+
+static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+ return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+ tx->initiator.id != PKVM_ID_HOST);
+}
+
+static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (perms != PAGE_HYP)
+ return -EPERM;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
+static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+
+ if (__hyp_ack_skip_pgtable_check(tx))
+ return 0;
+
+ return __hyp_check_page_state_range(addr, size,
+ PKVM_PAGE_SHARED_BORROWED);
+}
+
+static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
+ enum kvm_pgtable_prot perms)
+{
+ void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+ enum kvm_pgtable_prot prot;
+
+ prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
+ return pkvm_create_mappings_locked(start, end, prot);
+}
+
+static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+{
+ u64 size = tx->nr_pages * PAGE_SIZE;
+ int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+
+ return (ret != size) ? -EFAULT : 0;
+}
+
+static int check_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_owned_transition(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL);
if (ret)
- goto unlock;
- if (!pte)
- goto map_shared;
+ return ret;
- /*
- * Check attributes in the host stage-2 PTE. We need the page to be:
- * - mapped RWX as we're sharing memory;
- * - not borrowed, as that implies absence of ownership.
- * Otherwise, we can't let it got through
- */
- cur = kvm_pgtable_stage2_pte_prot(pte);
- prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) {
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
}
- state = pkvm_getstate(cur);
- if (state == PKVM_PAGE_OWNED)
- goto map_shared;
+ return ret;
+}
- /*
- * Tolerate double-sharing the same page, but this requires
- * cross-checking the hypervisor stage-1.
- */
- if (state != PKVM_PAGE_SHARED_OWNED) {
- ret = -EPERM;
- goto unlock;
+static int __do_share(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_share(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
}
- ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL);
if (ret)
- goto unlock;
+ return ret;
- /*
- * If the page has been shared with the hypervisor, it must be
- * already mapped as SHARED_BORROWED in its stage-1.
- */
- cur = kvm_pgtable_hyp_pte_prot(pte);
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- if (!check_prot(cur, prot, ~prot))
- ret = -EPERM;
- goto unlock;
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ }
-map_shared:
- /*
- * If the page is not yet shared, adjust mappings in both page-tables
- * while both locks are held.
- */
- prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
- ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
- BUG_ON(ret);
+ return ret;
+}
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
- ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot);
- BUG_ON(ret);
+/*
+ * do_share():
+ *
+ * The page owner grants access to another component with a given set
+ * of permissions.
+ *
+ * Initiator: OWNED => SHARED_OWNED
+ * Completer: NOPAGE => SHARED_BORROWED
+ */
+static int do_share(struct pkvm_mem_share *share)
+{
+ int ret;
-unlock:
- hyp_spin_unlock(&pkvm_pgd_lock);
- hyp_spin_unlock(&host_kvm.lock);
+ ret = check_share(share);
+ if (ret)
+ return ret;
+
+ return WARN_ON(__do_share(share));
+}
+
+static int check_unshare(struct pkvm_mem_share *share)
+{
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
+
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_request_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_ack_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
return ret;
}
-void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
+static int __do_unshare(struct pkvm_mem_share *share)
{
- struct kvm_vcpu_fault_info fault;
- u64 esr, addr;
- int ret = 0;
+ const struct pkvm_mem_transition *tx = &share->tx;
+ u64 completer_addr;
+ int ret;
- esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ switch (tx->initiator.id) {
+ case PKVM_ID_HOST:
+ ret = host_initiate_unshare(&completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
- ret = host_stage2_idmap(addr);
- BUG_ON(ret && ret != -EAGAIN);
+ if (ret)
+ return ret;
+
+ switch (tx->completer.id) {
+ case PKVM_ID_HYP:
+ ret = hyp_complete_unshare(completer_addr, tx);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * do_unshare():
+ *
+ * The page owner revokes access from another component for a range of
+ * pages which were previously shared using do_share().
+ *
+ * Initiator: SHARED_OWNED => OWNED
+ * Completer: SHARED_BORROWED => NOPAGE
+ */
+static int do_unshare(struct pkvm_mem_share *share)
+{
+ int ret;
+
+ ret = check_unshare(share);
+ if (ret)
+ return ret;
+
+ return WARN_ON(__do_unshare(share));
+}
+
+int __pkvm_host_share_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_share(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
+}
+
+int __pkvm_host_unshare_hyp(u64 pfn)
+{
+ int ret;
+ u64 host_addr = hyp_pfn_to_phys(pfn);
+ u64 hyp_addr = (u64)__hyp_va(host_addr);
+ struct pkvm_mem_share share = {
+ .tx = {
+ .nr_pages = 1,
+ .initiator = {
+ .id = PKVM_ID_HOST,
+ .addr = host_addr,
+ .host = {
+ .completer_addr = hyp_addr,
+ },
+ },
+ .completer = {
+ .id = PKVM_ID_HYP,
+ },
+ },
+ .completer_prot = PAGE_HYP,
+ };
+
+ host_lock_component();
+ hyp_lock_component();
+
+ ret = do_unshare(&share);
+
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 2fabeceb889a..526a7d6fa86f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -8,6 +8,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/spectre.h>
#include <nvhe/early_alloc.h>
@@ -18,11 +19,12 @@
struct kvm_pgtable pkvm_pgtable;
hyp_spinlock_t pkvm_pgd_lock;
-u64 __io_map_base;
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
unsigned int hyp_memblock_nr;
+static u64 __io_map_base;
+
static int __pkvm_create_mappings(unsigned long start, unsigned long size,
unsigned long phys, enum kvm_pgtable_prot prot)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 41fc25bdfb34..543cad6c376a 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
+ BUG_ON(!p->refcount);
p->refcount--;
return (p->refcount == 0);
}
@@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
hyp_spin_unlock(&pool->lock);
}
+void hyp_split_page(struct hyp_page *p)
+{
+ unsigned short order = p->order;
+ unsigned int i;
+
+ p->order = 0;
+ for (i = 1; i < (1 << order); i++) {
+ struct hyp_page *tail = p + i;
+
+ tail->order = 0;
+ hyp_set_page_refcounted(tail);
+ }
+}
+
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
unsigned short i = order;
@@ -226,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
int i;
hyp_spin_lock_init(&pool->lock);
- pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
+ pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT));
for (i = 0; i < pool->max_order; i++)
INIT_LIST_HEAD(&pool->free_area[i]);
pool->range_start = phys;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
new file mode 100644
index 000000000000..99c8d8b73e70
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <nvhe/fixed_config.h>
+#include <nvhe/trap_handler.h>
+
+/*
+ * Set trap register values based on features in ID_AA64PFR0.
+ */
+static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
+ u64 hcr_set = HCR_RW;
+ u64 hcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Protected KVM does not support AArch32 guests. */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ /*
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
+ */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ /* Trap RAS unless all current versions are supported */
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
+ ID_AA64PFR0_RAS_V1P1) {
+ hcr_set |= HCR_TERR | HCR_TEA;
+ hcr_clear |= HCR_FIEN;
+ }
+
+ /* Trap AMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+ hcr_clear |= HCR_AMVOFFEN;
+ cptr_set |= CPTR_EL2_TAM;
+ }
+
+ /* Trap SVE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+ cptr_set |= CPTR_EL2_TZ;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64PFR1.
+ */
+static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
+ u64 hcr_set = 0;
+ u64 hcr_clear = 0;
+
+ /* Memory Tagging: Trap and Treat as Untagged if not supported. */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+ hcr_set |= HCR_TID5;
+ hcr_clear |= HCR_DCT | HCR_ATA;
+ }
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64DFR0.
+ */
+static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
+ u64 mdcr_set = 0;
+ u64 mdcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Trap/constrain PMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
+ MDCR_EL2_HPMN_MASK;
+ }
+
+ /* Trap Debug */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+ mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+
+ /* Trap OS Double Lock */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+ mdcr_set |= MDCR_EL2_TDOSA;
+
+ /* Trap SPE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPMS;
+ mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+ }
+
+ /* Trap Trace Filter */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+ mdcr_set |= MDCR_EL2_TTRF;
+
+ /* Trap Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+ cptr_set |= CPTR_EL2_TTA;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 &= ~mdcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR0.
+ */
+static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
+ u64 mdcr_set = 0;
+
+ /* Trap Debug Communications Channel registers */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+ mdcr_set |= MDCR_EL2_TDCC;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR1.
+ */
+static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
+ u64 hcr_set = 0;
+
+ /* Trap LOR */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+ hcr_set |= HCR_TLOR;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+}
+
+/*
+ * Set baseline trap register values.
+ */
+static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
+{
+ const u64 hcr_trap_feat_regs = HCR_TID3;
+ const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
+
+ /* Clear res0 and set res1 bits to trap potential new features. */
+ vcpu->arch.hcr_el2 &= ~(HCR_RES0);
+ vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
+ vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
+ vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
+}
+
+/*
+ * Initialize trap register values for protected VMs.
+ */
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+{
+ pvm_init_trap_regs(vcpu);
+ pvm_init_traps_aa64pfr0(vcpu);
+ pvm_init_traps_aa64pfr1(vcpu);
+ pvm_init_traps_aa64dfr0(vcpu);
+ pvm_init_traps_aa64mmfr0(vcpu);
+ pvm_init_traps_aa64mmfr1(vcpu);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 57c27846320f..27af337f9fea 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -8,15 +8,16 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <nvhe/early_alloc.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>
-struct hyp_pool hpool;
unsigned long hyp_nr_cpus;
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -26,6 +27,7 @@ static void *vmemmap_base;
static void *hyp_pgt_base;
static void *host_s2_pgt_base;
static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
+static struct hyp_pool hpool;
static int divide_memory_pool(void *virt, unsigned long size)
{
@@ -164,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
enum kvm_pgtable_walk_flags flag,
void * const arg)
{
+ struct kvm_pgtable_mm_ops *mm_ops = arg;
enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
kvm_pte_t pte = *ptep;
@@ -172,12 +175,21 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
if (!kvm_pte_valid(pte))
return 0;
+ /*
+ * Fix-up the refcount for the page-table pages as the early allocator
+ * was unable to access the hyp_vmemmap and so the buddy allocator has
+ * initialised the refcount to '1'.
+ */
+ mm_ops->get_page(ptep);
+ if (flag != KVM_PGTABLE_WALK_LEAF)
+ return 0;
+
if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
phys = kvm_pte_to_phys(pte);
if (!addr_is_memory(phys))
- return 0;
+ return -EINVAL;
/*
* Adjust the host stage-2 mappings to match the ownership attributes
@@ -204,10 +216,21 @@ static int finalize_host_mappings(void)
{
struct kvm_pgtable_walker walker = {
.cb = finalize_host_mappings_walker,
- .flags = KVM_PGTABLE_WALK_LEAF,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ .arg = pkvm_pgtable.mm_ops,
};
+ int i, ret;
+
+ for (i = 0; i < hyp_memblock_nr; i++) {
+ struct memblock_region *reg = &hyp_memory[i];
+ u64 start = (u64)hyp_phys_to_virt(reg->base);
+
+ ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
+ if (ret)
+ return ret;
+ }
- return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+ return 0;
}
void __noreturn __pkvm_init_finalise(void)
@@ -229,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = finalize_host_mappings();
- if (ret)
- goto out;
-
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
.zalloc_page = hyp_zalloc_hyp_page,
.phys_to_virt = hyp_phys_to_virt,
.virt_to_phys = hyp_virt_to_phys,
.get_page = hpool_get_page,
.put_page = hpool_put_page,
+ .page_count = hyp_page_count,
};
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
+ ret = finalize_host_mappings();
+ if (ret)
+ goto out;
+
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
@@ -260,6 +284,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
int ret;
+ BUG_ON(kvm_check_pvm_sysreg_table());
+
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
return -EINVAL;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index a34b01cc8ab9..6410d21d8695 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -25,8 +25,8 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -158,6 +158,98 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+/**
+ * Handler for protected VM MSR, MRS or System instruction execution in AArch64.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Make sure we handle the exit for workarounds and ptrauth
+ * before the pKVM handling, as the latter could decide to
+ * UNDEF.
+ */
+ return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
+ kvm_handle_pvm_sysreg(vcpu, exit_code));
+}
+
+/**
+ * Handler for protected floating-point and Advanced SIMD accesses.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /* Linux guests assume support for floating-point and Advanced SIMD. */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ return kvm_hyp_handle_fpsimd(vcpu, exit_code);
+}
+
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn pvm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
+ [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ return pvm_exit_handlers;
+
+ return hyp_exit_handlers;
+}
+
+/*
+ * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
+ * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
+ * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
+ * hypervisor spots a guest in such a state ensure it is handled, and don't
+ * trust the host to spot or fix it. The check below is based on the one in
+ * kvm_arch_vcpu_ioctl_run().
+ *
+ * Returns false if the guest ran in AArch32 when it shouldn't have, and
+ * thus should exit to the host, or true if a the guest run loop can continue.
+ */
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+
+ if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ /*
+ * As we have caught the guest red-handed, decide that it isn't
+ * fit for purpose anymore by making the vcpu invalid. The VMM
+ * can try and fix it by re-initializing the vcpu with
+ * KVM_ARM_VCPU_INIT, however, this is likely not possible for
+ * protected VMs.
+ */
+ vcpu->arch.target = -1;
+ *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
+ *exit_code |= ARM_EXCEPTION_IL;
+ }
+}
+
/* Switch to the guest for legacy non-VHE systems */
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
new file mode 100644
index 000000000000..792cf6e6ac92
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include <hyp/adjust_pc.h>
+
+#include <nvhe/fixed_config.h>
+
+#include "../../sys_regs.h"
+
+/*
+ * Copies of the host's CPU features registers holding sanitized values at hyp.
+ */
+u64 id_aa64pfr0_el1_sys_val;
+u64 id_aa64pfr1_el1_sys_val;
+u64 id_aa64isar0_el1_sys_val;
+u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64mmfr0_el1_sys_val;
+u64 id_aa64mmfr1_el1_sys_val;
+u64 id_aa64mmfr2_el1_sys_val;
+
+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
+ KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_PENDING_EXCEPTION);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
+/*
+ * Returns the restricted features values of the feature register based on the
+ * limitations in restrict_fields.
+ * A feature id field value of 0b0000 does not impose any restrictions.
+ * Note: Use only for unsigned feature field values.
+ */
+static u64 get_restricted_features_unsigned(u64 sys_reg_val,
+ u64 restrict_fields)
+{
+ u64 value = 0UL;
+ u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ /*
+ * According to the Arm Architecture Reference Manual, feature fields
+ * use increasing values to indicate increases in functionality.
+ * Iterate over the restricted feature fields and calculate the minimum
+ * unsigned value between the one supported by the system, and what the
+ * value is being restricted to.
+ */
+ while (sys_reg_val && restrict_fields) {
+ value |= min(sys_reg_val & mask, restrict_fields & mask);
+ sys_reg_val &= ~mask;
+ restrict_fields &= ~mask;
+ mask <<= ARM64_FEATURE_FIELD_BITS;
+ }
+
+ return value;
+}
+
+/*
+ * Functions that return the value of feature id registers for protected VMs
+ * based on allowed features, system features, and KVM support.
+ */
+
+static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 set_mask = 0;
+ u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+
+ if (!vcpu_has_sve(vcpu))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
+
+ set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+
+ /* Spectre and Meltdown mitigation in KVM */
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+ (u64)kvm->arch.pfr0_csv2);
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+ (u64)kvm->arch.pfr0_csv3);
+
+ return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
+}
+
+static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
+
+ if (!kvm_has_mte(kvm))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+
+ return id_aa64pfr1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for Scalable Vectors, therefore, hyp has no sanitized
+ * copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, including breakpoints, and watchpoints,
+ * therefore, pKVM has no sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, therefore, hyp has no sanitized copy of the
+ * feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
+}
+
+static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
+{
+ u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+
+ if (!vcpu_has_ptrauth(vcpu))
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+
+ return id_aa64isar1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
+{
+ u64 set_mask;
+
+ set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
+ PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+
+ return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
+}
+
+static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+}
+
+/* Read a sanitized cpufeature ID register by its encoding */
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+{
+ switch (id) {
+ case SYS_ID_AA64PFR0_EL1:
+ return get_pvm_id_aa64pfr0(vcpu);
+ case SYS_ID_AA64PFR1_EL1:
+ return get_pvm_id_aa64pfr1(vcpu);
+ case SYS_ID_AA64ZFR0_EL1:
+ return get_pvm_id_aa64zfr0(vcpu);
+ case SYS_ID_AA64DFR0_EL1:
+ return get_pvm_id_aa64dfr0(vcpu);
+ case SYS_ID_AA64DFR1_EL1:
+ return get_pvm_id_aa64dfr1(vcpu);
+ case SYS_ID_AA64AFR0_EL1:
+ return get_pvm_id_aa64afr0(vcpu);
+ case SYS_ID_AA64AFR1_EL1:
+ return get_pvm_id_aa64afr1(vcpu);
+ case SYS_ID_AA64ISAR0_EL1:
+ return get_pvm_id_aa64isar0(vcpu);
+ case SYS_ID_AA64ISAR1_EL1:
+ return get_pvm_id_aa64isar1(vcpu);
+ case SYS_ID_AA64MMFR0_EL1:
+ return get_pvm_id_aa64mmfr0(vcpu);
+ case SYS_ID_AA64MMFR1_EL1:
+ return get_pvm_id_aa64mmfr1(vcpu);
+ case SYS_ID_AA64MMFR2_EL1:
+ return get_pvm_id_aa64mmfr2(vcpu);
+ default:
+ /*
+ * Should never happen because all cases are covered in
+ * pvm_sys_reg_descs[].
+ */
+ WARN_ON(1);
+ break;
+ }
+
+ return 0;
+}
+
+static u64 read_id_reg(const struct kvm_vcpu *vcpu,
+ struct sys_reg_desc const *r)
+{
+ return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+}
+
+/* Handler to RAZ/WI sysregs */
+static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!p->is_write)
+ p->regval = 0;
+
+ return true;
+}
+
+/*
+ * Accessor for AArch32 feature id registers.
+ *
+ * The value of these registers is "unknown" according to the spec if AArch32
+ * isn't supported.
+ */
+static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ /*
+ * No support for AArch32 guests, therefore, pKVM has no sanitized copy
+ * of AArch32 feature id registers.
+ */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ return pvm_access_raz_wi(vcpu, p, r);
+}
+
+/*
+ * Accessor for AArch64 feature id registers.
+ *
+ * If access is allowed, set the regval to the protected VM's view of the
+ * register and return true.
+ * Otherwise, inject an undefined exception and return false.
+ */
+static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ p->regval = read_id_reg(vcpu, r);
+ return true;
+}
+
+static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /* pVMs only support GICv3. 'nuf said. */
+ if (!p->is_write)
+ p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE;
+
+ return true;
+}
+
+/* Mark the specified system register as an AArch32 feature id register. */
+#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }
+
+/* Mark the specified system register as an AArch64 feature id register. */
+#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+
+/* Mark the specified system register as Read-As-Zero/Write-Ignored */
+#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
+
+/* Mark the specified system register as not being handled in hyp. */
+#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL }
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * NOTE: Anything not explicitly listed here is *restricted by default*, i.e.,
+ * it will lead to injecting an exception into the guest.
+ */
+static const struct sys_reg_desc pvm_sys_reg_descs[] = {
+ /* Cache maintenance by set/way operations are restricted. */
+
+ /* Debug and Trace Registers are restricted. */
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+ AARCH32(SYS_ID_PFR0_EL1),
+ AARCH32(SYS_ID_PFR1_EL1),
+ AARCH32(SYS_ID_DFR0_EL1),
+ AARCH32(SYS_ID_AFR0_EL1),
+ AARCH32(SYS_ID_MMFR0_EL1),
+ AARCH32(SYS_ID_MMFR1_EL1),
+ AARCH32(SYS_ID_MMFR2_EL1),
+ AARCH32(SYS_ID_MMFR3_EL1),
+
+ /* CRm=2 */
+ AARCH32(SYS_ID_ISAR0_EL1),
+ AARCH32(SYS_ID_ISAR1_EL1),
+ AARCH32(SYS_ID_ISAR2_EL1),
+ AARCH32(SYS_ID_ISAR3_EL1),
+ AARCH32(SYS_ID_ISAR4_EL1),
+ AARCH32(SYS_ID_ISAR5_EL1),
+ AARCH32(SYS_ID_MMFR4_EL1),
+ AARCH32(SYS_ID_ISAR6_EL1),
+
+ /* CRm=3 */
+ AARCH32(SYS_MVFR0_EL1),
+ AARCH32(SYS_MVFR1_EL1),
+ AARCH32(SYS_MVFR2_EL1),
+ AARCH32(SYS_ID_PFR2_EL1),
+ AARCH32(SYS_ID_DFR1_EL1),
+ AARCH32(SYS_ID_MMFR5_EL1),
+
+ /* AArch64 ID registers */
+ /* CRm=4 */
+ AARCH64(SYS_ID_AA64PFR0_EL1),
+ AARCH64(SYS_ID_AA64PFR1_EL1),
+ AARCH64(SYS_ID_AA64ZFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR1_EL1),
+ AARCH64(SYS_ID_AA64AFR0_EL1),
+ AARCH64(SYS_ID_AA64AFR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR0_EL1),
+ AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR0_EL1),
+ AARCH64(SYS_ID_AA64MMFR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR2_EL1),
+
+ /* Scalable Vector Registers are restricted. */
+
+ RAZ_WI(SYS_ERRIDR_EL1),
+ RAZ_WI(SYS_ERRSELR_EL1),
+ RAZ_WI(SYS_ERXFR_EL1),
+ RAZ_WI(SYS_ERXCTLR_EL1),
+ RAZ_WI(SYS_ERXSTATUS_EL1),
+ RAZ_WI(SYS_ERXADDR_EL1),
+ RAZ_WI(SYS_ERXMISC0_EL1),
+ RAZ_WI(SYS_ERXMISC1_EL1),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Limited Ordering Regions Registers are restricted. */
+
+ HOST_HANDLED(SYS_ICC_SGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_SGI0R_EL1),
+ { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
+
+ HOST_HANDLED(SYS_CCSIDR_EL1),
+ HOST_HANDLED(SYS_CLIDR_EL1),
+ HOST_HANDLED(SYS_CSSELR_EL1),
+ HOST_HANDLED(SYS_CTR_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Activity Monitoring Registers are restricted. */
+
+ HOST_HANDLED(SYS_CNTP_TVAL_EL0),
+ HOST_HANDLED(SYS_CNTP_CTL_EL0),
+ HOST_HANDLED(SYS_CNTP_CVAL_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+};
+
+/*
+ * Checks that the sysreg table is unique and in-order.
+ *
+ * Returns 0 if the table is consistent, or 1 otherwise.
+ */
+int kvm_check_pvm_sysreg_table(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) {
+ if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Handler for protected VM MSR, MRS or System instruction execution.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't, to be handled by the host.
+ */
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const struct sys_reg_desc *r;
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ params = esr_sys64_to_params(esr);
+ params.regval = vcpu_get_reg(vcpu, Rt);
+
+ r = find_reg(&params, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs));
+
+ /* Undefined (RESTRICTED). */
+ if (r == NULL) {
+ inject_undef64(vcpu);
+ return true;
+ }
+
+ /* Handled by the host (HOST_HANDLED) */
+ if (r->access == NULL)
+ return false;
+
+ /* Handled by hyp: skip instruction if instructed to do so. */
+ if (r->access(vcpu, &params, r))
+ __kvm_skip_instr(vcpu);
+
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+
+ return true;
+}
+
+/*
+ * Handler for protected VM restricted exceptions.
+ *
+ * Inject an undefined exception into the guest and return true to indicate that
+ * the hypervisor has handled the exit, and control should go back to the guest.
+ */
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ inject_undef64(vcpu);
+ return true;
+}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f8ceebe4982e..844a6f003fd5 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
return prot;
}
-static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
-{
- /*
- * Tolerate KVM recreating the exact same mapping, or changing software
- * bits if the existing mapping was valid.
- */
- if (old == new)
- return false;
-
- if (!kvm_pte_valid(old))
- return true;
-
- return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW);
-}
-
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
@@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
+ data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
- if (hyp_pte_needs_update(old, new))
- smp_store_release(ptep, new);
+ if (old == new)
+ return true;
+ if (!kvm_pte_valid(old))
+ data->mm_ops->get_page(ptep);
+ else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+ return false;
- data->phys += granule;
+ smp_store_release(ptep, new);
return true;
}
@@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return -ENOMEM;
kvm_set_table_pte(ptep, childp, mm_ops);
+ mm_ops->get_page(ptep);
return 0;
}
@@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
return ret;
}
+struct hyp_unmap_data {
+ u64 unmapped;
+ struct kvm_pgtable_mm_ops *mm_ops;
+};
+
+static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ enum kvm_pgtable_walk_flags flag, void * const arg)
+{
+ kvm_pte_t pte = *ptep, *childp = NULL;
+ u64 granule = kvm_granule_size(level);
+ struct hyp_unmap_data *data = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+
+ if (!kvm_pte_valid(pte))
+ return -EINVAL;
+
+ if (kvm_pte_table(pte, level)) {
+ childp = kvm_pte_follow(pte, mm_ops);
+
+ if (mm_ops->page_count(childp) != 1)
+ return 0;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+ } else {
+ if (end - addr < granule)
+ return -EINVAL;
+
+ kvm_clear_pte(ptep);
+ dsb(ishst);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+ data->unmapped += granule;
+ }
+
+ dsb(ish);
+ isb();
+ mm_ops->put_page(ptep);
+
+ if (childp)
+ mm_ops->put_page(childp);
+
+ return 0;
+}
+
+u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct hyp_unmap_data unmap_data = {
+ .mm_ops = pgt->mm_ops,
+ };
+ struct kvm_pgtable_walker walker = {
+ .cb = hyp_unmap_walker,
+ .arg = &unmap_data,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+ };
+
+ if (!pgt->mm_ops->page_count)
+ return 0;
+
+ kvm_pgtable_walk(pgt, addr, size, &walker);
+ return unmap_data.unmapped;
+}
+
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
enum kvm_pgtable_walk_flags flag, void * const arg)
{
struct kvm_pgtable_mm_ops *mm_ops = arg;
+ kvm_pte_t pte = *ptep;
+
+ if (!kvm_pte_valid(pte))
+ return 0;
+
+ mm_ops->put_page(ptep);
+
+ if (kvm_pte_table(pte, level))
+ mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
- mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
return 0;
}
@@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
struct kvm_pgtable_walker walker = {
.cb = hyp_free_walker,
- .flags = KVM_PGTABLE_WALK_TABLE_POST,
+ .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
.arg = pgt->mm_ops,
};
@@ -1116,13 +1178,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
-int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
{
size_t pgd_sz;
- u64 vtcr = arch->vtcr;
+ u64 vtcr = mmu->arch->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
@@ -1135,7 +1197,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
pgt->ia_bits = ia_bits;
pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
- pgt->mmu = &arch->mmu;
+ pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
deleted file mode 100644
index 578670e3f608..000000000000
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ /dev/null
@@ -1,109 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2020 - Google LLC
- * Author: Quentin Perret <qperret@google.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/memblock.h>
-#include <linux/sort.h>
-
-#include <asm/kvm_host.h>
-
-#include <nvhe/memory.h>
-#include <nvhe/mm.h>
-
-static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
-static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
-
-phys_addr_t hyp_mem_base;
-phys_addr_t hyp_mem_size;
-
-static int cmp_hyp_memblock(const void *p1, const void *p2)
-{
- const struct memblock_region *r1 = p1;
- const struct memblock_region *r2 = p2;
-
- return r1->base < r2->base ? -1 : (r1->base > r2->base);
-}
-
-static void __init sort_memblock_regions(void)
-{
- sort(hyp_memory,
- *hyp_memblock_nr_ptr,
- sizeof(struct memblock_region),
- cmp_hyp_memblock,
- NULL);
-}
-
-static int __init register_memblock_regions(void)
-{
- struct memblock_region *reg;
-
- for_each_mem_region(reg) {
- if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
- return -ENOMEM;
-
- hyp_memory[*hyp_memblock_nr_ptr] = *reg;
- (*hyp_memblock_nr_ptr)++;
- }
- sort_memblock_regions();
-
- return 0;
-}
-
-void __init kvm_hyp_reserve(void)
-{
- u64 nr_pages, prev, hyp_mem_pages = 0;
- int ret;
-
- if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
- return;
-
- if (kvm_get_mode() != KVM_MODE_PROTECTED)
- return;
-
- ret = register_memblock_regions();
- if (ret) {
- *hyp_memblock_nr_ptr = 0;
- kvm_err("Failed to register hyp memblocks: %d\n", ret);
- return;
- }
-
- hyp_mem_pages += hyp_s1_pgtable_pages();
- hyp_mem_pages += host_s2_pgtable_pages();
-
- /*
- * The hyp_vmemmap needs to be backed by pages, but these pages
- * themselves need to be present in the vmemmap, so compute the number
- * of pages needed by looking for a fixed point.
- */
- nr_pages = 0;
- do {
- prev = nr_pages;
- nr_pages = hyp_mem_pages + prev;
- nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
- nr_pages += __hyp_pgtable_max_pages(nr_pages);
- } while (nr_pages != prev);
- hyp_mem_pages += nr_pages;
-
- /*
- * Try to allocate a PMD-aligned region to reduce TLB pressure once
- * this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
- */
- hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
- hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
- PMD_SIZE);
- if (!hyp_mem_base)
- hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
- else
- hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
-
- if (!hyp_mem_base) {
- kvm_err("Failed to reserve hyp memory\n");
- return;
- }
-
- kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
- hyp_mem_base);
-}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 39f8f7f9227c..20db2f281cf2 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
goto spurious;
lr_val &= ~ICH_LR_STATE;
- /* No active state for LPIs */
- if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
- lr_val |= ICH_LR_ACTIVE_BIT;
+ lr_val |= ICH_LR_ACTIVE_BIT;
__gic_v3_set_lr(lr_val, lr);
__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
@@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
/* Drop priority in any case */
act_prio = __vgic_v3_clear_highest_active_priority();
- /* If EOIing an LPI, no deactivate to be performed */
- if (vid >= VGIC_MIN_LPI)
- return;
-
- /* EOImode == 1, nothing to be done here */
- if (vmcr & ICH_VMCR_EOIM_MASK)
- return;
-
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
if (lr == -1) {
- __vgic_v3_bump_eoicount();
+ /* Do not bump EOIcount for LPIs that aren't in the LRs */
+ if (!(vid >= VGIC_MIN_LPI))
+ __vgic_v3_bump_eoicount();
return;
}
+ /* EOImode == 1 and not an LPI, nothing to be done here */
+ if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI))
+ return;
+
lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
/* If priorities or group do not match, the guest has fscked-up. */
@@ -987,8 +983,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
- /* SEIS */
- val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index ded2c66675f0..11d053fdd604 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -24,7 +24,6 @@
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <asm/thread_info.h>
/* VHE specific context */
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
@@ -96,6 +95,26 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
}
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ return hyp_exit_handlers;
+}
+
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{