author    Linus Torvalds <torvalds@linux-foundation.org>  2025-01-28 09:01:36 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2025-01-28 09:01:36 -0800
commit    e2ee2e9b159094527ae7ad78058b1316f62fc5b7 (patch)
tree      77d3cf7905c301a596335dd242c63aee3e641e0c
parent    9ff28f2fad67e173ed25b8c3a183b15da5445d2d (diff)
parent    01009b06a6b52d8439c55b530633a971c13b6cb2 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull KVM/arm64 updates from Will Deacon:
 "New features:

   - Support for non-protected guests in protected mode, achieving near
     feature parity with the non-protected mode

   - Support for the EL2 timers as part of the ongoing NV support

   - Allow control of hardware tracing for nVHE/hVHE

  Improvements, fixes and cleanups:

   - Massive cleanup of the debug infrastructure, making it a bit less
     awkward and definitely easier to maintain. This should pave the
     way for further optimisations

   - Complete rewrite of pKVM's fixed-feature infrastructure, aligning
     it with the rest of KVM and making the code easier to follow

   - Large simplification of pKVM's memory protection infrastructure

   - Better handling of RES0/RES1 fields for memory-backed system
     registers

   - Add a workaround for Qualcomm's Snapdragon X CPUs, which suffer
     from a pretty nasty timer bug

   - Small collection of cleanups and low-impact fixes"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (87 commits)
  arm64/sysreg: Get rid of TRFCR_ELx SysregFields
  KVM: arm64: nv: Fix doc header layout for timers
  KVM: arm64: nv: Apply RESx settings to sysreg reset values
  KVM: arm64: nv: Always evaluate HCR_EL2 using sanitising accessors
  KVM: arm64: Fix selftests after sysreg field name update
  coresight: Pass guest TRFCR value to KVM
  KVM: arm64: Support trace filtering for guests
  KVM: arm64: coresight: Give TRBE enabled state to KVM
  coresight: trbe: Remove redundant disable call
  arm64/sysreg/tools: Move TRFCR definitions to sysreg
  tools: arm64: Update sysreg.h header files
  KVM: arm64: Drop pkvm_mem_transition for host/hyp donations
  KVM: arm64: Drop pkvm_mem_transition for host/hyp sharing
  KVM: arm64: Drop pkvm_mem_transition for FF-A
  KVM: arm64: Explicitly handle BRBE traps as UNDEFINED
  KVM: arm64: vgic: Use str_enabled_disabled() in vgic_v3_probe()
  arm64: kvm: Introduce nvhe stack size constants
  KVM: arm64: Fix nVHE stacktrace VA bits mask
  KVM: arm64: Fix FEAT_MTE in pKVM
  Documentation: Update the behaviour of "kvm-arm.mode"
  ...
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 16
-rw-r--r--  Documentation/virt/kvm/devices/vcpu.rst | 14
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 14
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 65
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 136
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 18
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h | 38
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h | 51
-rw-r--r--  arch/arm64/include/asm/memory.h | 5
-rw-r--r--  arch/arm64/include/asm/stacktrace/nvhe.h | 2
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 16
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 8
-rw-r--r--  arch/arm64/kernel/image-vars.h | 3
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 179
-rw-r--r--  arch/arm64/kvm/arm.c | 86
-rw-r--r--  arch/arm64/kvm/debug.c | 416
-rw-r--r--  arch/arm64/kvm/emulate-nested.c | 81
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 12
-rw-r--r--  arch/arm64/kvm/guest.c | 31
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 5
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 42
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 39
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 43
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 223
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/gfp.h | 6
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 39
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/memory.h | 50
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 23
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c | 72
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/host.S | 4
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 209
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 884
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/mm.c | 12
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/page_alloc.c | 14
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 410
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c | 8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/stacktrace.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 52
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c | 404
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/timer-sr.c | 16
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c | 13
-rw-r--r--  arch/arm64/kvm/hyp/vhe/debug-sr.c | 5
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 107
-rw-r--r--  arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4
-rw-r--r--  arch/arm64/kvm/mmu.c | 108
-rw-r--r--  arch/arm64/kvm/nested.c | 38
-rw-r--r--  arch/arm64/kvm/pkvm.c | 201
-rw-r--r--  arch/arm64/kvm/reset.c | 6
-rw-r--r--  arch/arm64/kvm/stacktrace.c | 9
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 411
-rw-r--r--  arch/arm64/kvm/trace_handle_exit.h | 75
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3.c | 11
-rw-r--r--  arch/arm64/tools/cpucaps | 1
-rw-r--r--  arch/arm64/tools/sysreg | 32
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x-core.c | 55
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 10
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x.h | 2
-rw-r--r--  drivers/hwtracing/coresight/coresight-priv.h | 3
-rw-r--r--  drivers/hwtracing/coresight/coresight-self-hosted-trace.h | 9
-rw-r--r--  drivers/hwtracing/coresight/coresight-trbe.c | 15
-rw-r--r--  include/clocksource/arm_arch_timer.h | 6
-rw-r--r--  include/kvm/arm_arch_timer.h | 23
-rw-r--r--  tools/arch/arm64/include/asm/sysreg.h | 410
-rw-r--r--  tools/include/linux/kasan-tags.h | 15
-rw-r--r--  tools/testing/selftests/kvm/arm64/aarch32_id_regs.c | 2
-rw-r--r--  tools/testing/selftests/kvm/arm64/set_id_regs.c | 2
69 files changed, 3112 insertions, 2218 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d0f6c055dfcc..fb8752b42ec8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2824,17 +2824,21 @@
nvhe: Standard nVHE-based mode, without support for
protected guests.
- protected: nVHE-based mode with support for guests whose
- state is kept private from the host.
+ protected: Mode with support for guests whose state is
+ kept private from the host, using VHE or
+ nVHE depending on HW support.
nested: VHE-based mode with support for nested
- virtualization. Requires at least ARMv8.3
- hardware.
+ virtualization. Requires at least ARMv8.4
+ hardware (with FEAT_NV2).
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
- for the host. "nested" is experimental and should be
- used with extreme caution.
+ for the host. To force nVHE on VHE hardware, add
+ "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the
+ command-line.
+ "nested" is experimental and should be used with
+ extreme caution.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 31f14ec4a65b..31a9576c07af 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -142,8 +142,8 @@ the cpu field to the processor id.
:Architectures: ARM64
-2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
------------------------------------------------------------------------------
+2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_{VTIMER,PTIMER,HVTIMER,HPTIMER}
+-----------------------------------------------------------------------
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
@@ -159,10 +159,12 @@ A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
-============================= ==========================================
-KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
-KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
-============================= ==========================================
+============================== ==========================================
+KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
+KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
+KVM_ARM_VCPU_TIMER_IRQ_HVTIMER The EL2 virtual timer intid (default: 28)
+KVM_ARM_VCPU_TIMER_IRQ_HPTIMER The EL2 physical timer intid (default: 26)
+============================== ==========================================
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
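
For userspace, the new EL2 timer attributes are set through the same vcpu
device-attribute interface as the existing EL1 ones. A minimal illustrative
sketch (not part of the patch; the wrapper name is made up and error
handling is elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative sketch: assign the EL2 virtual timer PPI on one vCPU.
 * Assumes vcpu_fd is an open KVM vCPU file descriptor. */
static int set_hvtimer_irq(int vcpu_fd, int intid)
{
	struct kvm_device_attr attr = {
		.group	= KVM_ARM_VCPU_TIMER_CTRL,
		.attr	= KVM_ARM_VCPU_TIMER_IRQ_HVTIMER,
		.addr	= (uint64_t)&intid,	/* pointer to an int, per the docs */
	};

	/* Must name a PPI: 16 <= intid < 32 */
	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}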
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 488f8e751349..6f3f4142e214 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -122,6 +122,7 @@
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -198,6 +199,7 @@
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 43e365fbff0b..8d94a6c0ed5c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -300,7 +300,7 @@
#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
-#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
+#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0))
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
GENMASK(29, 21) | \
GENMASK(19, 14) | \
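
The symbolic form makes the RES1 set auditable: BIT(13) | BIT(9) |
GENMASK(7, 0) evaluates to 0x22ff, whereas the old literal 0x32ff also
hard-coded bit 12 (CPTR_EL2_TSM). That bit is now applied at runtime only
when SME is not implemented, as the kvm_emulate.h hunk below shows. A
standalone sketch of the arithmetic, with the kernel's helpers re-stated
locally for a 64-bit build:

#include <stdint.h>
#include <stdio.h>

/* 64-bit restatements of the kernel's BIT()/GENMASK() helpers, so the
 * arithmetic can be checked outside the kernel tree. */
#define BIT(n)		(1UL << (n))
#define GENMASK(h, l)	(((~0UL) << (l)) & (~0UL >> (63 - (h))))

int main(void)
{
	uint64_t res1 = BIT(13) | BIT(9) | GENMASK(7, 0);

	/* Prints 0x22ff: the old literal 0x32ff additionally hard-coded
	 * bit 12 (CPTR_EL2_TSM), now set conditionally at runtime. */
	printf("CPTR_NVHE_EL2_RES1 = %#llx\n", (unsigned long long)res1);
	return res1 == 0x22ff ? 0 : 1;
}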
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ca2590344313..bec227f9500a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -53,8 +53,7 @@
enum __kvm_host_smccc_func {
/* Hypercalls available only prior to pKVM finalisation */
/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
- __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
- __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
__KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
@@ -65,6 +64,12 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -79,6 +84,9 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
+ __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -247,8 +255,6 @@ extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
extern void __vgic_v3_init_lrs(void);
-extern u64 __kvm_get_mdcr_el2(void);
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 4f1d99725f6b..47f2cf408eed 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -184,29 +184,30 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
}
-static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
{
return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
- (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H));
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
}
-static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
{
- return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
}
-static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
- return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
-}
+ bool e2h, tge;
+ u64 hcr;
-static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
-{
- return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
-}
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ e2h = (hcr & HCR_E2H);
+ tge = (hcr & HCR_TGE);
-static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
-{
/*
* We are in a hypervisor context if the vcpu mode is EL2 or
* E2H and TGE bits are set. The latter means we are in the user space
@@ -215,14 +216,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
* Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
* rest of the KVM code, and will result in a misbehaving guest.
*/
- return vcpu_is_el2_ctxt(ctxt) ||
- (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
- __vcpu_el2_tge_is_set(ctxt);
-}
-
-static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
-{
- return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
+ return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
}
static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
@@ -619,7 +613,8 @@ static __always_inline void kvm_write_cptr_el2(u64 val)
write_sysreg(val, cptr_el2);
}
-static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
+/* Resets the value of cptr_el2 when returning to the host. */
+static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm)
{
u64 val;
@@ -630,29 +625,28 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
} else if (has_hvhe()) {
val = CPACR_EL1_FPEN;
- if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ if (!kvm_has_sve(kvm) || !guest_owns_fp_regs())
val |= CPACR_EL1_ZEN;
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN;
} else {
val = CPTR_NVHE_EL2_RES1;
- if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
+ if (kvm_has_sve(kvm) && guest_owns_fp_regs())
val |= CPTR_EL2_TZ;
- if (cpus_have_final_cap(ARM64_SME))
- val &= ~CPTR_EL2_TSM;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
}
- return val;
-}
-
-static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
-{
- u64 val = kvm_get_reset_cptr_el2(vcpu);
-
kvm_write_cptr_el2(val);
}
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm))
+#else
+#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm)
+#endif
+
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
@@ -697,9 +691,4 @@ static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
{
return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
}
-
-static inline void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
-{
- vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
-}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e18e9244d17a..7cfa024de4e3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
phys_addr_t head;
unsigned long nr_pages;
+ struct pkvm_mapping *mapping; /* only used from EL1 */
};
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
@@ -331,6 +332,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
/* Fine-Grained UNDEF initialised */
#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
+ /* SVE exposed to guest */
+#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
unsigned long flags;
/* VM-wide vCPU feature set */
@@ -490,7 +493,6 @@ enum vcpu_sysreg {
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
- CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
@@ -501,6 +503,7 @@ enum vcpu_sysreg {
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
/* Any VNCR-capable reg goes after this point */
MARKER(__VNCR_START__),
@@ -610,6 +613,14 @@ struct cpu_sve_state {
* field.
*/
struct kvm_host_data {
+#define KVM_HOST_DATA_FLAG_HAS_SPE 0
+#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
+#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2
+#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3
+#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
+#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
+ unsigned long flags;
+
struct kvm_cpu_context host_ctxt;
/*
@@ -642,7 +653,7 @@ struct kvm_host_data {
* host_debug_state contains the host registers which are
* saved and restored during world switches.
*/
- struct {
+ struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
/* Statistical profiling extension */
@@ -652,6 +663,16 @@ struct kvm_host_data {
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2;
} host_debug_state;
+
+ /* Guest trace filter value */
+ u64 trfcr_while_in_guest;
+
+ /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
+ unsigned int nr_event_counters;
+
+ /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
+ unsigned int debug_brps;
+ unsigned int debug_wrps;
};
struct kvm_host_psci_config {
@@ -708,7 +729,6 @@ struct kvm_vcpu_arch {
u64 hcr_el2;
u64 hcrx_el2;
u64 mdcr_el2;
- u64 cptr_el2;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -739,31 +759,22 @@ struct kvm_vcpu_arch {
*
* external_debug_state contains the debug values we want to debug the
* guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
- *
- * debug_ptr points to the set of debug registers that should be loaded
- * onto the hardware when running the guest.
*/
- struct kvm_guest_debug_arch *debug_ptr;
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
+ u64 external_mdscr_el1;
+
+ enum {
+ VCPU_DEBUG_FREE,
+ VCPU_DEBUG_HOST_OWNED,
+ VCPU_DEBUG_GUEST_OWNED,
+ } debug_owner;
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
- /*
- * Guest registers we preserve during guest debugging.
- *
- * These shadow registers are updated by the kvm_handle_sys_reg
- * trap handler if the guest accesses or updates them while we
- * are using guest debug.
- */
- struct {
- u32 mdscr_el1;
- bool pstate_ss;
- } guest_debug_preserved;
-
/* vcpu power state */
struct kvm_mp_state mp_state;
spinlock_t mp_state_lock;
@@ -771,6 +782,9 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
+ /* Pages to top-up the pKVM/EL2 guest pool */
+ struct kvm_hyp_memcache pkvm_memcache;
+
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -863,14 +877,10 @@ struct kvm_vcpu_arch {
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
-/* SVE exposed to guest */
-#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
-/* PTRAUTH exposed to guest */
-#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
-/* KVM_ARM_VCPU_INIT completed */
-#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
@@ -906,29 +916,21 @@ struct kvm_vcpu_arch {
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
-/* Guest debug is live */
-#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
-/* Save SPE context if active */
-#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
-/* Save TRBE context if active */
-#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+
/* Physical CPU not in supported_cpus */
-#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0))
/* WFIT instruction trapped */
-#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(1))
/* vcpu system registers loaded on physical CPU */
-#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
-/* Software step state is Active-pending */
-#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2))
+/* Software step state is Active-pending for external debug */
+#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3))
+/* Software step state is Active pending for guest debug */
+#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4))
/* PMUSERENR for the guest EL0 is on physical CPU */
-#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5))
/* WFI instruction trapped */
-#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
+#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -959,14 +961,21 @@ struct kvm_vcpu_arch {
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- vcpu_get_flag(vcpu, GUEST_HAS_SVE))
+#define kvm_has_sve(kvm) (system_supports_sve() && \
+ test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags))
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm))
+#else
+#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm)
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
- vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
@@ -1307,6 +1316,13 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
&this_cpu_ptr_hyp_sym(kvm_host_data)->f)
#endif
+#define host_data_test_flag(flag) \
+ (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)))
+#define host_data_set_flag(flag) \
+ set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+#define host_data_clear_flag(flag) \
+ clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+
/* Check whether the FP regs are owned by the guest */
static inline bool guest_owns_fp_regs(void)
{
@@ -1332,15 +1348,22 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-void kvm_arm_init_debug(void);
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+void kvm_init_host_debug_data(void);
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val);
#define kvm_vcpu_os_lock_enabled(vcpu) \
(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+#define kvm_debug_regs_in_use(vcpu) \
+ ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE)
+#define kvm_host_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED)
+#define kvm_guest_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED)
+
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
@@ -1367,14 +1390,13 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
return (!has_vhe() && attr->exclude_host);
}
-/* Flags for host debug state */
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
-
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u64 clr);
bool kvm_set_pmuserenr(u64 val);
+void kvm_enable_trbe(void);
+void kvm_disable_trbe(void);
+void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest);
#else
static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u64 clr) {}
@@ -1382,6 +1404,9 @@ static inline bool kvm_set_pmuserenr(u64 val)
{
return false;
}
+static inline void kvm_enable_trbe(void) {}
+static inline void kvm_disable_trbe(void) {}
+static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {}
#endif
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
@@ -1422,6 +1447,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
return test_bit(feature, ka->vcpu_features);
}
+#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f))
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
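
The host_data_*_flag() helpers paste the short name onto the
KVM_HOST_DATA_FLAG_ prefix, keeping call sites terse. A quick usage sketch
(illustrative only; the flag names come from the kvm_host_data hunk above):

/* host_data_test_flag(HAS_TRBE) expands to
 * test_bit(KVM_HOST_DATA_FLAG_HAS_TRBE, host_data_ptr(flags)). */
static bool example_cpu_has_trbe(void)
{
	return host_data_test_flag(HAS_TRBE);
}

static void example_set_trbe_state(bool enabled)
{
	if (enabled)
		host_data_set_flag(TRBE_ENABLED);
	else
		host_data_clear_flag(TRBE_ENABLED);
}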
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 66d93e320ec8..b98ac6aa631f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -139,6 +139,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
+extern u32 __hyp_va_bits;
+
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
@@ -353,6 +355,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return &kvm->arch.mmu != mmu;
}
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_lock(&kvm->mmu_lock);
+ else
+ read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_unlock(&kvm->mmu_lock);
+ else
+ read_unlock(&kvm->mmu_lock);
+}
+
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
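
kvm_fault_lock()/kvm_fault_unlock() let the stage-2 fault path keep its
read-lock fast path for regular VMs while taking mmu_lock exclusively under
pKVM, where the EL1-side mapping structures (see the pkvm_mappings rb-tree
introduced later in this series) presumably cannot tolerate concurrent
updates. A sketch of the intended pattern (handle_fault() is a hypothetical
stand-in for the real stage-2 abort handlers):

static int example_fault_path(struct kvm *kvm, phys_addr_t ipa)
{
	int ret;

	kvm_fault_lock(kvm);	/* write lock under pKVM, read lock otherwise */
	ret = handle_fault(kvm, ipa);	/* hypothetical */
	kvm_fault_unlock(kvm);

	return ret;
}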
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 6cd08198bf19..56c4bcd35e2e 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -64,6 +64,7 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
}
extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern bool forward_debug_exception(struct kvm_vcpu *vcpu);
extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
@@ -186,7 +187,7 @@ static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
return true;
}
-int kvm_init_nv_sysregs(struct kvm *kvm);
+int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
#ifdef CONFIG_ARM64_PTR_AUTH
bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index aab04097b505..6b9d274052c7 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -412,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
* be used instead of block mappings.
*/
struct kvm_pgtable {
- u32 ia_bits;
- s8 start_level;
- kvm_pteref_t pgd;
- struct kvm_pgtable_mm_ops *mm_ops;
-
- /* Stage-2 only */
- struct kvm_s2_mmu *mmu;
- enum kvm_pgtable_stage2_flags flags;
- kvm_pgtable_force_pte_cb_t force_pte_cb;
+ union {
+ struct rb_root pkvm_mappings;
+ struct {
+ u32 ia_bits;
+ s8 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+ };
+ };
+ struct kvm_s2_mmu *mmu;
};
/**
@@ -526,8 +531,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
-#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
- __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
+static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
+}
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -669,13 +677,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* set the access flag in that entry.
*/
-void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
@@ -705,6 +715,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
* @prot: Additional permissions to grant for the mapping.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
@@ -717,7 +728,8 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot);
+ enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index cd56acd9a842..eb65f12e81d9 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -20,6 +20,31 @@ int pkvm_init_host_vm(struct kvm *kvm);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static inline bool kvm_pvm_ext_allowed(long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
@@ -137,4 +162,30 @@ static inline size_t pkvm_host_sve_state_size(void)
SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
}
+struct pkvm_mapping {
+ struct rb_node node;
+ u64 gfn;
+ u64 pfn;
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot, void *mc,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc,
+ bool force_pte);
#endif /* __ARM64_KVM_PKVM_H__ */
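
With pKVM, the EL1 copy of a guest's stage-2 state is no longer a hardware
page table but an rb-tree of struct pkvm_mapping nodes, keyed by gfn,
living in the pkvm_mappings member of struct kvm_pgtable. Purely as an
illustration of the data structure — not the series' actual implementation
— a node insertion might look like this, using the kernel's standard
rb-tree API:

#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

/* Illustration only: record a gfn->pfn mapping in pgt->pkvm_mappings,
 * keyed by gfn. */
static int example_pkvm_record_mapping(struct kvm_pgtable *pgt, u64 gfn, u64 pfn)
{
	struct rb_node **link = &pgt->pkvm_mappings.rb_node, *parent = NULL;
	struct pkvm_mapping *new;

	while (*link) {
		struct pkvm_mapping *m = rb_entry(*link, struct pkvm_mapping, node);

		parent = *link;
		if (gfn < m->gfn)
			link = &(*link)->rb_left;
		else if (gfn > m->gfn)
			link = &(*link)->rb_right;
		else
			return -EEXIST;		/* gfn already mapped */
	}

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->gfn = gfn;
	new->pfn = pfn;
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &pgt->pkvm_mappings);
	return 0;
}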
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 8b9f33cf561b..717829df294e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -145,13 +145,16 @@
#define OVERFLOW_STACK_SIZE SZ_4K
+#define NVHE_STACK_SHIFT PAGE_SHIFT
+#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT)
+
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
* save the unwinded stacktrace; plus an additional entry to delimit the
* end.
*/
-#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long))
/*
* Alignment of kernel segments (e.g. .text, .data).
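
NVHE_STACK_SHIFT/NVHE_STACK_SIZE decouple the hyp stack size from
PAGE_SIZE, so NVHE_STACKTRACE_SIZE stays correct if the stack ever grows
beyond one page; with 4KiB pages the value is unchanged today. A standalone
restatement of the arithmetic for a 4KiB-page LP64 build:

#include <assert.h>

/* Standalone restatement; the kernel derives these from PAGE_SHIFT. */
#define OVERFLOW_STACK_SIZE	4096UL
#define NVHE_STACK_SHIFT	12	/* PAGE_SHIFT for 4KiB pages */
#define NVHE_STACK_SIZE		(1UL << NVHE_STACK_SHIFT)
#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long))

/* (4096 + 4096) / 2 + 8 == 4104 bytes on LP64 */
static_assert(NVHE_STACKTRACE_SIZE == 4104, "unexpected stacktrace buffer size");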
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index 44759281d0d4..171f9edef49f 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -47,7 +47,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
-DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b8303a83c0bf..05ea5223d2d5 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -283,8 +283,6 @@
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -477,6 +475,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -484,14 +483,17 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
@@ -519,7 +521,6 @@
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
-#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
@@ -983,15 +984,6 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#define TRFCR_ELx_TS_SHIFT 5
-#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX BIT(3)
-#define TRFCR_ELx_ExTRE BIT(1)
-#define TRFCR_ELx_E0TRE BIT(0)
-
/* GIC Hypervisor interface registers */
/* ICH_MISR_EL2 bit definitions */
#define ICH_MISR_EOI (1 << 0)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a78f247029ae..7ce555862895 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -787,5 +787,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
+ .desc = "Broken CNTVOFF_EL2",
+ .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+ ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+ MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+ {}
+ })),
+ },
+ {
}
};
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8f5422ed1b75..ef3a69cc398e 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
+
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 1215df590418..d3d243366536 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
static const u8 default_ppi[] = {
[TIMER_PTIMER] = 30,
@@ -101,21 +102,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
}
}
-static u64 timer_get_offset(struct arch_timer_context *ctxt)
-{
- u64 offset = 0;
-
- if (!ctxt)
- return 0;
-
- if (ctxt->offset.vm_offset)
- offset += *ctxt->offset.vm_offset;
- if (ctxt->offset.vcpu_offset)
- offset += *ctxt->offset.vcpu_offset;
-
- return offset;
-}
-
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
@@ -441,11 +427,30 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}
+static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
+{
+ /*
+ * Paper over NV2 brokenness by publishing the interrupt status
+ * bit. This still results in a poor quality of emulation (guest
+ * writes will have no effect until the next exit).
+ *
+ * But hey, it's fast, right?
+ */
+ if (is_hyp_ctxt(ctx->vcpu) &&
+ (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+ unsigned long val = timer_get_ctl(ctx);
+ __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
+ timer_set_ctl(ctx, val);
+ }
+}
+
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx)
{
int ret;
+ kvm_timer_update_status(timer_ctx, new_level);
+
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
@@ -471,6 +476,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
return;
}
+ kvm_timer_update_status(ctx, should_fire);
+
/*
* If the timer can fire now, we don't need to have a soft timer
* scheduled for the future. If the timer cannot fire at all,
@@ -513,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
case TIMER_VTIMER:
case TIMER_HVTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
- timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
+ cval = read_sysreg_el0(SYS_CNTV_CVAL);
+
+ if (has_broken_cntvoff())
+ cval -= timer_get_offset(ctx);
+
+ timer_set_cval(ctx, cval);
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -618,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)
case TIMER_VTIMER:
case TIMER_HVTIMER:
- set_cntvoff(timer_get_offset(ctx));
- write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
+ cval = timer_get_cval(ctx);
+ offset = timer_get_offset(ctx);
+ if (has_broken_cntvoff()) {
+ set_cntvoff(0);
+ cval += offset;
+ } else {
+ set_cntvoff(offset);
+ }
+ write_sysreg_el0(cval, SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
@@ -762,7 +781,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
- bool tpt, tpc;
+ bool tvt, tpt, tvc, tpc, tvt02, tpt02;
u64 clr, set;
/*
@@ -777,7 +796,29 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
* within this function, reality kicks in and we start adding
* traps based on emulation requirements.
*/
- tpt = tpc = false;
+ tvt = tpt = tvc = tpc = false;
+ tvt02 = tpt02 = false;
+
+ /*
+ * NV2 badly breaks the timer semantics by redirecting accesses to
+ * the EL1 timer state to memory, so let's call ECV to the rescue if
+ * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
+ *
+ * The treatment slightly varies depending whether we run a nVHE or
+ * VHE guest: nVHE will use the _EL0 registers directly, while VHE
+ * will use the _EL02 accessors. This translates in different trap
+ * bits.
+ *
+ * None of the trapping is required when running in non-HYP context,
+ * unless required by the L1 hypervisor settings once we advertise
+ * ECV+NV in the guest, or that we need trapping for other reasons.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
+ if (vcpu_el2_e2h_is_set(vcpu))
+ tvt02 = tpt02 = true;
+ else
+ tvt = tpt = true;
+ }
/*
* We have two possibility to deal with a physical offset:
@@ -793,9 +834,20 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
tpt = tpc = true;
/*
+ * For the poor sods that could not correctly subtract one value
+ * from another, trap the full virtual timer and counter.
+ */
+ if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
+ tvt = tvc = true;
+
+ /*
* Apply the enable bits that the guest hypervisor has requested for
* its own guest. We can only add traps that wouldn't have been set
* above.
+ * Implementation choices: we do not support NV when E2H=0 in the
+ * guest, and we don't support configuration where E2H is writable
+ * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
+ * not both). This simplifies the handling of the EL1NV* bits.
*/
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
@@ -806,6 +858,9 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+
+ tpt02 |= (val & CNTHCTL_EL1NVPCT);
+ tvt02 |= (val & CNTHCTL_EL1NVVCT);
}
/*
@@ -817,6 +872,10 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+ assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
+ assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
+ assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
+ assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
sysreg_clear_set(cnthctl_el2, clr, set);
@@ -905,6 +964,54 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_blocking(vcpu);
}
+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When NV2 is on, guest hypervisors have their EL1 timer register
+ * accesses redirected to the VNCR page. Any guest action taken on
+ * the timer is postponed until the next exit, leading to a very
+ * poor quality of emulation.
+ *
+ * This is an unmitigated disaster, only papered over by FEAT_ECV,
+ * which allows trapping of the timer registers even with NV2.
+ * Still, this is worse than FEAT_NV on its own. Meh.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu)) {
+ if (cpus_have_final_cap(ARM64_HAS_ECV))
+ return;
+
+ /*
+ * A non-VHE guest hypervisor doesn't have any direct access
+ * to its timers: the EL2 registers trap (and the HW is
+ * fully emulated), while the EL0 registers access memory
+ * despite the access being notionally direct. Boo.
+ *
+ * We update the hardware timer registers with the
+ * latest value written by the guest to the VNCR page
+ * and let the hardware take care of the rest.
+ */
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
+ } else {
+ /*
+ * For a VHE guest hypervisor, the EL2 state is directly
+ * stored in the host EL1 timers, while the emulated EL0
+ * state is stored in the VNCR page. The latter could have
+ * been updated behind our back, and we must reset the
+ * emulation of the timers.
+ */
+ struct timer_map map;
+ get_timer_map(vcpu, &map);
+
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
+ soft_timer_cancel(&map.emul_ptimer->hrtimer);
+ timer_emulate(map.emul_vtimer);
+ timer_emulate(map.emul_ptimer);
+ }
+}
+
/*
* With a userspace irqchip we have to check if the guest de-asserted the
* timer and if so, unmask the timer irq signal on the host interrupt
@@ -1363,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
return 0;
}
+static void kvm_timer_handle_errata(void)
+{
+ u64 mmfr0, mmfr1, mmfr4;
+
+ /*
+ * CNTVOFF_EL2 is broken on some implementations. For those, we trap
+ * all virtual timer/counter accesses, requiring FEAT_ECV.
+ *
+ * However, a hypervisor supporting nesting is likely to mitigate the
+ * erratum at L0, and not require other levels to mitigate it (which
+ * would otherwise be a terrible performance sink due to trap
+ * amplification).
+ *
+ * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
+ * and that NV is likely not to (because of limitations of the
+ * architecture), only enable the workaround when FEAT_VHE and
+ * FEAT_E2H0 are both detected. Time will tell if this actually holds.
+ */
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
+ if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
+ !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
+ SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
+ (has_vhe() || has_hvhe()) &&
+ cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
+ static_branch_enable(&broken_cntvoff_key);
+ kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
+ }
+}
+
int __init kvm_timer_hyp_init(bool has_gic)
{
struct arch_timer_kvm_info *info;
@@ -1431,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
goto out_free_vtimer_irq;
}
+ kvm_timer_handle_errata();
return 0;
out_free_ptimer_irq:
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 7b2735ad32e9..646e806c6ca6 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -61,7 +61,7 @@ static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTR
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
-DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
@@ -80,31 +80,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-/*
- * This functions as an allow-list of protected VM capabilities.
- * Features not explicitly allowed by this function are denied.
- */
-static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
-{
- switch (ext) {
- case KVM_CAP_IRQCHIP:
- case KVM_CAP_ARM_PSCI:
- case KVM_CAP_ARM_PSCI_0_2:
- case KVM_CAP_NR_VCPUS:
- case KVM_CAP_MAX_VCPUS:
- case KVM_CAP_MAX_VCPU_ID:
- case KVM_CAP_MSI_DEVID:
- case KVM_CAP_ARM_VM_IPA_SIZE:
- case KVM_CAP_ARM_PMU_V3:
- case KVM_CAP_ARM_SVE:
- case KVM_CAP_ARM_PTRAUTH_ADDRESS:
- case KVM_CAP_ARM_PTRAUTH_GENERIC:
- return true;
- default:
- return false;
- }
-}
-
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
@@ -113,7 +88,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (cap->flags)
return -EINVAL;
- if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+ if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap))
return -EINVAL;
switch (cap->cap) {
@@ -311,7 +286,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
- if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+ if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext))
return 0;
switch (ext) {
@@ -476,8 +451,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_init(vcpu);
- kvm_arm_reset_debug_ptr(vcpu);
-
kvm_arm_pvtime_vcpu_init(&vcpu->arch);
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
@@ -502,7 +475,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ if (!is_protected_kvm_enabled())
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ else
+ free_hyp_memcache(&vcpu->arch.pkvm_memcache);
kvm_timer_vcpu_terminate(vcpu);
kvm_pmu_vcpu_destroy(vcpu);
kvm_vgic_vcpu_destroy(vcpu);
@@ -574,6 +550,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct kvm_s2_mmu *mmu;
int *last_ran;
+ if (is_protected_kvm_enabled())
+ goto nommu;
+
if (vcpu_has_nv(vcpu))
kvm_vcpu_load_hw_mmu(vcpu);
@@ -594,10 +573,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
*last_ran = vcpu->vcpu_idx;
}
+nommu:
vcpu->cpu = cpu;
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
+ kvm_vcpu_load_debug(vcpu);
if (has_vhe())
kvm_vcpu_load_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
@@ -617,7 +598,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu_set_pauth_traps(vcpu);
- kvm_arch_vcpu_load_debug_state_flags(vcpu);
+ if (is_protected_kvm_enabled()) {
+ kvm_call_hyp_nvhe(__pkvm_vcpu_load,
+ vcpu->kvm->arch.pkvm.handle,
+ vcpu->vcpu_idx, vcpu->arch.hcr_el2);
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
+ &vcpu->arch.vgic_cpu.vgic_v3);
+ }
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
@@ -625,7 +612,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- kvm_arch_vcpu_put_debug_state_flags(vcpu);
+ if (is_protected_kvm_enabled()) {
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
+ &vcpu->arch.vgic_cpu.vgic_v3);
+ kvm_call_hyp_nvhe(__pkvm_vcpu_put);
+ }
+
+ kvm_vcpu_put_debug(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
kvm_vcpu_put_vhe(vcpu);
@@ -808,8 +801,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
kvm_init_mpidr_data(kvm);
- kvm_arm_vcpu_init_debug(vcpu);
-
if (likely(irqchip_in_kernel(kvm))) {
/*
* Map the VGIC hardware resources before running a vcpu the
@@ -1187,7 +1178,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
continue;
}
- kvm_arm_setup_debug(vcpu);
kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
@@ -1204,8 +1194,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Back from guest
*************************************************************/
- kvm_arm_clear_debug(vcpu);
-
/*
* We must sync the PMU state before the vgic state so
* that the vgic can properly sample the updated state of the
@@ -1228,6 +1216,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
+ if (is_hyp_ctxt(vcpu))
+ kvm_timer_sync_nested(vcpu);
+
kvm_arch_vcpu_ctxsync_fp(vcpu);
/*
@@ -1571,7 +1562,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
}
vcpu_reset_hcr(vcpu);
- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
/*
* Handle the "start in power-off" case.
@@ -2109,6 +2099,7 @@ static void cpu_set_hyp_vector(void)
static void cpu_hyp_init_context(void)
{
kvm_init_host_cpu_context(host_data_ptr(host_ctxt));
+ kvm_init_host_debug_data();
if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
@@ -2117,7 +2108,6 @@ static void cpu_hyp_init_context(void)
static void cpu_hyp_init_features(void)
{
cpu_set_hyp_vector();
- kvm_arm_init_debug();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
@@ -2339,7 +2329,7 @@ static void __init teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
- free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+ free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
if (free_sve) {
@@ -2527,15 +2517,15 @@ static int __init init_hyp_mode(void)
* Allocate stack pages for Hypervisor-mode
*/
for_each_possible_cpu(cpu) {
- unsigned long stack_page;
+ unsigned long stack_base;
- stack_page = __get_free_page(GFP_KERNEL);
- if (!stack_page) {
+ stack_base = __get_free_pages(GFP_KERNEL, NVHE_STACK_SHIFT - PAGE_SHIFT);
+ if (!stack_base) {
err = -ENOMEM;
goto out_err;
}
- per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+ per_cpu(kvm_arm_hyp_stack_base, cpu) = stack_base;
}
/*
@@ -2604,9 +2594,9 @@ static int __init init_hyp_mode(void)
*/
for_each_possible_cpu(cpu) {
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
- char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
+ char *stack_base = (char *)per_cpu(kvm_arm_hyp_stack_base, cpu);
- err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
+ err = create_hyp_stack(__pa(stack_base), &params->stack_hyp_va);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
@@ -2618,7 +2608,7 @@ static int __init init_hyp_mode(void)
* __hyp_pa() won't do the right thing there, since the stack
* has been mapped in the flexible private VA space.
*/
- params->stack_pa = __pa(stack_page);
+ params->stack_pa = __pa(stack_base);
}
for_each_possible_cpu(cpu) {
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index ce8886122ed3..0e4c805e7e89 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -3,7 +3,8 @@
* Debug and Guest Debug support
*
* Copyright (C) 2015 - Linaro Ltd
- * Author: Alex Bennée <alex.bennee@linaro.org>
+ * Authors: Alex Bennée <alex.bennee@linaro.org>
+ * Oliver Upton <oliver.upton@linux.dev>
*/
#include <linux/kvm_host.h>
@@ -14,72 +15,6 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
-#include "trace.h"
-
-/* These are the bits of MDSCR_EL1 we may manipulate */
-#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
- DBG_MDSCR_KDE | \
- DBG_MDSCR_MDE)
-
-static DEFINE_PER_CPU(u64, mdcr_el2);
-
-/*
- * save/restore_guest_debug_regs
- *
- * For some debug operations we need to tweak some guest registers. As
- * a result we need to save the state of those registers before we
- * make those modifications.
- *
- * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
- * after we have restored the preserved value to the main context.
- *
- * When single-step is enabled by userspace, we tweak PSTATE.SS on every
- * guest entry. Preserve PSTATE.SS so we can restore the original value
- * for the vcpu after the single-step is disabled.
- */
-static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
-{
- u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
-
- vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
-
- trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
- vcpu->arch.guest_debug_preserved.mdscr_el1);
-
- vcpu->arch.guest_debug_preserved.pstate_ss =
- (*vcpu_cpsr(vcpu) & DBG_SPSR_SS);
-}
-
-static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
-{
- u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
-
- vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
-
- trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
- vcpu_read_sys_reg(vcpu, MDSCR_EL1));
-
- if (vcpu->arch.guest_debug_preserved.pstate_ss)
- *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
- else
- *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-}
-
-/**
- * kvm_arm_init_debug - grab what we need for debug
- *
- * Currently the sole task of this function is to retrieve the initial
- * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
- * presumably been set-up by some knowledgeable bootcode.
- *
- * It is called once per-cpu during CPU hyp initialisation.
- */
-
-void kvm_arm_init_debug(void)
-{
- __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
-}
-
/**
* kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
*
@@ -95,11 +30,14 @@ void kvm_arm_init_debug(void)
*/
static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
{
+ preempt_disable();
+
/*
* This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK
* to disable guest access to the profiling and trace buffers
*/
- vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+ vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN,
+ *host_data_ptr(nr_event_counters));
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
MDCR_EL2_TPMS |
MDCR_EL2_TTRF |
@@ -113,233 +51,215 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
/*
- * Trap debug register access when one of the following is true:
- * - Userspace is using the hardware to debug the guest
- * (KVM_GUESTDBG_USE_HW is set).
- * - The guest is not using debug (DEBUG_DIRTY clear).
- * - The guest has enabled the OS Lock (debug exceptions are blocked).
+ * Trap debug registers if the guest doesn't have ownership of them.
*/
- if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
- !vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
- kvm_vcpu_os_lock_enabled(vcpu))
+ if (!kvm_guest_owns_debug_regs(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
- trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
-}
+ /* Write MDCR_EL2 directly if we're already at EL2 */
+ if (has_vhe())
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-/**
- * kvm_arm_vcpu_init_debug - setup vcpu debug traps
- *
- * @vcpu: the vcpu pointer
- *
- * Set vcpu initial mdcr_el2 value.
- */
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- kvm_arm_setup_mdcr_el2(vcpu);
preempt_enable();
}
-/**
- * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
- * @vcpu: the vcpu pointer
- */
-
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
+void kvm_init_host_debug_data(void)
{
- vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
+ u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ if (cpuid_feature_extract_signed_field(dfr0, ID_AA64DFR0_EL1_PMUVer_SHIFT) > 0)
+ *host_data_ptr(nr_event_counters) = FIELD_GET(ARMV8_PMU_PMCR_N,
+ read_sysreg(pmcr_el0));
+
+ *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0);
+ *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0);
+
+ if (has_vhe())
+ return;
+
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
+ !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
+ host_data_set_flag(HAS_SPE);
+
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT)) {
+ /* Force-disable tracing in protected mode in case there is no TRBE */
+ if (is_protected_kvm_enabled())
+ host_data_set_flag(EL1_TRACING_CONFIGURED);
+
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
+ !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
+ host_data_set_flag(HAS_TRBE);
+ }
}
-/**
- * kvm_arm_setup_debug - set up debug related stuff
+/*
+ * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host
+ * has taken over MDSCR_EL1.
*
- * @vcpu: the vcpu pointer
+ * - Userspace is single-stepping the guest, and MDSCR_EL1.SS is forced to 1.
*
- * This is called before each entry into the hypervisor to setup any
- * debug related registers.
+ * - Userspace is using the breakpoint/watchpoint registers to debug the
+ * guest, and MDSCR_EL1.MDE is forced to 1.
*
- * Additionally, KVM only traps guest accesses to the debug registers if
- * the guest is not actively using them (see the DEBUG_DIRTY
- * flag on vcpu->arch.iflags). Since the guest must not interfere
- * with the hardware state when debugging the guest, we must ensure that
- * trapping is enabled whenever we are debugging the guest using the
- * debug registers.
+ * - The guest has enabled the OS Lock, and KVM is forcing MDSCR_EL1.MDE to 0,
+ * masking all debug exceptions affected by the OS Lock.
*/
-
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
+static void setup_external_mdscr(struct kvm_vcpu *vcpu)
{
- unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
+ /*
+ * Use the guest's MDSCR_EL1 as a starting point, since there are
+ * several other features controlled by MDSCR_EL1 that are not relevant
+ * to the host.
+ *
+ * Clear the bits that KVM may use, which also satisfies emulation of
+ * the OS Lock, as MDSCR_EL1.MDE is cleared.
+ */
+ u64 mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1) & ~(MDSCR_EL1_SS |
+ MDSCR_EL1_MDE |
+ MDSCR_EL1_KDE);
- trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ mdscr |= MDSCR_EL1_SS;
- kvm_arm_setup_mdcr_el2(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW)
+ mdscr |= MDSCR_EL1_MDE | MDSCR_EL1_KDE;
- /* Check if we need to use the debug registers. */
+ vcpu->arch.external_mdscr_el1 = mdscr;
+}
+
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
+{
+ u64 mdscr;
+
+ /* Must be called before kvm_vcpu_load_vhe() */
+ KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm);
+
+ /*
+ * Determine which of the possible debug states we're in:
+ *
+ * - VCPU_DEBUG_HOST_OWNED: KVM has taken ownership of the guest's
+ * breakpoint/watchpoint registers, or needs to use MDSCR_EL1 to do
+ * software step or emulate the effects of the OS Lock being enabled.
+ *
+ * - VCPU_DEBUG_GUEST_OWNED: The guest has debug exceptions enabled, and
+ * the breakpoint/watchpoint registers need to be loaded eagerly.
+ *
+ * - VCPU_DEBUG_FREE: Neither of the above apply, no breakpoint/watchpoint
+ * context needs to be loaded on the CPU.
+ */
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
- /* Save guest debug state */
- save_guest_debug_regs(vcpu);
+ vcpu->arch.debug_owner = VCPU_DEBUG_HOST_OWNED;
+ setup_external_mdscr(vcpu);
/*
- * Single Step (ARM ARM D2.12.3 The software step state
- * machine)
- *
- * If we are doing Single Step we need to manipulate
- * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
- * step has occurred the hypervisor will trap the
- * debug exception and we return to userspace.
- *
- * If the guest attempts to single step its userspace
- * we would have to deal with a trapped exception
- * while in the guest kernel. Because this would be
- * hard to unwind we suppress the guest's ability to
- * do so by masking MDSCR_EL.SS.
- *
- * This confuses guest debuggers which use
- * single-step behind the scenes but everything
- * returns to normal once the host is no longer
- * debugging the system.
+ * Steal the guest's single-step state machine if userspace wants
+ * to single-step the guest.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- /*
- * If the software step state at the last guest exit
- * was Active-pending, we don't set DBG_SPSR_SS so
- * that the state is maintained (to not run another
- * single-step until the pending Software Step
- * exception is taken).
- */
- if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING))
+ if (*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+ vcpu_clear_flag(vcpu, GUEST_SS_ACTIVE_PENDING);
+ else
+ vcpu_set_flag(vcpu, GUEST_SS_ACTIVE_PENDING);
+
+ if (!vcpu_get_flag(vcpu, HOST_SS_ACTIVE_PENDING))
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
else
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr |= DBG_MDSCR_SS;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
- } else {
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr &= ~DBG_MDSCR_SS;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
+ } else {
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
-
- /*
- * HW Breakpoints and watchpoints
- *
- * We simply switch the debug_ptr to point to our new
- * external_debug_state which has been populated by the
- * debug ioctl. The existing DEBUG_DIRTY mechanism ensures
- * the registers are updated on the world switch.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- /* Enable breakpoints/watchpoints */
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr |= DBG_MDSCR_MDE;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
-
- vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
- &vcpu->arch.debug_ptr->dbg_bcr[0],
- &vcpu->arch.debug_ptr->dbg_bvr[0]);
-
- trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
- &vcpu->arch.debug_ptr->dbg_wcr[0],
- &vcpu->arch.debug_ptr->dbg_wvr[0]);
-
- /*
- * The OS Lock blocks debug exceptions in all ELs when it is
- * enabled. If the guest has enabled the OS Lock, constrain its
- * effects to the guest. Emulate the behavior by clearing
- * MDSCR_EL1.MDE. In so doing, we ensure that host debug
- * exceptions are unaffected by guest configuration of the OS
- * Lock.
- */
- } else if (kvm_vcpu_os_lock_enabled(vcpu)) {
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr &= ~DBG_MDSCR_MDE;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
- }
+ if (mdscr & (MDSCR_EL1_KDE | MDSCR_EL1_MDE))
+ vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED;
+ else
+ vcpu->arch.debug_owner = VCPU_DEBUG_FREE;
}
- BUG_ON(!vcpu->guest_debug &&
- vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
-
- /* If KDE or MDE are set, perform a full save/restore cycle. */
- if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- /* Write mdcr_el2 changes since vcpu_load on VHE systems */
- if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-
- trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+ kvm_arm_setup_mdcr_el2(vcpu);
}
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu)
{
- trace_kvm_arm_clear_debug(vcpu->guest_debug);
+ if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ return;
/*
- * Restore the guest's debug registers if we were using them.
+ * Save the host's software step state and restore the guest's before
+ * potentially returning to userspace.
*/
- if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
- /*
- * Mark the vcpu as ACTIVE_PENDING
- * until Software Step exception is taken.
- */
- vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
- }
-
- restore_guest_debug_regs(vcpu);
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
+ vcpu_set_flag(vcpu, HOST_SS_ACTIVE_PENDING);
+ else
+ vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING);
- /*
- * If we were using HW debug we need to restore the
- * debug_ptr to the guest debug state.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- kvm_arm_reset_debug_ptr(vcpu);
+ if (vcpu_get_flag(vcpu, GUEST_SS_ACTIVE_PENDING))
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+}
- trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
- &vcpu->arch.debug_ptr->dbg_bcr[0],
- &vcpu->arch.debug_ptr->dbg_bvr[0]);
+/*
+ * Updates ownership of the debug registers after a trapped guest access to a
+ * breakpoint/watchpoint register. Host ownership of the debug registers is of
+ * strictly higher priority, and it is the responsibility of the VMM to emulate
+ * guest debug exceptions in this configuration.
+ */
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu)
+{
+ if (kvm_host_owns_debug_regs(vcpu))
+ return;
- trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
- &vcpu->arch.debug_ptr->dbg_wcr[0],
- &vcpu->arch.debug_ptr->dbg_wvr[0]);
- }
- }
+ vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED;
+ kvm_arm_setup_mdcr_el2(vcpu);
}
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
{
- u64 dfr0;
+ if (val & OSLAR_EL1_OSLK)
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK;
+ else
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK;
- /* For VHE, there is nothing to do */
- if (has_vhe())
+ preempt_disable();
+ kvm_arch_vcpu_put(vcpu);
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
+}
+
+void kvm_enable_trbe(void)
+{
+ if (has_vhe() || is_protected_kvm_enabled() ||
+ WARN_ON_ONCE(preemptible()))
return;
- dfr0 = read_sysreg(id_aa64dfr0_el1);
- /*
- * If SPE is present on this CPU and is available at current EL,
- * we may need to check if the host state needs to be saved.
- */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
- !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(PMBIDR_EL1_P_SHIFT)))
- vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
+ host_data_set_flag(TRBE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(kvm_enable_trbe);
+
+void kvm_disable_trbe(void)
+{
+ if (has_vhe() || is_protected_kvm_enabled() ||
+ WARN_ON_ONCE(preemptible()))
+ return;
- /* Check if we have TRBE implemented and available at the host */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
- !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
- vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
+ host_data_clear_flag(TRBE_ENABLED);
}
+EXPORT_SYMBOL_GPL(kvm_disable_trbe);
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
+void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest)
{
- vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
- vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
+ if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible()))
+ return;
+
+ if (has_vhe()) {
+ write_sysreg_s(trfcr_while_in_guest, SYS_TRFCR_EL12);
+ return;
+ }
+
+ *host_data_ptr(trfcr_while_in_guest) = trfcr_while_in_guest;
+ if (read_sysreg_s(SYS_TRFCR_EL1) != trfcr_while_in_guest)
+ host_data_set_flag(EL1_TRACING_CONFIGURED);
+ else
+ host_data_clear_flag(EL1_TRACING_CONFIGURED);
}
+EXPORT_SYMBOL_GPL(kvm_tracing_set_el1_configuration);
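
The rewritten debug.c reduces per-run bookkeeping to a single ownership
decision made at vcpu_load time. Below is a self-contained sketch of that
tri-state decision; only the MDSCR_EL1 bit positions and the priority order
come from the code above, the types and fields are illustrative stand-ins:

#include <stdbool.h>
#include <stdint.h>

enum vcpu_debug_owner {
	VCPU_DEBUG_FREE,
	VCPU_DEBUG_GUEST_OWNED,
	VCPU_DEBUG_HOST_OWNED,
};

/* Stand-in for the relevant vcpu state. */
struct demo_vcpu {
	uint64_t guest_debug;   /* KVM_GUESTDBG_* bits from userspace */
	bool os_lock_enabled;   /* guest's OSLSR_EL1.OSLK */
	uint64_t mdscr_el1;     /* guest view of MDSCR_EL1 */
};

#define MDSCR_EL1_MDE (1ULL << 15)
#define MDSCR_EL1_KDE (1ULL << 13)

static enum vcpu_debug_owner choose_owner(const struct demo_vcpu *vcpu)
{
	/* Host (userspace) debug or OS Lock emulation always wins. */
	if (vcpu->guest_debug || vcpu->os_lock_enabled)
		return VCPU_DEBUG_HOST_OWNED;

	/* The guest owns the registers only if it enabled debug exceptions. */
	if (vcpu->mdscr_el1 & (MDSCR_EL1_KDE | MDSCR_EL1_MDE))
		return VCPU_DEBUG_GUEST_OWNED;

	return VCPU_DEBUG_FREE;
}

int main(void)
{
	struct demo_vcpu v = { .guest_debug = 0, .os_lock_enabled = false,
			       .mdscr_el1 = MDSCR_EL1_MDE };

	return choose_owner(&v) == VCPU_DEBUG_GUEST_OWNED ? 0 : 1;
}
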
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index f1b7287e1f3c..607d37bab70b 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -89,6 +89,9 @@ enum cgt_group_id {
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_CNTHCTL_EL1TVT,
+ CGT_CNTHCTL_EL1TVCT,
+
CGT_ICH_HCR_TC,
CGT_ICH_HCR_TALL0,
CGT_ICH_HCR_TALL1,
@@ -124,6 +127,8 @@ enum cgt_group_id {
__COMPLEX_CONDITIONS__,
CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
CGT_CNTHCTL_EL1PTEN,
+ CGT_CNTHCTL_EL1NVPCT,
+ CGT_CNTHCTL_EL1NVVCT,
CGT_CPTR_TTA,
CGT_MDCR_HPMN,
@@ -393,6 +398,18 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_RW,
},
+ [CGT_CNTHCTL_EL1TVT] = {
+ .index = CNTHCTL_EL2,
+ .value = CNTHCTL_EL1TVT,
+ .mask = CNTHCTL_EL1TVT,
+ .behaviour = BEHAVE_FORWARD_RW,
+ },
+ [CGT_CNTHCTL_EL1TVCT] = {
+ .index = CNTHCTL_EL2,
+ .value = CNTHCTL_EL1TVCT,
+ .mask = CNTHCTL_EL1TVCT,
+ .behaviour = BEHAVE_FORWARD_READ,
+ },
[CGT_ICH_HCR_TC] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TC,
@@ -487,6 +504,32 @@ static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
return BEHAVE_FORWARD_RW;
}
+static bool is_nested_nv2_guest(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, HCR_EL2);
+ return ((val & (HCR_E2H | HCR_TGE | HCR_NV2 | HCR_NV1 | HCR_NV)) == (HCR_E2H | HCR_NV2 | HCR_NV));
+}
+
+static enum trap_behaviour check_cnthctl_el1nvpct(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_nv2_guest(vcpu) ||
+ !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVPCT))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_RW;
+}
+
+static enum trap_behaviour check_cnthctl_el1nvvct(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_nv2_guest(vcpu) ||
+ !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVVCT))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_RW;
+}
+
static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu)
{
u64 val = __vcpu_sys_reg(vcpu, CPTR_EL2);
@@ -534,6 +577,8 @@ static enum trap_behaviour check_mdcr_hpmn(struct kvm_vcpu *vcpu)
static const complex_condition_check ccc[] = {
CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+ CCC(CGT_CNTHCTL_EL1NVPCT, check_cnthctl_el1nvpct),
+ CCC(CGT_CNTHCTL_EL1NVVCT, check_cnthctl_el1nvvct),
CCC(CGT_CPTR_TTA, check_cptr_tta),
CCC(CGT_MDCR_HPMN, check_mdcr_hpmn),
};
@@ -850,11 +895,15 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SYS_CNTHP_CVAL_EL2, CGT_HCR_NV),
SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2,
SYS_CNTHV_CVAL_EL2, CGT_HCR_NV),
- /* All _EL02, _EL12 registers */
+ /* All _EL02, _EL12 registers up to CNTKCTL_EL12 */
SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
- sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+ sys_reg(3, 5, 14, 1, 0), CGT_HCR_NV),
+ SR_TRAP(SYS_CNTP_CTL_EL02, CGT_CNTHCTL_EL1NVPCT),
+ SR_TRAP(SYS_CNTP_CVAL_EL02, CGT_CNTHCTL_EL1NVPCT),
+ SR_TRAP(SYS_CNTV_CTL_EL02, CGT_CNTHCTL_EL1NVVCT),
+ SR_TRAP(SYS_CNTV_CVAL_EL02, CGT_CNTHCTL_EL1NVVCT),
SR_TRAP(OP_AT_S1E2R, CGT_HCR_NV),
SR_TRAP(OP_AT_S1E2W, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E1R, CGT_HCR_NV),
@@ -1184,6 +1233,11 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
+ SR_TRAP(SYS_CNTV_TVAL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTV_CVAL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTV_CTL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTVCT_EL0, CGT_CNTHCTL_EL1TVCT),
+ SR_TRAP(SYS_CNTVCTSS_EL0, CGT_CNTHCTL_EL1TVCT),
SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM),
/*
* IMPDEF choice:
@@ -2345,14 +2399,14 @@ inject:
return true;
}
-static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit)
{
bool control_bit_set;
if (!vcpu_has_nv(vcpu))
return false;
- control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+ control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit;
if (!is_hyp_ctxt(vcpu) && control_bit_set) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
@@ -2360,9 +2414,24 @@ static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
return false;
}
+static bool forward_hcr_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ return __forward_traps(vcpu, HCR_EL2, control_bit);
+}
+
bool forward_smc_trap(struct kvm_vcpu *vcpu)
{
- return forward_traps(vcpu, HCR_TSC);
+ return forward_hcr_traps(vcpu, HCR_TSC);
+}
+
+static bool forward_mdcr_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ return __forward_traps(vcpu, MDCR_EL2, control_bit);
+}
+
+bool forward_debug_exception(struct kvm_vcpu *vcpu)
+{
+ return forward_mdcr_traps(vcpu, MDCR_EL2_TDE);
}
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
@@ -2406,7 +2475,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
* Forward this trap to the virtual EL2 if the virtual
* HCR_EL2.NV bit is set and this is coming from !EL2.
*/
- if (forward_traps(vcpu, HCR_NV))
+ if (forward_hcr_traps(vcpu, HCR_NV))
return;
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
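
The refactor above generalises forward_traps() from HCR_EL2 to an arbitrary
control register, so MDCR_EL2.TDE-based forwarding of debug exceptions can
reuse the same path. A hedged model of the pattern, with stand-in types and
storage:

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins: in KVM these are sysreg indices into per-vcpu storage. */
enum ctl_reg { HCR_EL2, MDCR_EL2 };

struct demo_vcpu {
	bool has_nv;        /* nested virt enabled for this guest */
	bool in_hyp_ctxt;   /* currently executing as the virtual EL2 */
	uint64_t regs[2];   /* shadow HCR_EL2/MDCR_EL2 values */
};

/* Returns true if the trap must be reinjected into the virtual EL2. */
static bool forward_trap(struct demo_vcpu *vcpu, enum ctl_reg reg, uint64_t bit)
{
	if (!vcpu->has_nv)
		return false;

	/* Only forward when the guest hypervisor asked for the trap and
	 * the exception came from vEL1/vEL0, not from vEL2 itself. */
	return !vcpu->in_hyp_ctxt && (vcpu->regs[reg] & bit);
}

int main(void)
{
	struct demo_vcpu v = { .has_nv = true, .in_hyp_ctxt = false };

	v.regs[MDCR_EL2] = 1ULL << 8; /* MDCR_EL2.TDE */
	return forward_trap(&v, MDCR_EL2, 1ULL << 8) ? 0 : 1;
}
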
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 98718bd65bf1..4d3d1a2eb157 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -65,14 +65,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
*host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
- vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+ host_data_clear_flag(HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
+ host_data_set_flag(HOST_SVE_ENABLED);
if (system_supports_sme()) {
- vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+ host_data_clear_flag(HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+ host_data_set_flag(HOST_SME_ENABLED);
/*
* If PSTATE.SM is enabled then save any pending FP
@@ -168,7 +168,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
*/
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
- if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
+ if (host_data_test_flag(HOST_SME_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN);
else
sysreg_clear_set(CPACR_EL1,
@@ -227,7 +227,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* for EL0. To avoid spurious traps, restore the trap state
* seen by kvm_arch_vcpu_load_fp():
*/
- if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
+ if (host_data_test_flag(HOST_SVE_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
else
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 12dad841f2a5..2196979a24a3 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -917,31 +917,24 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- int ret = 0;
-
trace_kvm_set_guest_debug(vcpu, dbg->control);
- if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
- ret = -EINVAL;
- goto out;
- }
-
- if (dbg->control & KVM_GUESTDBG_ENABLE) {
- vcpu->guest_debug = dbg->control;
-
- /* Hardware assisted Break and Watch points */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- vcpu->arch.external_debug_state = dbg->arch;
- }
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+ return -EINVAL;
- } else {
- /* If not enabled clear all flags */
+ if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
vcpu->guest_debug = 0;
- vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING);
+ return 0;
}
-out:
- return ret;
+ vcpu->guest_debug = dbg->control;
+
+ /* Hardware assisted Break and Watch points */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW)
+ vcpu->arch.external_debug_state = dbg->arch;
+
+ return 0;
}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
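
For context, the restructured ioctl above is driven from userspace roughly as
follows; a minimal sketch assuming vcpu_fd is a valid vcpu file descriptor
obtained via KVM_CREATE_VCPU (error handling elided):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Ask KVM to single-step the guest. */
static int enable_single_step(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
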
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index d7c2990e7c9e..512d152233ff 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -183,6 +183,9 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
u64 esr = kvm_vcpu_get_esr(vcpu);
+ if (!vcpu->guest_debug && forward_debug_exception(vcpu))
+ return 1;
+
run->exit_reason = KVM_EXIT_DEBUG;
run->debug.arch.hsr = lower_32_bits(esr);
run->debug.arch.hsr_high = upper_32_bits(esr);
@@ -193,7 +196,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
run->debug.arch.far = vcpu->arch.fault.far_el2;
break;
case ESR_ELx_EC_SOFTSTP_LOW:
- vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
break;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index d00093699aaf..502a5b73ee70 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -88,15 +88,26 @@
default: write_debug(ptr[0], reg, 0); \
}
+static struct kvm_guest_debug_arch *__vcpu_debug_regs(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.debug_owner) {
+ case VCPU_DEBUG_FREE:
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case VCPU_DEBUG_GUEST_OWNED:
+ return &vcpu->arch.vcpu_debug_state;
+ case VCPU_DEBUG_HOST_OWNED:
+ return &vcpu->arch.external_debug_state;
+ }
+
+ return NULL;
+}
+
static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
save_debug(dbg->dbg_bcr, dbgbcr, brps);
save_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -109,13 +120,8 @@ static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
-
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
restore_debug(dbg->dbg_bcr, dbgbcr, brps);
restore_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -132,13 +138,13 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = host_data_ptr(host_debug_state.regs);
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(host_dbg, host_ctxt);
__debug_restore_state(guest_dbg, guest_ctxt);
@@ -151,18 +157,16 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = host_data_ptr(host_debug_state.regs);
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt);
-
- vcpu_clear_flag(vcpu, DEBUG_DIRTY);
}
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
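
The open-coded "(aa64dfr0 >> 12) & 0xf" extraction removed above corresponds
to the ID_AA64DFR0_EL1.BRPs field (bits [15:12]); WRPs sits at [23:20], and
architecturally both encode a count minus one. A standalone sketch of the
extraction, with a made-up register value:

#include <stdint.h>
#include <stdio.h>

/* Extract a 4-bit ID register field at the given bit offset. */
static unsigned int field4(uint64_t reg, unsigned int shift)
{
	return (reg >> shift) & 0xf;
}

int main(void)
{
	uint64_t dfr0 = 0x0000000000505010ULL; /* made-up example value */

	printf("breakpoints: %u, watchpoints: %u\n",
	       field4(dfr0, 12) + 1, field4(dfr0, 20) + 1);
	return 0;
}
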
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index abfa6ad92e91..f838a45665f2 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -501,7 +501,12 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
+static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
+{
+ return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt);
+}
+
+static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
u32 sysreg;
@@ -511,18 +516,19 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
* We only get here for 64bit guests, 32bit guests will hit
* the long and winding road all the way to the standard
* handling. Yes, it sucks to be irrelevant.
+ *
+ * Also, we only deal with non-hypervisor context here (either
+ * an EL1 guest, or a non-HYP context of an EL2 guest).
*/
+ if (is_hyp_ctxt(vcpu))
+ return false;
+
sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
switch (sysreg) {
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
if (vcpu_has_nv(vcpu)) {
- if (is_hyp_ctxt(vcpu)) {
- ctxt = vcpu_hptimer(vcpu);
- break;
- }
-
/* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
if (!vcpu_el2_e2h_is_set(vcpu))
@@ -534,16 +540,23 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
ctxt = vcpu_ptimer(vcpu);
break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ if (vcpu_has_nv(vcpu)) {
+ /* Check for guest hypervisor trapping */
+ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ if (val & CNTHCTL_EL1TVCT)
+ return false;
+ }
+
+ ctxt = vcpu_vtimer(vcpu);
+ break;
default:
return false;
}
- val = arch_timer_read_cntpct_el0();
-
- if (ctxt->offset.vm_offset)
- val -= *kern_hyp_va(ctxt->offset.vm_offset);
- if (ctxt->offset.vcpu_offset)
- val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
+ val = compute_counter_value(ctxt);
vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
__kvm_skip_instr(vcpu);
@@ -588,7 +601,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (kvm_hyp_handle_cntpct(vcpu))
+ if (kvm_handle_cntxct(vcpu))
return true;
return false;
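
compute_counter_value() above centralises what was previously open-coded
offset handling: a guest-visible counter is the physical counter minus the
offset programmed for that timer context. A trivial sketch with invented
numbers:

#include <stdint.h>
#include <stdio.h>

/* The guest reads CNTPCT/CNTVCT as the physical counter minus the
 * per-context offset (CNTVOFF_EL2 and friends). */
static uint64_t guest_counter(uint64_t cntpct, uint64_t offset)
{
	return cntpct - offset;
}

int main(void)
{
	uint64_t cntpct = 1000000;  /* invented physical counter value */
	uint64_t voff   = 250000;   /* invented virtual offset */

	printf("guest sees %llu\n",
	       (unsigned long long)guest_counter(cntpct, voff));
	return 0;
}
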
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index a651c43ad679..76ff095c6b6e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -18,9 +18,34 @@
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
+static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return vcpu;
+}
+
+static inline bool ctxt_is_guest(struct kvm_cpu_context *ctxt)
+{
+ return host_data_ptr(host_ctxt) != ctxt;
+}
+
+static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
+
+ if (ctxt_is_guest(ctxt) && kvm_host_owns_debug_regs(vcpu))
+ return &vcpu->arch.external_mdscr_el1;
+
+ return &ctxt_sys_reg(ctxt, MDSCR_EL1);
+}
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+ *ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
@@ -33,16 +58,6 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
-static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
-{
- struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
-
- if (!vcpu)
- vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
-
- return vcpu;
-}
-
static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
{
struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
@@ -139,7 +154,7 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+ write_sysreg(*ctxt_mdscr_el1(ctxt), mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
@@ -283,7 +298,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}
@@ -300,7 +315,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}
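
ctxt_mdscr_el1() above redirects the MDSCR_EL1 save/restore target: while the
host owns the debug state, the hardware register round-trips through the
external copy and the guest's own view is left untouched. A toy model of that
pointer selection, with stand-in fields:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for the relevant vcpu state. */
struct demo_vcpu {
	bool host_owns_debug;     /* kvm_host_owns_debug_regs() */
	uint64_t guest_mdscr;     /* the guest's own MDSCR_EL1 view */
	uint64_t external_mdscr;  /* the value imposed by the host */
};

/* Select which copy backs the hardware register on this switch. */
static uint64_t *mdscr_slot(struct demo_vcpu *vcpu)
{
	return vcpu->host_owns_debug ? &vcpu->external_mdscr
				     : &vcpu->guest_mdscr;
}

/* Save then restore through the selected slot, e.g. (hypothetical helpers):
 *   *mdscr_slot(vcpu) = read_hw_mdscr();    on save
 *   write_hw_mdscr(*mdscr_slot(vcpu));      on restore
 */
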
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
deleted file mode 100644
index f957890c7e38..000000000000
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2021 Google LLC
- * Author: Fuad Tabba <tabba@google.com>
- */
-
-#ifndef __ARM64_KVM_FIXED_CONFIG_H__
-#define __ARM64_KVM_FIXED_CONFIG_H__
-
-#include <asm/sysreg.h>
-
-/*
- * This file contains definitions for features to be allowed or restricted for
- * guest virtual machines, depending on the mode KVM is running in and on the
- * type of guest that is running.
- *
- * The ALLOW masks represent a bitmask of feature fields that are allowed
- * without any restrictions as long as they are supported by the system.
- *
- * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
- * features that are restricted to support at most the specified feature.
- *
- * If a feature field is not present in either, than it is not supported.
- *
- * The approach taken for protected VMs is to allow features that are:
- * - Needed by common Linux distributions (e.g., floating point)
- * - Trivial to support, e.g., supporting the feature does not introduce or
- * require tracking of additional state in KVM
- * - Cannot be trapped or prevent the guest from using anyway
- */
-
-/*
- * Allow for protected VMs:
- * - Floating-point and Advanced SIMD
- * - Data Independent Timing
- * - Spectre/Meltdown Mitigation
- */
-#define PVM_ID_AA64PFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - AArch64 guests only (no support for AArch32 guests):
- * AArch32 adds complexity in trap handling, emulation, condition codes,
- * etc...
- * - RAS (v1)
- * Supported by KVM
- */
-#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \
- )
-
-/*
- * Allow for protected VMs:
- * - Branch Target Identification
- * - Speculative Store Bypassing
- */
-#define PVM_ID_AA64PFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
- )
-
-#define PVM_ID_AA64PFR2_ALLOW 0ULL
-
-/*
- * Allow for protected VMs:
- * - Mixed-endian
- * - Distinction between Secure and Non-secure Memory
- * - Mixed-endian at EL0 only
- * - Non-context synchronizing exception entry and exit
- */
-#define PVM_ID_AA64MMFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - 40-bit IPA
- * - 16-bit ASID
- */
-#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
- )
-
-/*
- * Allow for protected VMs:
- * - Hardware translation table updates to Access flag and Dirty state
- * - Number of VMID bits from CPU
- * - Hierarchical Permission Disables
- * - Privileged Access Never
- * - SError interrupt exceptions from speculative reads
- * - Enhanced Translation Synchronization
- * - Control for cache maintenance permission
- */
-#define PVM_ID_AA64MMFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
- )
-
-/*
- * Allow for protected VMs:
- * - Common not Private translations
- * - User Access Override
- * - IESB bit in the SCTLR_ELx registers
- * - Unaligned single-copy atomicity and atomic functions
- * - ESR_ELx.EC value on an exception by read access to feature ID space
- * - TTL field in address operations.
- * - Break-before-make sequences when changing translation block size
- * - E0PDx mechanism
- */
-#define PVM_ID_AA64MMFR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
- )
-
-#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
-
-/*
- * No support for Scalable Vectors for protected VMs:
- * Requires additional support from KVM, e.g., context-switching and
- * trapping at EL2
- */
-#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
-
-/*
- * No support for debug, including breakpoints, and watchpoints for protected
- * VMs:
- * The Arm architecture mandates support for at least the Armv8 debug
- * architecture, which would include at least 2 hardware breakpoints and
- * watchpoints. Providing that support to protected guests adds
- * considerable state and complexity. Therefore, the reserved value of 0 is
- * used for debug-related fields.
- */
-#define PVM_ID_AA64DFR0_ALLOW (0ULL)
-#define PVM_ID_AA64DFR1_ALLOW (0ULL)
-
-/*
- * No support for implementation defined features.
- */
-#define PVM_ID_AA64AFR0_ALLOW (0ULL)
-#define PVM_ID_AA64AFR1_ALLOW (0ULL)
-
-/*
- * No restrictions on instructions implemented in AArch64.
- */
-#define PVM_ID_AA64ISAR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
- )
-
-/* Restrict pointer authentication to the basic version. */
-#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
- )
-
-#define PVM_ID_AA64ISAR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
- )
-
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
-bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
-bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
-int kvm_check_pvm_sysreg_table(void);
-
-#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 97c527ef53c2..3766333bace9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_NO_ORDER USHRT_MAX
+#define HYP_NO_ORDER ((u8)(~0))
struct hyp_pool {
/*
@@ -19,11 +19,11 @@ struct hyp_pool {
struct list_head free_area[NR_PAGE_ORDERS];
phys_addr_t range_start;
phys_addr_t range_end;
- unsigned short max_order;
+ u8 max_order;
};
/* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void *hyp_alloc_pages(struct hyp_pool *pool, u8 order);
void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0972faccc2af..978f38c386ee 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -11,40 +11,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/virt.h>
+#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/spinlock.h>
-/*
- * SW bits 0-1 are reserved to track the memory ownership state of each page:
- * 00: The page is owned exclusively by the page-table owner.
- * 01: The page is owned by the page-table owner, but is shared
- * with another entity.
- * 10: The page is shared with, but not owned by the page-table owner.
- * 11: Reserved for future use (lending).
- */
-enum pkvm_page_state {
- PKVM_PAGE_OWNED = 0ULL,
- PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
- PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
- __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
- KVM_PGTABLE_PROT_SW1,
-
- /* Meta-states which aren't encoded directly in the PTE's SW bits */
- PKVM_NOPAGE,
-};
-
-#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
-static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
- enum pkvm_page_state state)
-{
- return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
-}
-
-static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
-{
- return prot & PKVM_PAGE_STATE_PROT_MASK;
-}
-
struct host_mmu {
struct kvm_arch arch;
struct kvm_pgtable pgt;
@@ -69,6 +39,13 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+ enum kvm_pgtable_prot prot);
+int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
+int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index ab205c4d6774..34233d586060 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,9 +7,47 @@
#include <linux/types.h>
+/*
+ * Bits 0-1 are reserved to track the memory ownership state of each page:
+ * 00: The page is owned exclusively by the page-table owner.
+ * 01: The page is owned by the page-table owner, but is shared
+ * with another entity.
+ * 10: The page is shared with, but not owned by the page-table owner.
+ * 11: Reserved for future use (lending).
+ */
+enum pkvm_page_state {
+ PKVM_PAGE_OWNED = 0ULL,
+ PKVM_PAGE_SHARED_OWNED = BIT(0),
+ PKVM_PAGE_SHARED_BORROWED = BIT(1),
+ __PKVM_PAGE_RESERVED = BIT(0) | BIT(1),
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE = BIT(2),
+};
+#define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED)
+
+#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
+static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
+ enum pkvm_page_state state)
+{
+ prot &= ~PKVM_PAGE_STATE_PROT_MASK;
+ prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state);
+ return prot;
+}
+
+static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
+{
+ return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot);
+}
+
struct hyp_page {
- unsigned short refcount;
- unsigned short order;
+ u16 refcount;
+ u8 order;
+
+ /* Host (non-meta) state. Guarded by the host stage-2 lock. */
+ enum pkvm_page_state host_state : 8;
+
+ u32 host_share_guest_count;
};
extern u64 __hyp_vmemmap;
@@ -29,7 +67,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
-#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
+
+static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
+{
+ BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64));
+ return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
+}
+
#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))
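
With the FIELD_PREP()/FIELD_GET() form above, the pkvm_page_state enum values
no longer need to coincide with the PTE software-bit positions. A
self-contained round-trip sketch using hand-rolled equivalents of those
macros, with an arbitrary bit position for illustration:

#include <stdint.h>
#include <stdio.h>

#define SW_BITS_SHIFT 55                       /* illustrative SW0/SW1 slot */
#define SW_BITS_MASK  (3ULL << SW_BITS_SHIFT)

enum page_state { OWNED = 0, SHARED_OWNED = 1, SHARED_BORROWED = 2 };

/* Hand-rolled FIELD_PREP(): clear the field, then place the state in it. */
static uint64_t mkstate(uint64_t prot, enum page_state st)
{
	prot &= ~SW_BITS_MASK;
	return prot | ((uint64_t)st << SW_BITS_SHIFT);
}

/* Hand-rolled FIELD_GET(): mask the field and shift it back down. */
static enum page_state getstate(uint64_t prot)
{
	return (prot & SW_BITS_MASK) >> SW_BITS_SHIFT;
}

int main(void)
{
	uint64_t prot = mkstate(0, SHARED_BORROWED);

	printf("state round-trips: %d\n", getstate(prot) == SHARED_BORROWED);
	return 0;
}
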
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 24a9a8330d19..e42bf68c8848 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -20,6 +20,12 @@ struct pkvm_hyp_vcpu {
/* Backpointer to the host's (untrusted) vCPU instance. */
struct kvm_vcpu *host_vcpu;
+
+ /*
+ * If this hyp vCPU is loaded, then this is a backpointer to the
+ * per-cpu pointer tracking us; otherwise NULL.
+ */
+ struct pkvm_hyp_vcpu **loaded_hyp_vcpu;
};
/*
@@ -47,6 +53,8 @@ struct pkvm_hyp_vm {
struct pkvm_hyp_vcpu *vcpus[];
};
+extern hyp_spinlock_t vm_table_lock;
+
static inline struct pkvm_hyp_vm *
pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
{
@@ -58,6 +66,11 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
return vcpu_is_protected(&hyp_vcpu->vcpu);
}
+static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
+{
+ return kvm_vm_is_protected(&hyp_vm->kvm);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
@@ -69,5 +82,15 @@ int __pkvm_teardown_vm(pkvm_handle_t handle);
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
unsigned int vcpu_idx);
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void);
+
+struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle);
+struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle);
+void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm);
+
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu);
+int kvm_check_pvm_sysreg_table(void);
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
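
The loaded_hyp_vcpu backpointer introduced above lets the put path find and
clear the per-CPU tracking slot without knowing which CPU the vCPU was loaded
on. A reduced model of the load/put pairing, with stand-in types:

#include <stddef.h>

#define NR_CPUS 4

struct hyp_vcpu {
	struct hyp_vcpu **loaded_slot; /* backpointer when loaded, else NULL */
};

/* One slot per physical CPU tracking the currently loaded vcpu. */
static struct hyp_vcpu *loaded_vcpu[NR_CPUS];

static int vcpu_load(int cpu, struct hyp_vcpu *vcpu)
{
	if (loaded_vcpu[cpu])
		return -1; /* another vcpu is already resident */

	loaded_vcpu[cpu] = vcpu;
	vcpu->loaded_slot = &loaded_vcpu[cpu];
	return 0;
}

static void vcpu_put(struct hyp_vcpu *vcpu)
{
	/* The backpointer clears the right slot regardless of CPU. */
	*vcpu->loaded_slot = NULL;
	vcpu->loaded_slot = NULL;
}

int main(void)
{
	struct hyp_vcpu v = { 0 };

	if (vcpu_load(0, &v))
		return 1;
	vcpu_put(&v);
	return loaded_vcpu[0] == NULL ? 0 : 1;
}
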
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 53efda0235cf..2f4a4f5036bb 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -51,42 +51,55 @@ static void __debug_restore_spe(u64 pmscr_el1)
write_sysreg_el1(pmscr_el1, SYS_PMSCR);
}
-static void __debug_save_trace(u64 *trfcr_el1)
+static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
{
- *trfcr_el1 = 0;
+ *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
+ write_sysreg_el1(new_trfcr, SYS_TRFCR);
+}
- /* Check if the TRBE is enabled */
- if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
- return;
- /*
- * Prohibit trace generation while we are in guest.
- * Since access to TRFCR_EL1 is trapped, the guest can't
- * modify the filtering set by the host.
- */
- *trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
- write_sysreg_el1(0, SYS_TRFCR);
- isb();
- /* Drain the trace buffer to memory */
- tsb_csync();
+static bool __trace_needs_drain(void)
+{
+ if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
+ return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
+
+ return host_data_test_flag(TRBE_ENABLED);
}
-static void __debug_restore_trace(u64 trfcr_el1)
+static bool __trace_needs_switch(void)
{
- if (!trfcr_el1)
- return;
+ return host_data_test_flag(TRBE_ENABLED) ||
+ host_data_test_flag(EL1_TRACING_CONFIGURED);
+}
- /* Restore trace filter controls */
- write_sysreg_el1(trfcr_el1, SYS_TRFCR);
+static void __trace_switch_to_guest(void)
+{
+ /* Unsupported with TRBE so disable */
+ if (host_data_test_flag(TRBE_ENABLED))
+ *host_data_ptr(trfcr_while_in_guest) = 0;
+
+ __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
+ *host_data_ptr(trfcr_while_in_guest));
+
+ if (__trace_needs_drain()) {
+ isb();
+ tsb_csync();
+ }
+}
+
+static void __trace_switch_to_host(void)
+{
+ __trace_do_switch(host_data_ptr(trfcr_while_in_guest),
+ *host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
+ if (host_data_test_flag(HAS_SPE))
__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
- /* Disable and flush Self-Hosted Trace generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
+
+ if (__trace_needs_switch())
+ __trace_switch_to_guest();
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -96,18 +109,13 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
+ if (host_data_test_flag(HAS_SPE))
__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
+ if (__trace_needs_switch())
+ __trace_switch_to_host();
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 3d610fc51f4d..58f0cb2298cc 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -188,12 +188,12 @@ SYM_FUNC_END(__host_hvc)
/*
* Test whether the SP has overflowed, without corrupting a GPR.
- * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
+ * nVHE hypervisor stacks are aligned so that the NVHE_STACK_SHIFT bit
* of SP should always be 1.
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
- tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
+ tbz x0, #NVHE_STACK_SHIFT, .L__hyp_sp_overflow\@
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
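
The overflow test above works because nVHE stacks are sized and aligned so
that bit NVHE_STACK_SHIFT of every in-bounds SP is 1; falling into the guard
region below the stack clears it. A C model of the invariant, assuming an
8KiB stack placed in an invented VA slot:

#include <stdint.h>
#include <stdio.h>

#define NVHE_STACK_SHIFT 13                 /* assumption: 8KiB stacks */
#define NVHE_STACK_SIZE  (1UL << NVHE_STACK_SHIFT)

/* Stacks sit in the upper half of a 2*NVHE_STACK_SIZE private VA slot,
 * so any SP within the stack has bit NVHE_STACK_SHIFT set. */
static int sp_overflowed(uint64_t sp)
{
	return !(sp & (1UL << NVHE_STACK_SHIFT));
}

int main(void)
{
	uint64_t base = 0xffff800000000000ULL + NVHE_STACK_SIZE; /* invented */

	printf("in-bounds: %d\n", sp_overflowed(base + NVHE_STACK_SIZE - 16));
	printf("overflow:  %d\n", sp_overflowed(base - 16));
	return 0;
}
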
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 6c90ef6736d6..5c134520e180 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
/* Limit guest vector length to the maximum supported by the host. */
hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
- hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
-
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
@@ -112,8 +110,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
-
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
@@ -141,16 +137,46 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
+static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2);
+ DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
+ if (!hyp_vcpu)
+ return;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
+ /* Propagate WFx trapping flags */
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
+ hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
+ }
+}
+
+static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
+{
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (hyp_vcpu)
+ pkvm_put_hyp_vcpu(hyp_vcpu);
+}
+
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
int ret;
- host_vcpu = kern_hyp_va(host_vcpu);
-
if (unlikely(is_protected_kvm_enabled())) {
- struct pkvm_hyp_vcpu *hyp_vcpu;
- struct kvm *host_kvm;
+ struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
/*
* KVM (and pKVM) doesn't support SME guests for now, and
@@ -163,9 +189,6 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
goto out;
}
- host_kvm = kern_hyp_va(host_vcpu->kvm);
- hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
- host_vcpu->vcpu_idx);
if (!hyp_vcpu) {
ret = -EINVAL;
goto out;
@@ -176,12 +199,141 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
sync_hyp_vcpu(hyp_vcpu);
- pkvm_put_hyp_vcpu(hyp_vcpu);
} else {
/* The host is fully trusted, run its vCPU directly. */
- ret = __kvm_vcpu_run(host_vcpu);
+ ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu));
}
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache,
+ host_vcpu->arch.pkvm_memcache.nr_pages,
+ &host_vcpu->arch.pkvm_memcache);
+}
+
+static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = pkvm_refill_memcache(hyp_vcpu);
+ if (ret)
+ goto out;
+
+ ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_unshare_guest(gfn, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, gfn, host_ctxt, 1);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(bool, mkold, host_ctxt, 3);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, gfn, host_ctxt, 1);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu);
out:
cpu_reg(host_ctxt, 1) = ret;
}
@@ -233,6 +385,22 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
}
+static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ struct pkvm_hyp_vm *hyp_vm;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ return;
+
+ __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
+ put_pkvm_hyp_vm(hyp_vm);
+}
+
static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -264,11 +432,6 @@ static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
__vgic_v3_init_lrs();
}
-static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
-}
-
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
@@ -384,7 +547,6 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
static const hcall_t host_hcall[] = {
/* ___kvm_hyp_init */
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_cpu_set_vector),
@@ -395,6 +557,12 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
+ HANDLE_FUNC(__pkvm_host_share_guest),
+ HANDLE_FUNC(__pkvm_host_unshare_guest),
+ HANDLE_FUNC(__pkvm_host_relax_perms_guest),
+ HANDLE_FUNC(__pkvm_host_wrprotect_guest),
+ HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
+ HANDLE_FUNC(__pkvm_host_mkyoung_guest),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
@@ -409,6 +577,9 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
+ HANDLE_FUNC(__pkvm_vcpu_load),
+ HANDLE_FUNC(__pkvm_vcpu_put),
+ HANDLE_FUNC(__pkvm_tlb_flush_vmid),
};
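
For reference, dispatch over this table is an index lookup on the SMCCC function ID; a simplified sketch of the surrounding handle_host_hcall(), omitting the hcall_min gating that kicks in once protected mode is finalised:

	static void handle_host_hcall_sketch(struct kvm_cpu_context *host_ctxt)
	{
		DECLARE_REG(unsigned long, id, host_ctxt, 0);
		hcall_t hfn;

		/* Function IDs index straight into host_hcall[]. */
		id -= KVM_HOST_SMCCC_ID(0);
		if (id >= ARRAY_SIZE(host_hcall) || !(hfn = host_hcall[id])) {
			cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
			return;
		}

		cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
		hfn(host_ctxt);
	}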
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index e75374d682f4..7ad7b133b81a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc)
memset(addr, 0, PAGE_SIZE);
p = hyp_virt_to_page(addr);
- memset(p, 0, sizeof(*p));
p->refcount = 1;
+ p->order = 0;
return addr;
}
@@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
+ struct hyp_page *page;
void *addr;
/* Dump all pgtable pages in the hyp_pool */
@@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
/* Drain the hyp_pool into the memcache */
addr = hyp_alloc_pages(&vm->pool, 0);
while (addr) {
- memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+ page = hyp_virt_to_page(addr);
+ page->refcount = 0;
+ page->order = 0;
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -382,19 +385,28 @@ bool addr_is_memory(phys_addr_t phys)
return !!find_mem_range(phys, &range);
}
-static bool addr_is_allowed_memory(phys_addr_t phys)
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+ return range->start <= addr && addr < range->end;
+}
+
+static int check_range_allowed_memory(u64 start, u64 end)
{
struct memblock_region *reg;
struct kvm_mem_range range;
- reg = find_mem_range(phys, &range);
+ /*
+ * Callers can't check the state of a range that overlaps memory and
+ * MMIO regions, so ensure [start, end) is in the same kvm_mem_range.
+ */
+ reg = find_mem_range(start, &range);
+ if (!is_in_mem_range(end - 1, &range))
+ return -EINVAL;
- return reg && !(reg->flags & MEMBLOCK_NOMAP);
-}
+ if (!reg || reg->flags & MEMBLOCK_NOMAP)
+ return -EPERM;
-static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
-{
- return range->start <= addr && addr < range->end;
+ return 0;
}
static bool range_is_memory(u64 start, u64 end)
@@ -454,8 +466,10 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
if (kvm_pte_valid(pte))
return -EAGAIN;
- if (pte)
+ if (pte) {
+ WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
return -EPERM;
+ }
do {
u64 granule = kvm_granule_size(level);
@@ -477,10 +491,33 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
+static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
+{
+ phys_addr_t end = addr + size;
+
+ for (; addr < end; addr += PAGE_SIZE)
+ hyp_phys_to_page(addr)->host_state = state;
+}
+
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
- addr, size, &host_s2_pool, owner_id);
+ int ret;
+
+ if (!addr_is_memory(addr))
+ return -EPERM;
+
+ ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+ addr, size, &host_s2_pool, owner_id);
+ if (ret)
+ return ret;
+
+ /* Don't forget to update the vmemmap tracking for the host */
+ if (owner_id == PKVM_ID_HOST)
+ __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
+ else
+ __host_update_page_state(addr, size, PKVM_NOPAGE);
+
+ return 0;
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
@@ -546,39 +583,6 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
BUG_ON(ret && ret != -EAGAIN);
}
-struct pkvm_mem_transition {
- u64 nr_pages;
-
- struct {
- enum pkvm_component_id id;
- /* Address in the initiator's address space */
- u64 addr;
-
- union {
- struct {
- /* Address in the completer's address space */
- u64 completer_addr;
- } host;
- struct {
- u64 completer_addr;
- } hyp;
- };
- } initiator;
-
- struct {
- enum pkvm_component_id id;
- } completer;
-};
-
-struct pkvm_mem_share {
- const struct pkvm_mem_transition tx;
- const enum kvm_pgtable_prot completer_prot;
-};
-
-struct pkvm_mem_donation {
- const struct pkvm_mem_transition tx;
-};
-
struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
@@ -604,115 +608,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
-{
- if (!addr_is_allowed_memory(addr))
- return PKVM_NOPAGE;
-
- if (!kvm_pte_valid(pte) && pte)
- return PKVM_NOPAGE;
-
- return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
-}
-
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- struct check_walk_data d = {
- .desired = state,
- .get_page_state = host_get_page_state,
- };
+ u64 end = addr + size;
+ int ret;
+
+ ret = check_range_allowed_memory(addr, end);
+ if (ret)
+ return ret;
hyp_assert_lock_held(&host_mmu.lock);
- return check_page_state_range(&host_mmu.pgt, addr, size, &d);
+ for (; addr < end; addr += PAGE_SIZE) {
+ if (hyp_phys_to_page(addr)->host_state != state)
+ return -EPERM;
+ }
+
+ return 0;
}
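
The point of moving host_state into the vmemmap is visible here: the check is now one array load per page instead of a stage-2 page-table walk. hyp_phys_to_page() is essentially a PFN index into the hyp vmemmap; an approximate sketch (assumed simplification — the real macro lives in the nvhe memory headers):

	#define hyp_phys_to_page_sketch(phys) \
		(&hyp_vmemmap[(phys) >> PAGE_SHIFT])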
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
-
- return host_stage2_idmap_locked(addr, size, prot);
-}
-
-static int host_request_owned_transition(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
-}
-
-static int host_request_unshare(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
-}
-
-static int host_initiate_share(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
-}
-
-static int host_initiate_unshare(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
-}
-
-static int host_initiate_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u8 owner_id = tx->completer.id;
- u64 size = tx->nr_pages * PAGE_SIZE;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
-}
-
-static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
-{
- return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
- tx->initiator.id != PKVM_ID_HYP);
-}
-
-static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
- enum pkvm_page_state state)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
-
- if (__host_ack_skip_pgtable_check(tx))
- return 0;
-
- return __host_check_page_state_range(addr, size, state);
-}
+ if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
+ int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
-static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- return __host_ack_transition(addr, tx, PKVM_NOPAGE);
-}
+ if (ret)
+ return ret;
+ }
-static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u8 host_id = tx->completer.id;
+ __host_update_page_state(addr, size, state);
- return host_stage2_set_owner_locked(addr, size, host_id);
+ return 0;
}
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
@@ -735,573 +662,418 @@ static int __hyp_check_page_state_range(u64 addr, u64 size,
return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}
-static int hyp_request_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
+static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
+ if (!kvm_pte_valid(pte))
+ return PKVM_NOPAGE;
- *completer_addr = tx->initiator.hyp.completer_addr;
- return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
-static int hyp_initiate_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
+static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
+ u64 size, enum pkvm_page_state state)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
- int ret;
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = guest_get_page_state,
+ };
- *completer_addr = tx->initiator.hyp.completer_addr;
- ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
- return (ret != size) ? -EFAULT : 0;
+ hyp_assert_lock_held(&vm->lock);
+ return check_page_state_range(&vm->pgt, addr, size, &d);
}
-static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+int __pkvm_host_share_hyp(u64 pfn)
{
- return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
- tx->initiator.id != PKVM_ID_HOST);
-}
+ u64 phys = hyp_pfn_to_phys(pfn);
+ void *virt = __hyp_va(phys);
+ enum kvm_pgtable_prot prot;
+ u64 size = PAGE_SIZE;
+ int ret;
-static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
- enum kvm_pgtable_prot perms)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
+ host_lock_component();
+ hyp_lock_component();
- if (perms != PAGE_HYP)
- return -EPERM;
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (ret)
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
+ }
- if (__hyp_ack_skip_pgtable_check(tx))
- return 0;
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+ WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
- return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
+
+ return ret;
}
-static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+int __pkvm_host_unshare_hyp(u64 pfn)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 virt = (u64)__hyp_va(phys);
+ u64 size = PAGE_SIZE;
+ int ret;
- if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
- return -EBUSY;
+ host_lock_component();
+ hyp_lock_component();
- return __hyp_check_page_state_range(addr, size,
- PKVM_PAGE_SHARED_BORROWED);
-}
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ if (ret)
+ goto unlock;
+ ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
+ if (ret)
+ goto unlock;
+ if (hyp_page_count((void *)virt)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
-static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+ WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
- if (__hyp_ack_skip_pgtable_check(tx))
- return 0;
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+ return ret;
}
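
These two functions inline the same transitions the deleted do_share()/do_unshare() machinery used to document (OWNED <-> SHARED_OWNED on the host side, NOPAGE <-> SHARED_BORROWED on the hyp side). A standalone model of the invariant, runnable outside the kernel:

	#include <assert.h>

	enum st { OWNED, SHARED_OWNED, SHARED_BORROWED, NOPAGE };
	struct pg { enum st host, hyp; };

	static void share_hyp(struct pg *p)
	{
		assert(p->host == OWNED && p->hyp == NOPAGE);
		p->host = SHARED_OWNED;
		p->hyp = SHARED_BORROWED;
	}

	static void unshare_hyp(struct pg *p)
	{
		assert(p->host == SHARED_OWNED && p->hyp == SHARED_BORROWED);
		p->host = OWNED;
		p->hyp = NOPAGE;
	}

	int main(void)
	{
		struct pg p = { OWNED, NOPAGE };

		share_hyp(&p);
		unshare_hyp(&p);
		return 0;
	}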
-static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
- enum kvm_pgtable_prot perms)
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
- void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
+ void *virt = __hyp_va(phys);
enum kvm_pgtable_prot prot;
+ int ret;
- prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
- return pkvm_create_mappings_locked(start, end, prot);
-}
+ host_lock_component();
+ hyp_lock_component();
-static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (ret)
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
+ }
- return (ret != size) ? -EFAULT : 0;
-}
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+ WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
-static int hyp_complete_donation(u64 addr,
- const struct pkvm_mem_transition *tx)
-{
- void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
- enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return pkvm_create_mappings_locked(start, end, prot);
+ return ret;
}
-static int check_share(struct pkvm_mem_share *share)
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
+ u64 virt = (u64)__hyp_va(phys);
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_owned_transition(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ hyp_lock_component();
+ ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
- break;
- case PKVM_ID_FFA:
- /*
- * We only check the host; the secure side will check the other
- * end when we forward the FFA call.
- */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
}
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+ WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
+
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
+
return ret;
}
-static int __do_share(struct pkvm_mem_share *share)
+int hyp_pin_shared_mem(void *from, void *to)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+ u64 end = PAGE_ALIGN((u64)to);
+ u64 size = end - start;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_share(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ hyp_lock_component();
+ ret = __host_check_page_state_range(__hyp_pa(start), size,
+ PKVM_PAGE_SHARED_OWNED);
if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
- break;
- case PKVM_ID_FFA:
- /*
- * We're not responsible for any secure page-tables, so there's
- * nothing to do here.
- */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ goto unlock;
- return ret;
-}
+ ret = __hyp_check_page_state_range(start, size,
+ PKVM_PAGE_SHARED_BORROWED);
+ if (ret)
+ goto unlock;
-/*
- * do_share():
- *
- * The page owner grants access to another component with a given set
- * of permissions.
- *
- * Initiator: OWNED => SHARED_OWNED
- * Completer: NOPAGE => SHARED_BORROWED
- */
-static int do_share(struct pkvm_mem_share *share)
-{
- int ret;
+ for (cur = start; cur < end; cur += PAGE_SIZE)
+ hyp_page_ref_inc(hyp_virt_to_page(cur));
- ret = check_share(share);
- if (ret)
- return ret;
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return WARN_ON(__do_share(share));
+ return ret;
}
-static int check_unshare(struct pkvm_mem_share *share)
+void hyp_unpin_shared_mem(void *from, void *to)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
- int ret;
-
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_unshare(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+ u64 end = PAGE_ALIGN((u64)to);
- if (ret)
- return ret;
+ host_lock_component();
+ hyp_lock_component();
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_ack_unshare(completer_addr, tx);
- break;
- case PKVM_ID_FFA:
- /* See check_share() */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ for (cur = start; cur < end; cur += PAGE_SIZE)
+ hyp_page_ref_dec(hyp_virt_to_page(cur));
- return ret;
+ hyp_unlock_component();
+ host_unlock_component();
}
-static int __do_unshare(struct pkvm_mem_share *share)
+int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_unshare(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
-
- if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_complete_unshare(completer_addr, tx);
- break;
- case PKVM_ID_FFA:
- /* See __do_share() */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (!ret)
+ ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ host_unlock_component();
return ret;
}
-/*
- * do_unshare():
- *
- * The page owner revokes access from another component for a range of
- * pages which were previously shared using do_share().
- *
- * Initiator: SHARED_OWNED => OWNED
- * Completer: SHARED_BORROWED => NOPAGE
- */
-static int do_unshare(struct pkvm_mem_share *share)
+int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
int ret;
- ret = check_unshare(share);
- if (ret)
- return ret;
+ host_lock_component();
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ if (!ret)
+ ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ host_unlock_component();
- return WARN_ON(__do_unshare(share));
+ return ret;
}
-static int check_donation(struct pkvm_mem_donation *donation)
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+ enum kvm_pgtable_prot prot)
{
- const struct pkvm_mem_transition *tx = &donation->tx;
- u64 completer_addr;
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ struct hyp_page *page;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_owned_transition(&completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_request_donation(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
if (ret)
return ret;
- switch (tx->completer.id) {
- case PKVM_ID_HOST:
- ret = host_ack_donation(completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_ack_donation(completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
+ host_lock_component();
+ guest_lock_component(vm);
-static int __do_donate(struct pkvm_mem_donation *donation)
-{
- const struct pkvm_mem_transition *tx = &donation->tx;
- u64 completer_addr;
- int ret;
+ ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_donation(&completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_initiate_donation(&completer_addr, tx);
+ page = hyp_phys_to_page(phys);
+ switch (page->host_state) {
+ case PKVM_PAGE_OWNED:
+ WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
break;
+ case PKVM_PAGE_SHARED_OWNED:
+ if (page->host_share_guest_count)
+ break;
+ /* Only host-to-np-guest multi-sharing is tolerated */
+ WARN_ON(1);
+ fallthrough;
default:
- ret = -EINVAL;
+ ret = -EPERM;
+ goto unlock;
}
- if (ret)
- return ret;
+ WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
+ pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
+ &vcpu->vcpu.arch.pkvm_memcache, 0));
+ page->host_share_guest_count++;
- switch (tx->completer.id) {
- case PKVM_ID_HOST:
- ret = host_complete_donation(completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_complete_donation(completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+unlock:
+ guest_unlock_component(vm);
+ host_unlock_component();
return ret;
}
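
host_share_guest_count is what lets the host map the same page into a non-protected guest at several IPAs without confusing the state machine: only the first share moves the page to SHARED_OWNED, and (in __pkvm_host_unshare_guest below) only the last unshare moves it back. A minimal model, with hypothetical names:

	#include <stdbool.h>

	struct pg_model { bool shared; unsigned int guest_count; };

	static void model_share(struct pg_model *p)
	{
		if (!p->guest_count++)
			p->shared = true;	/* OWNED -> SHARED_OWNED */
	}

	static void model_unshare(struct pg_model *p)
	{
		if (!--p->guest_count)
			p->shared = false;	/* SHARED_OWNED -> OWNED */
	}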
-/*
- * do_donate():
- *
- * The page owner transfers ownership to another component, losing access
- * as a consequence.
- *
- * Initiator: OWNED => NOPAGE
- * Completer: NOPAGE => OWNED
- */
-static int do_donate(struct pkvm_mem_donation *donation)
+static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
+ enum pkvm_page_state state;
+ struct hyp_page *page;
+ kvm_pte_t pte;
+ u64 phys;
+ s8 level;
int ret;
- ret = check_donation(donation);
+ ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
if (ret)
return ret;
+ if (level != KVM_PGTABLE_LAST_LEVEL)
+ return -E2BIG;
+ if (!kvm_pte_valid(pte))
+ return -ENOENT;
- return WARN_ON(__do_donate(donation));
-}
-
-int __pkvm_host_share_hyp(u64 pfn)
-{
- int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = 1,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- .completer_prot = PAGE_HYP,
- };
+ state = guest_get_page_state(pte, ipa);
+ if (state != PKVM_PAGE_SHARED_BORROWED)
+ return -EPERM;
- host_lock_component();
- hyp_lock_component();
+ phys = kvm_pte_to_phys(pte);
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ if (WARN_ON(ret))
+ return ret;
- ret = do_share(&share);
+ page = hyp_phys_to_page(phys);
+ if (page->host_state != PKVM_PAGE_SHARED_OWNED)
+ return -EPERM;
+ if (WARN_ON(!page->host_share_guest_count))
+ return -EINVAL;
- hyp_unlock_component();
- host_unlock_component();
+ *__phys = phys;
- return ret;
+ return 0;
}
-int __pkvm_host_unshare_hyp(u64 pfn)
+int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ struct hyp_page *page;
+ u64 phys;
int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = 1,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- .completer_prot = PAGE_HYP,
- };
host_lock_component();
- hyp_lock_component();
+ guest_lock_component(vm);
+
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (ret)
+ goto unlock;
+
+ ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
+ if (ret)
+ goto unlock;
- ret = do_unshare(&share);
+ page = hyp_phys_to_page(phys);
+ page->host_share_guest_count--;
+ if (!page->host_share_guest_count)
+ WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));
- hyp_unlock_component();
+unlock:
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
}
-int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 phys;
int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_donation donation = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- };
+
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
host_lock_component();
- hyp_lock_component();
+ guest_lock_component(vm);
- ret = do_donate(&donation);
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (!ret)
+ ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
- hyp_unlock_component();
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
}
-int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 phys;
int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_donation donation = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HYP,
- .addr = hyp_addr,
- .hyp = {
- .completer_addr = host_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HOST,
- },
- },
- };
host_lock_component();
- hyp_lock_component();
+ guest_lock_component(vm);
- ret = do_donate(&donation);
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (!ret)
+ ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
- hyp_unlock_component();
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
}
-int hyp_pin_shared_mem(void *from, void *to)
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
- u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
- u64 end = PAGE_ALIGN((u64)to);
- u64 size = end - start;
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 phys;
int ret;
host_lock_component();
- hyp_lock_component();
-
- ret = __host_check_page_state_range(__hyp_pa(start), size,
- PKVM_PAGE_SHARED_OWNED);
- if (ret)
- goto unlock;
-
- ret = __hyp_check_page_state_range(start, size,
- PKVM_PAGE_SHARED_BORROWED);
- if (ret)
- goto unlock;
+ guest_lock_component(vm);
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_inc(hyp_virt_to_page(cur));
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (!ret)
+ ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
-unlock:
- hyp_unlock_component();
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
}
-void hyp_unpin_shared_mem(void *from, void *to)
-{
- u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
- u64 end = PAGE_ALIGN((u64)to);
-
- host_lock_component();
- hyp_lock_component();
-
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_dec(hyp_virt_to_page(cur));
-
- hyp_unlock_component();
- host_unlock_component();
-}
-
-int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
+int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ u64 phys;
int ret;
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = hyp_pfn_to_phys(pfn),
- },
- .completer = {
- .id = PKVM_ID_FFA,
- },
- },
- };
host_lock_component();
- ret = do_share(&share);
- host_unlock_component();
+ guest_lock_component(vm);
- return ret;
-}
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (!ret)
+ kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
-int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
-{
- int ret;
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = hyp_pfn_to_phys(pfn),
- },
- .completer = {
- .id = PKVM_ID_FFA,
- },
- },
- };
-
- host_lock_component();
- ret = do_unshare(&share);
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 8850b591d775..f41c7440b34b 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -360,10 +360,10 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
prev_base = __io_map_base;
/*
- * Efficient stack verification using the PAGE_SHIFT bit implies
+ * Efficient stack verification using the NVHE_STACK_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
- size = PAGE_SIZE * 2;
+ size = NVHE_STACK_SIZE * 2;
addr = ALIGN(__io_map_base, size);
ret = __pkvm_alloc_private_va_range(addr, size);
@@ -373,12 +373,12 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
* at the higher address and leave the lower guard page
* unbacked.
*
- * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1
* and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection.
*/
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
- PAGE_SIZE, phys, PAGE_HYP);
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + NVHE_STACK_SIZE,
+ NVHE_STACK_SIZE, phys, PAGE_HYP);
if (ret)
__io_map_base = prev_base;
}
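
The alignment requirement called out in the comments above is what makes overflow detection cheap: assuming NVHE_STACK_SIZE == (1UL << NVHE_STACK_SHIFT), checking a stack pointer reduces to one bit test. A sketch:

	/* The stack occupies [base + NVHE_STACK_SIZE, base + 2 * NVHE_STACK_SIZE)
	 * with the lower half left as an unbacked guard, so any valid SP has
	 * the NVHE_STACK_SHIFT bit set and a guard-page hit clears it. */
	static inline bool hyp_sp_in_guard_page(unsigned long sp)
	{
		return !(sp & BIT(NVHE_STACK_SHIFT));
	}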
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index e691290d3765..a1eb27a1a747 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
@@ -94,7 +94,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
phys_addr_t phys = hyp_page_to_phys(p);
- unsigned short order = p->order;
+ u8 order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -129,7 +129,7 @@ insert:
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
struct hyp_page *buddy;
@@ -183,7 +183,7 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
void hyp_split_page(struct hyp_page *p)
{
- unsigned short order = p->order;
+ u8 order = p->order;
unsigned int i;
p->order = 0;
@@ -195,10 +195,10 @@ void hyp_split_page(struct hyp_page *p)
}
}
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
+void *hyp_alloc_pages(struct hyp_pool *pool, u8 order)
{
- unsigned short i = order;
struct hyp_page *p;
+ u8 i = order;
hyp_spin_lock(&pool->lock);
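
The order being narrowed to u8 in these helpers is the usual buddy-allocator exponent: a block spans PAGE_SIZE << order bytes, and the buddy of a naturally aligned block differs from it in exactly one address bit. A sketch of the arithmetic behind __find_buddy_nocheck():

	static inline phys_addr_t buddy_addr(phys_addr_t addr, u8 order)
	{
		/* Flip the bit selecting which half of the order+1 block. */
		return addr ^ (PAGE_SIZE << order);
	}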
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 73e319891327..3927fe52a3dd 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -9,7 +9,6 @@
#include <asm/kvm_emulate.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
@@ -24,232 +23,160 @@ unsigned int kvm_arm_vmid_bits;
unsigned int kvm_host_sve_max_vl;
/*
- * Set trap register values based on features in ID_AA64PFR0.
+ * The currently loaded hyp vCPU for each physical CPU. Used only when
+ * protected KVM is enabled, but for both protected and non-protected VMs.
*/
-static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
- u64 hcr_set = HCR_RW;
- u64 hcr_clear = 0;
- u64 cptr_set = 0;
- u64 cptr_clear = 0;
-
- /* Protected KVM does not support AArch32 guests. */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);
+static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
- /*
- * Linux guests assume support for floating-point and Advanced SIMD. Do
- * not change the trapping behavior for these from the KVM default.
- */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
- PVM_ID_AA64PFR0_ALLOW));
+static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (has_hvhe())
- hcr_set |= HCR_E2H;
+ vcpu->arch.hcr_el2 |= HCR_E2H;
- /* Trap RAS unless all current versions are supported */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
- ID_AA64PFR0_EL1_RAS_V1P1) {
- hcr_set |= HCR_TERR | HCR_TEA;
- hcr_clear |= HCR_FIEN;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
}
- /* Trap AMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
- hcr_clear |= HCR_AMVOFFEN;
- cptr_set |= CPTR_EL2_TAM;
- }
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
- /* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
- if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN;
- else
- cptr_set |= CPTR_EL2_TZ;
- }
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
- vcpu->arch.cptr_el2 &= ~cptr_clear;
+ if (vcpu_has_ptrauth(vcpu))
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
-/*
- * Set trap register values based on features in ID_AA64PFR1.
- */
-static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
- u64 hcr_set = 0;
- u64 hcr_clear = 0;
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.hcr_el2;
- /* Memory Tagging: Trap and Treat as Untagged if not supported. */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
- hcr_set |= HCR_TID5;
- hcr_clear |= HCR_DCT | HCR_ATA;
+ /* No support for AArch32. */
+ val |= HCR_RW;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;
+
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ val |= HCR_TERR | HCR_TEA;
+ val &= ~(HCR_FIEN);
}
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
-}
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
+ val &= ~(HCR_AMVOFFEN);
-/*
- * Set trap register values based on features in ID_AA64DFR0.
- */
-static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
- u64 mdcr_set = 0;
- u64 mdcr_clear = 0;
- u64 cptr_set = 0;
-
- /* Trap/constrain PMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
- mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
- MDCR_EL2_HPMN_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
+ val |= HCR_TID5;
+ val &= ~(HCR_DCT | HCR_ATA);
}
- /* Trap Debug */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
- mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+ val |= HCR_TLOR;
- /* Trap OS Double Lock */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
- mdcr_set |= MDCR_EL2_TDOSA;
+ vcpu->arch.hcr_el2 = val;
+}
+
+static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.mdcr_el2;
- /* Trap SPE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPMS;
- mdcr_clear |= MDCR_EL2_E2PB_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
+ val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
}
- /* Trap Trace Filter */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
- mdcr_set |= MDCR_EL2_TTRF;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
+ val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
- /* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
- if (has_hvhe())
- cptr_set |= CPACR_EL1_TTA;
- else
- cptr_set |= CPTR_EL2_TTA;
- }
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
+ val |= MDCR_EL2_TDOSA;
- /* Trap External Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
- mdcr_clear |= MDCR_EL2_E2TB_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
+ val |= MDCR_EL2_TPMS;
+ val &= ~MDCR_EL2_E2PB_MASK;
+ }
- vcpu->arch.mdcr_el2 |= mdcr_set;
- vcpu->arch.mdcr_el2 &= ~mdcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
-}
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+ val |= MDCR_EL2_TTRF;
-/*
- * Set trap register values based on features in ID_AA64MMFR0.
- */
-static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
- u64 mdcr_set = 0;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
+ val |= MDCR_EL2_E2TB_MASK;
/* Trap Debug Communications Channel registers */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
- mdcr_set |= MDCR_EL2_TDCC;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ val |= MDCR_EL2_TDCC;
- vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 = val;
}
/*
- * Set trap register values based on features in ID_AA64MMFR1.
+ * Check that cpu features that are neither trapped nor supported are not
+ * enabled for protected VMs.
*/
-static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
- u64 hcr_set = 0;
-
- /* Trap LOR */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
- hcr_set |= HCR_TLOR;
+ struct kvm *kvm = vcpu->kvm;
- vcpu->arch.hcr_el2 |= hcr_set;
-}
-
-/*
- * Set baseline trap register values.
- */
-static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
-{
- const u64 hcr_trap_feat_regs = HCR_TID3;
- const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+ /* Protected KVM does not support AArch32 guests. */
+ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
+ kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
+ return -EINVAL;
/*
- * Always trap:
- * - Feature id registers: to control features exposed to guests
- * - Implementation-defined features
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
*/
- vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
-
- /* Clear res0 and set res1 bits to trap potential new features. */
- vcpu->arch.hcr_el2 &= ~(HCR_RES0);
- vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
- if (!has_hvhe()) {
- vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
- vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
- }
-}
-
-static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
-
- if (has_hvhe())
- vcpu->arch.hcr_el2 |= HCR_E2H;
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
-
- if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
- vcpu->arch.hcr_el2 |= HCR_FWB;
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
+ !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
+ return -EINVAL;
- if (cpus_have_final_cap(ARM64_HAS_EVT) &&
- !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
- vcpu->arch.hcr_el2 |= HCR_TID4;
- else
- vcpu->arch.hcr_el2 |= HCR_TID2;
+ /* No SME support in KVM right now. Check to catch if it changes. */
+ if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
+ return -EINVAL;
- if (vcpu_has_ptrauth(vcpu))
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+ return 0;
}
/*
* Initialize trap register values in protected mode.
*/
-static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
{
- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+ int ret;
+
vcpu->arch.mdcr_el2 = 0;
pkvm_vcpu_reset_hcr(vcpu);
- if ((!vcpu_is_protected(vcpu)))
- return;
+ if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu)))
+ return 0;
+
+ ret = pkvm_check_pvm_cpu_features(vcpu);
+ if (ret)
+ return ret;
+
+ pvm_init_traps_hcr(vcpu);
+ pvm_init_traps_mdcr(vcpu);
- pvm_init_trap_regs(vcpu);
- pvm_init_traps_aa64pfr0(vcpu);
- pvm_init_traps_aa64pfr1(vcpu);
- pvm_init_traps_aa64dfr0(vcpu);
- pvm_init_traps_aa64mmfr0(vcpu);
- pvm_init_traps_aa64mmfr1(vcpu);
+ return 0;
}
/*
@@ -270,10 +197,10 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
/*
* Spinlock for protecting state related to the VM table. Protects writes
- * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
- * 'last_hyp_vcpu_lookup'.
+ * to 'vm_table', 'nr_table_entries', and other per-VM state during initialization.
+ * Also protects reads and writes to 'last_hyp_vcpu_lookup'.
*/
-static DEFINE_HYP_SPINLOCK(vm_table_lock);
+DEFINE_HYP_SPINLOCK(vm_table_lock);
/*
* The table of VM entries for protected VMs in hyp.
@@ -306,15 +233,30 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
struct pkvm_hyp_vm *hyp_vm;
+ /* Cannot load a new vcpu without putting the old one first. */
+ if (__this_cpu_read(loaded_hyp_vcpu))
+ return NULL;
+
hyp_spin_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
goto unlock;
hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
+
+ /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
+ if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) {
+ hyp_vcpu = NULL;
+ goto unlock;
+ }
+
+ hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu);
hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
hyp_spin_unlock(&vm_table_lock);
+
+ if (hyp_vcpu)
+ __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
return hyp_vcpu;
}
@@ -323,17 +265,63 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
hyp_spin_lock(&vm_table_lock);
+ hyp_vcpu->loaded_hyp_vcpu = NULL;
+ __this_cpu_write(loaded_hyp_vcpu, NULL);
+ hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
+ hyp_spin_unlock(&vm_table_lock);
+}
+
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
+{
+ return __this_cpu_read(loaded_hyp_vcpu);
+}
+
+struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
+{
+ struct pkvm_hyp_vm *hyp_vm;
+
+ hyp_spin_lock(&vm_table_lock);
+ hyp_vm = get_vm_by_handle(handle);
+ if (hyp_vm)
+ hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
+ hyp_spin_unlock(&vm_table_lock);
+
+ return hyp_vm;
+}
+
+void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
+{
+ hyp_spin_lock(&vm_table_lock);
hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
hyp_spin_unlock(&vm_table_lock);
}
+struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
+{
+ struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);
+
+ if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) {
+ put_pkvm_hyp_vm(hyp_vm);
+ hyp_vm = NULL;
+ }
+
+ return hyp_vm;
+}
+
static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
{
struct kvm *kvm = &hyp_vm->kvm;
+ unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
+ if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
+ set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
+
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
+ hyp_vm->kvm.arch.flags = host_arch_flags;
+
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
KVM_VCPU_MAX_FEATURES);
@@ -342,50 +330,26 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
- /*
- * For protected VMs, always allow:
- * - CPU starting in poweroff state
- * - PSCI v0.2
- */
- set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
- /*
- * Check if remaining features are allowed:
- * - Performance Monitoring
- * - Scalable Vectors
- * - Pointer Authentication
- */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW))
- set_bit(KVM_ARM_VCPU_SVE, allowed_features);
-
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) &&
- FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
- FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
+ set_bit(KVM_ARM_VCPU_SVE, allowed_features);
+ kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
+ }
+
bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
allowed_features, KVM_VCPU_MAX_FEATURES);
}
-static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu)
-{
- struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
-
- if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
- vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) {
- kvm_vcpu_enable_ptrauth(vcpu);
- } else {
- vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH);
- }
-}
-
static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
if (host_vcpu)
@@ -408,6 +372,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
hyp_vm->kvm.created_vcpus = nr_vcpus;
hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
+ hyp_vm->kvm.arch.flags = 0;
pkvm_init_features_from_host(hyp_vm, host_kvm);
}
@@ -415,10 +380,8 @@ static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *
{
struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
- if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
- vcpu_clear_flag(vcpu, GUEST_HAS_SVE);
+ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
- }
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -446,9 +409,14 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+
+ ret = pkvm_vcpu_init_traps(hyp_vcpu);
+ if (ret)
+ goto done;
+
pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
- pkvm_vcpu_init_ptrauth(hyp_vcpu);
- pkvm_vcpu_init_traps(&hyp_vcpu->vcpu);
done:
if (ret)
unpin_host_vcpu(host_vcpu);
@@ -693,8 +661,6 @@ unlock:
return ret;
}
- hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
-
return 0;
}
@@ -746,6 +712,14 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
/* Push the metadata pages to the teardown memcache */
for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
+ struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache;
+
+ while (vcpu_mc->nr_pages) {
+ void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);
+
+ push_hyp_memcache(mc, addr, hyp_virt_to_phys);
+ unmap_donated_memory_noclear(addr, PAGE_SIZE);
+ }
teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index cbdd18cd3f98..d62bcb5634a2 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -12,7 +12,6 @@
#include <nvhe/early_alloc.h>
#include <nvhe/ffa.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -180,7 +179,6 @@ static void hpool_put_page(void *addr)
static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
phys_addr_t phys;
@@ -203,16 +201,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
case PKVM_PAGE_OWNED:
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
case PKVM_PAGE_SHARED_OWNED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
break;
case PKVM_PAGE_SHARED_BORROWED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
break;
default:
return -EINVAL;
}
- return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+ return 0;
}
static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
index ed6b58b19cfa..5b6eeab1a774 100644
--- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
+++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
@@ -28,7 +28,7 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
- stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
+ stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - NVHE_STACK_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
@@ -54,7 +54,7 @@ static struct stack_info stackinfo_get_hyp(void)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
- unsigned long low = high - PAGE_SIZE;
+ unsigned long low = high - NVHE_STACK_SIZE;
return (struct stack_info) {
.low = low,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 0f6b01b3da5c..6c846d033d24 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -26,7 +26,6 @@
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -36,33 +35,46 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_traps(struct kvm_vcpu *vcpu)
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- ___activate_traps(vcpu, vcpu->arch.hcr_el2);
- __activate_traps_common(vcpu);
+ if (has_hvhe()) {
+ val |= CPACR_EL1_TTA;
- val = vcpu->arch.cptr_el2;
- val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
- if (cpus_have_final_cap(ARM64_SME)) {
- if (has_hvhe())
- val &= ~CPACR_EL1_SMEN;
- else
- val |= CPTR_EL2_TSM;
- }
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ }
+ } else {
+ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
- if (!guest_owns_fp_regs()) {
- if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- else
- val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
+ val |= CPTR_EL2_TSM;
- __activate_traps_fpsimd32(vcpu);
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
}
+ if (!guest_owns_fp_regs())
+ __activate_traps_fpsimd32(vcpu);
+
kvm_write_cptr_el2(val);
+}
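
The two branches exist because the register formats have opposite polarity: in hVHE mode the CPACR_EL1 bits are enables (clearing xEN traps), while the nVHE CPTR_EL2 bits are traps (setting TFP/TZ traps). A sketch of the resulting predicate for FP, reusing the constants above:

	static bool cptr_traps_fp(u64 val)
	{
		return has_hvhe() ? !(val & CPACR_EL1_FPEN)
				  : !!(val & CPTR_EL2_TFP);
	}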
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
+ __activate_traps_common(vcpu);
+ __activate_cptr_traps(vcpu);
+
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 2860548d4250..1ddd9ed3cbb3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -11,7 +11,7 @@
#include <hyp/adjust_pc.h>
-#include <nvhe/fixed_config.h>
+#include <nvhe/pkvm.h>
#include "../../sys_regs.h"
@@ -28,222 +28,255 @@ u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
u64 id_aa64smfr0_el1_sys_val;
-/*
- * Inject an unknown/undefined exception to an AArch64 guest while most of its
- * sysregs are live.
- */
-static void inject_undef64(struct kvm_vcpu *vcpu)
-{
- u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
-
- *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
-
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
-
- __kvm_adjust_pc(vcpu);
-
- write_sysreg_el1(esr, SYS_ESR);
- write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
- write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
- write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
-}
-
-/*
- * Returns the restricted features values of the feature register based on the
- * limitations in restrict_fields.
- * A feature id field value of 0b0000 does not impose any restrictions.
- * Note: Use only for unsigned feature field values.
- */
-static u64 get_restricted_features_unsigned(u64 sys_reg_val,
- u64 restrict_fields)
-{
- u64 value = 0UL;
- u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+struct pvm_ftr_bits {
+ bool sign;
+ u8 shift;
+ u8 width;
+ u8 max_val;
+ bool (*vm_supported)(const struct kvm *kvm);
+};
- /*
- * According to the Arm Architecture Reference Manual, feature fields
- * use increasing values to indicate increases in functionality.
- * Iterate over the restricted feature fields and calculate the minimum
- * unsigned value between the one supported by the system, and what the
- * value is being restricted to.
- */
- while (sys_reg_val && restrict_fields) {
- value |= min(sys_reg_val & mask, restrict_fields & mask);
- sys_reg_val &= ~mask;
- restrict_fields &= ~mask;
- mask <<= ARM64_FEATURE_FIELD_BITS;
+#define __MAX_FEAT_FUNC(id, fld, max, func, sgn) \
+ { \
+ .sign = sgn, \
+ .shift = id##_##fld##_SHIFT, \
+ .width = id##_##fld##_WIDTH, \
+ .max_val = id##_##fld##_##max, \
+ .vm_supported = func, \
}
- return value;
-}
-
-/*
- * Functions that return the value of feature id registers for protected VMs
- * based on allowed features, system features, and KVM support.
- */
+#define MAX_FEAT_FUNC(id, fld, max, func) \
+ __MAX_FEAT_FUNC(id, fld, max, func, id##_##fld##_SIGNED)
-static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask = 0;
- u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+#define MAX_FEAT(id, fld, max) \
+ MAX_FEAT_FUNC(id, fld, max, NULL)
- set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+#define MAX_FEAT_ENUM(id, fld, max) \
+ __MAX_FEAT_FUNC(id, fld, max, NULL, false)
- return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
-}
+#define FEAT_END { .width = 0, }
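+
+/*
+ * Illustrative only: an entry such as MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16)
+ * expands, via __MAX_FEAT_FUNC(), to roughly:
+ *
+ *	{
+ *		.sign		= ID_AA64PFR0_EL1_FP_SIGNED,
+ *		.shift		= ID_AA64PFR0_EL1_FP_SHIFT,
+ *		.width		= ID_AA64PFR0_EL1_FP_WIDTH,
+ *		.max_val	= ID_AA64PFR0_EL1_FP_FP16,
+ *		.vm_supported	= NULL,
+ *	}
+ */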
-static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_ptrauth(const struct kvm *kvm)
{
- const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
- u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
-
- if (!kvm_has_mte(kvm))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
-
- return id_aa64pfr1_el1_sys_val & allow_mask;
-}
-
-static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for Scalable Vectors, therefore, hyp has no sanitized
- * copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, including breakpoints, and watchpoints,
- * therefore, pKVM has no sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, therefore, hyp has no sanitized copy of the
- * feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
- return 0;
-}
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
-static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
- return 0;
+ return (cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||
+ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC);
}
-static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_sve(const struct kvm *kvm)
{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
- return 0;
+ return system_supports_sve() && kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_SVE);
}
-static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
-{
- return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
-}
+/*
+ * Definitions for features to be allowed or restricted for protected guests.
+ *
+ * Each field in the masks represents the highest supported value for the
+ * feature. If a feature field is not present, it is not supported. Moreover,
+ * these are used to generate the guest's view of the feature registers.
+ *
+ * The approach for protected VMs is to at least support features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Impossible to trap, and hence cannot be withheld from the guest anyway
+ */
-static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64pfr0[] = {
+ MAX_FEAT(ID_AA64PFR0_EL1, EL0, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL1, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL3, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, AdvSIMD, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, GIC, IMP),
+ MAX_FEAT_FUNC(ID_AA64PFR0_EL1, SVE, IMP, vm_has_sve),
+ MAX_FEAT(ID_AA64PFR0_EL1, RAS, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, DIT, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV3, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
+static const struct pvm_ftr_bits pvmid_aa64pfr1[] = {
+ MAX_FEAT(ID_AA64PFR1_EL1, BT, IMP),
+ MAX_FEAT(ID_AA64PFR1_EL1, SSBS, SSBS2),
+ MAX_FEAT_ENUM(ID_AA64PFR1_EL1, MTE_frac, NI),
+ FEAT_END
+};
- return id_aa64isar1_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = {
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40),
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGEND, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, SNSMEM, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGENDEL0, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, EXS, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64mmfr1[] = {
+ MAX_FEAT(ID_AA64MMFR1_EL1, HAFDBS, DBM),
+ MAX_FEAT_ENUM(ID_AA64MMFR1_EL1, VMIDBits, 16),
+ MAX_FEAT(ID_AA64MMFR1_EL1, HPDS, HPDS2),
+ MAX_FEAT(ID_AA64MMFR1_EL1, PAN, PAN3),
+ MAX_FEAT(ID_AA64MMFR1_EL1, SpecSEI, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, ETS, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, CMOW, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = {
+ MAX_FEAT(ID_AA64MMFR2_EL1, CnP, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP),
+ MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18),
+ MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2),
+ MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP),
+ FEAT_END
+};
- return id_aa64isar2_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64isar1[] = {
+ MAX_FEAT(ID_AA64ISAR1_EL1, DPB, DPB2),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, APA, PAuth, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, API, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR1_EL1, JSCVT, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FCMA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, LRCPC, LRCPC3),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPI, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FRINTTS, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SB, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX),
+ MAX_FEAT(ID_AA64ISAR1_EL1, BF16, EBF16),
+ MAX_FEAT(ID_AA64ISAR1_EL1, DGH, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, I8MM, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask;
+static const struct pvm_ftr_bits pvmid_aa64isar2[] = {
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, GPA3, IMP, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, APA3, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR2_EL1, ATS1A, IMP),
+ FEAT_END
+};
- set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
- PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+/*
+ * None of the features in ID_AA64DFR0_EL1 or ID_AA64MMFR4_EL1 are supported.
+ * However, both have Not-Implemented values that are non-zero. Define them
+ * so they can be used when getting the value of these registers.
+ */
+#define ID_AA64DFR0_EL1_NONZERO_NI \
+( \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, MTPMU, NI) \
+)
- return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
-}
+#define ID_AA64MMFR4_EL1_NONZERO_NI \
+ SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI)
-static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+/*
+ * Returns the value of the feature register based on the system register
+ * value, the vCPU's support for the relevant features, and the additional
+ * restrictions for protected VMs.
+ */
+static u64 get_restricted_features(const struct kvm_vcpu *vcpu,
+ u64 sys_reg_val,
+ const struct pvm_ftr_bits restrictions[])
{
- return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
-}
+ u64 val = 0UL;
+ int i;
+
+ for (i = 0; restrictions[i].width != 0; i++) {
+ bool (*vm_supported)(const struct kvm *) = restrictions[i].vm_supported;
+ bool sign = restrictions[i].sign;
+ int shift = restrictions[i].shift;
+ int width = restrictions[i].width;
+ u64 min_signed = (1UL << width) - 1UL;
+ u64 sign_bit = 1UL << (width - 1);
+ u64 mask = GENMASK_ULL(width + shift - 1, shift);
+ u64 sys_val = (sys_reg_val & mask) >> shift;
+ u64 pvm_max = restrictions[i].max_val;
+
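+ /*
+ * Worked example (unsigned field): if the system reports 0b0010 but
+ * pvm_max is 0b0001, the guest is shown min() = 0b0001. The sign_bit
+ * test catches signed fields where either value is negative (e.g.
+ * 0b1111 == NI); the unsigned max() then preserves that encoding
+ * instead of clamping it towards zero.
+ */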
+ if (vm_supported && !vm_supported(vcpu->kvm))
+ val |= (sign ? min_signed : 0) << shift;
+ else if (sign && (sys_val >= sign_bit || pvm_max >= sign_bit))
+ val |= max(sys_val, pvm_max) << shift;
+ else
+ val |= min(sys_val, pvm_max) << shift;
+ }
-static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
-{
- return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+ return val;
}
-/* Read a sanitized cpufeature ID register by its encoding */
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
{
switch (id) {
case SYS_ID_AA64PFR0_EL1:
- return get_pvm_id_aa64pfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0);
case SYS_ID_AA64PFR1_EL1:
- return get_pvm_id_aa64pfr1(vcpu);
- case SYS_ID_AA64ZFR0_EL1:
- return get_pvm_id_aa64zfr0(vcpu);
- case SYS_ID_AA64DFR0_EL1:
- return get_pvm_id_aa64dfr0(vcpu);
- case SYS_ID_AA64DFR1_EL1:
- return get_pvm_id_aa64dfr1(vcpu);
- case SYS_ID_AA64AFR0_EL1:
- return get_pvm_id_aa64afr0(vcpu);
- case SYS_ID_AA64AFR1_EL1:
- return get_pvm_id_aa64afr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1);
case SYS_ID_AA64ISAR0_EL1:
- return get_pvm_id_aa64isar0(vcpu);
+ return id_aa64isar0_el1_sys_val;
case SYS_ID_AA64ISAR1_EL1:
- return get_pvm_id_aa64isar1(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar1_el1_sys_val, pvmid_aa64isar1);
case SYS_ID_AA64ISAR2_EL1:
- return get_pvm_id_aa64isar2(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar2_el1_sys_val, pvmid_aa64isar2);
case SYS_ID_AA64MMFR0_EL1:
- return get_pvm_id_aa64mmfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr0_el1_sys_val, pvmid_aa64mmfr0);
case SYS_ID_AA64MMFR1_EL1:
- return get_pvm_id_aa64mmfr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr1_el1_sys_val, pvmid_aa64mmfr1);
case SYS_ID_AA64MMFR2_EL1:
- return get_pvm_id_aa64mmfr2(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr2_el1_sys_val, pvmid_aa64mmfr2);
+ case SYS_ID_AA64DFR0_EL1:
+ return ID_AA64DFR0_EL1_NONZERO_NI;
+ case SYS_ID_AA64MMFR4_EL1:
+ return ID_AA64MMFR4_EL1_NONZERO_NI;
default:
/* Unhandled ID register, RAZ */
return 0;
}
}
+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r)
{
- return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+ struct kvm *kvm = vcpu->kvm;
+ u32 reg = reg_to_encoding(r);
+
+ if (WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)))
+ return 0;
+
+ if (reg >= sys_reg(3, 0, 0, 1, 0) && reg <= sys_reg(3, 0, 0, 7, 7))
+ return kvm->arch.id_regs[IDREG_IDX(reg)];
+
+ return 0;
}
/* Handler to RAZ/WI sysregs */
@@ -271,13 +304,6 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
return false;
}
- /*
- * No support for AArch32 guests, therefore, pKVM has no sanitized copy
- * of AArch32 feature id registers.
- */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP);
-
return pvm_access_raz_wi(vcpu, p, r);
}
@@ -449,6 +475,30 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
};
/*
+ * Initializes the feature registers for protected VMs.
+ */
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_arch *ka = &kvm->arch;
+ u32 r;
+
+ hyp_assert_lock_held(&vm_table_lock);
+
+ if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
+ return;
+
+ /*
+ * Initialize only AArch64 id registers since AArch32 isn't supported
+ * for protected VMs.
+ */
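+ /*
+ * The loop walks the encoding space directly: adding
+ * sys_reg(0, 0, 0, 0, 1) increments Op2 and carries into CRm,
+ * covering the whole Op0=3, Op1=0, CRn=0, CRm={4..7} ID block.
+ */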
+ for (r = sys_reg(3, 0, 0, 4, 0); r <= sys_reg(3, 0, 0, 7, 7); r += sys_reg(0, 0, 0, 0, 1))
+ ka->id_regs[IDREG_IDX(r)] = pvm_calc_id_reg(vcpu, r);
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+}
+
+/*
* Checks that the sysreg table is unique and in-order.
*
* Returns 0 if the table is consistent, or 1 otherwise.
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index 3aaab20ae5b4..ff176f4ce7de 100644
--- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
*/
void __timer_disable_traps(struct kvm_vcpu *vcpu)
{
- u64 val, shift = 0;
+ u64 set, clr, shift = 0;
if (has_hvhe())
shift = 10;
/* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
- write_sysreg(val, cnthctl_el2);
+ set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
+ clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
/*
@@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
set <<= 10;
}
+ /*
+ * Trap the virtual counter/timer if we have a broken cntvoff
+ * implementation.
+ */
+ if (has_broken_cntvoff())
+ set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
sysreg_clear_set(cnthctl_el2, clr, set);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index d2b6fa051d6b..df5cc74a7dd0 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1232,14 +1232,13 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
NULL, NULL, 0);
}
-void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags)
{
int ret;
ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
- NULL, NULL,
- KVM_PGTABLE_WALK_HANDLE_FAULT |
- KVM_PGTABLE_WALK_SHARED);
+ NULL, NULL, flags);
if (!ret)
dsb(ishst);
}
@@ -1295,7 +1294,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
}
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot)
+ enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
{
int ret;
s8 level;
@@ -1313,9 +1312,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_X)
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
- ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
- KVM_PGTABLE_WALK_HANDLE_FAULT |
- KVM_PGTABLE_WALK_SHARED);
+ ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
index 289689b2682d..0100339b09e0 100644
--- a/arch/arm64/kvm/hyp/vhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -19,8 +19,3 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 59d992455793..b5b9dbaf1fdd 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -256,6 +256,110 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
}
+static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg reg)
+{
+ unsigned long ctl;
+ u64 cval, cnt;
+ bool stat;
+
+ switch (reg) {
+ case CNTP_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ cnt = compute_counter_value(vcpu_ptimer(vcpu));
+ break;
+ case CNTV_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
+ cnt = compute_counter_value(vcpu_vtimer(vcpu));
+ break;
+ default:
+ BUG();
+ }
+
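+ /*
+ * The architected timer condition is met once the counter reaches
+ * the compare value; "cval <= cnt" is that test, and __assign_bit()
+ * mirrors the result into the ISTATUS bit of the CTL value.
+ */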
+ stat = cval <= cnt;
+ __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat);
+
+ return ctl;
+}
+
+static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr, val;
+
+ /*
+ * Having FEAT_ECV allows for a better quality of timer emulation.
+ * However, this comes at a huge cost in terms of traps. Try to
+ * satisfy the reads from the guest's hypervisor context without
+ * returning to the kernel if we can.
+ */
+ if (!is_hyp_ctxt(vcpu))
+ return false;
+
+ esr = kvm_vcpu_get_esr(vcpu);
+ if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) != ESR_ELx_SYS64_ISS_DIR_READ)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_CNTP_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTP_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+
+ if (!has_cntpoff())
+ val -= timer_get_offset(vcpu_hptimer(vcpu));
+ } else {
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ }
+ break;
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ val = compute_counter_value(vcpu_hptimer(vcpu));
+ break;
+ case SYS_CNTV_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CVAL);
+ else
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ val = compute_counter_value(vcpu_hvtimer(vcpu));
+ break;
+ default:
+ return false;
+ }
+
+ vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
@@ -409,6 +513,9 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
return true;
+ if (kvm_hyp_handle_timer(vcpu, exit_code))
+ return true;
+
if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
return true;
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 5f78a39053a7..90b018e06f2c 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -216,7 +216,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
__sysreg32_restore_state(vcpu);
__sysreg_restore_user_state(guest_ctxt);
- if (unlikely(__is_hyp_ctxt(guest_ctxt))) {
+ if (unlikely(is_hyp_ctxt(vcpu))) {
__sysreg_restore_vel2_state(vcpu);
} else {
if (vcpu_has_nv(vcpu)) {
@@ -260,7 +260,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
host_ctxt = host_data_ptr(host_ctxt);
- if (unlikely(__is_hyp_ctxt(guest_ctxt)))
+ if (unlikely(is_hyp_ctxt(vcpu)))
__sysreg_save_vel2_state(vcpu);
else
__sysreg_save_el1_state(guest_ctxt);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c9d46ad57e52..1f55b0c7b11d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -15,6 +15,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
@@ -29,8 +30,12 @@ static unsigned long __ro_after_init hyp_idmap_start;
static unsigned long __ro_after_init hyp_idmap_end;
static phys_addr_t __ro_after_init hyp_idmap_vector;
+u32 __ro_after_init __hyp_va_bits;
+
static unsigned long __ro_after_init io_map_base;
+#define KVM_PGT_FN(fn) (!is_protected_kvm_enabled() ? fn : p ## fn)
+
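+/*
+ * For example, KVM_PGT_FN(kvm_pgtable_stage2_map) resolves at run time to
+ * kvm_pgtable_stage2_map() for regular KVM, or to pkvm_pgtable_stage2_map()
+ * (the 'p'-prefixed variant pasted by ##) when protected mode is enabled.
+ */
+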
static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
phys_addr_t size)
{
@@ -147,7 +152,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
return -EINVAL;
next = __stage2_range_addr_end(addr, end, chunk_size);
- ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_split)(pgt, addr, next - addr, cache);
if (ret)
break;
} while (addr = next, addr != end);
@@ -168,15 +173,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
*/
int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+ if (is_protected_kvm_enabled())
+ kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+ else
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
return 0;
}
int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
gfn_t gfn, u64 nr_pages)
{
- kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
- gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ u64 size = nr_pages << PAGE_SHIFT;
+ u64 addr = gfn << PAGE_SHIFT;
+
+ if (is_protected_kvm_enabled())
+ kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+ else
+ kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size);
return 0;
}
@@ -225,7 +238,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
void *pgtable = page_to_virt(page);
s8 level = page_private(page);
- kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
+ KVM_PGT_FN(kvm_pgtable_stage2_free_unlinked)(&kvm_s2_mm_ops, pgtable, level);
}
static void stage2_free_unlinked_table(void *addr, s8 level)
@@ -324,7 +337,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
- WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
+ WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap),
may_block));
}
@@ -336,7 +349,7 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
- stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush);
+ stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_flush));
}
static void stage2_flush_memslot(struct kvm *kvm,
@@ -704,10 +717,10 @@ int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
mutex_lock(&kvm_hyp_pgd_mutex);
/*
- * Efficient stack verification using the PAGE_SHIFT bit implies
+ * Efficient stack verification using the NVHE_STACK_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
- size = PAGE_SIZE * 2;
+ size = NVHE_STACK_SIZE * 2;
base = ALIGN_DOWN(io_map_base - size, size);
ret = __hyp_alloc_private_va_range(base);
@@ -724,12 +737,12 @@ int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
* at the higher address and leave the lower guard page
* unbacked.
*
- * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1
* and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection.
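+ * (A single bit test then suffices: any stack pointer with that bit
+ * clear must lie in the unbacked guard page, i.e. an overflow.)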
*/
- ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
- PAGE_HYP);
+ ret = __create_hyp_mappings(base + NVHE_STACK_SIZE, NVHE_STACK_SIZE,
+ phys_addr, PAGE_HYP);
if (ret)
kvm_err("Cannot map hyp stack\n");
@@ -942,10 +955,14 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return -ENOMEM;
mmu->arch = &kvm->arch;
- err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
+ err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops);
if (err)
goto out_free_pgtable;
+ mmu->pgt = pgt;
+ if (is_protected_kvm_enabled())
+ return 0;
+
mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
if (!mmu->last_vcpu_ran) {
err = -ENOMEM;
@@ -959,7 +976,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
mmu->split_page_cache.gfp_zero = __GFP_ZERO;
- mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
if (kvm_is_nested_s2_mmu(kvm, mmu))
@@ -968,7 +984,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return 0;
out_destroy_pgtable:
- kvm_pgtable_stage2_destroy(pgt);
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
out_free_pgtable:
kfree(pgt);
return err;
@@ -1065,7 +1081,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
- kvm_pgtable_stage2_destroy(pgt);
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kfree(pgt);
}
}
@@ -1082,9 +1098,11 @@ static void *hyp_mc_alloc_fn(void *unused)
void free_hyp_memcache(struct kvm_hyp_memcache *mc)
{
- if (is_protected_kvm_enabled())
- __free_hyp_memcache(mc, hyp_mc_free_fn,
- kvm_host_va, NULL);
+ if (!is_protected_kvm_enabled())
+ return;
+
+ kfree(mc->mapping);
+ __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL);
}
int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
@@ -1092,6 +1110,12 @@ int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
if (!is_protected_kvm_enabled())
return 0;
+ if (!mc->mapping) {
+ mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT);
+ if (!mc->mapping)
+ return -ENOMEM;
+ }
+
return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
kvm_host_pa, NULL);
}
@@ -1130,8 +1154,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
break;
write_lock(&kvm->mmu_lock);
- ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
- &cache, 0);
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, addr, PAGE_SIZE,
+ pa, prot, &cache, 0);
write_unlock(&kvm->mmu_lock);
if (ret)
break;
@@ -1151,7 +1175,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
*/
void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
- stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
+ stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_wrprotect));
}
/**
@@ -1442,9 +1466,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
unsigned long mmu_seq;
phys_addr_t ipa = fault_ipa;
struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
struct vm_area_struct *vma;
short vma_shift;
+ void *memcache;
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
@@ -1452,6 +1476,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
+ enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
if (fault_is_perm)
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
@@ -1471,8 +1496,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* and a write fault needs to collapse a block entry into a table.
*/
if (!fault_is_perm || (logging_active && write_fault)) {
- ret = kvm_mmu_topup_memory_cache(memcache,
- kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+ int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
+
+ if (!is_protected_kvm_enabled()) {
+ memcache = &vcpu->arch.mmu_page_cache;
+ ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
+ } else {
+ memcache = &vcpu->arch.pkvm_memcache;
+ ret = topup_hyp_memcache(memcache, min_pages);
+ }
if (ret)
return ret;
}
@@ -1493,7 +1525,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* logging_active is guaranteed to never be true for VM_PFNMAP
* memslots.
*/
- if (logging_active) {
+ if (logging_active || is_protected_kvm_enabled()) {
force_pte = true;
vma_shift = PAGE_SHIFT;
} else {
@@ -1633,7 +1665,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
prot |= kvm_encode_nested_level(nested);
}
- read_lock(&kvm->mmu_lock);
+ kvm_fault_lock(kvm);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
@@ -1695,18 +1727,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* PTE, which will be preserved.
*/
prot &= ~KVM_NV_GUEST_MAP_SZ;
- ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags);
} else {
- ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
__pfn_to_phys(pfn), prot,
- memcache,
- KVM_PGTABLE_WALK_HANDLE_FAULT |
- KVM_PGTABLE_WALK_SHARED);
+ memcache, flags);
}
out_unlock:
kvm_release_faultin_page(kvm, page, !!ret, writable);
- read_unlock(&kvm->mmu_lock);
+ kvm_fault_unlock(kvm);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret)
@@ -1718,13 +1748,14 @@ out_unlock:
/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
+ enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
struct kvm_s2_mmu *mmu;
trace_kvm_access_fault(fault_ipa);
read_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
- kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
+ KVM_PGT_FN(kvm_pgtable_stage2_mkyoung)(mmu->pgt, fault_ipa, flags);
read_unlock(&vcpu->kvm->mmu_lock);
}
@@ -1764,7 +1795,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
/* Falls between the IPA range and the PARange? */
- if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+ if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
if (is_iabt)
@@ -1930,7 +1961,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.mmu.pgt)
return false;
- return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT,
size, true);
/*
@@ -1946,7 +1977,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.mmu.pgt)
return false;
- return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
range->start << PAGE_SHIFT,
size, false);
}
@@ -2056,6 +2087,7 @@ int __init kvm_mmu_init(u32 *hyp_va_bits)
goto out_destroy_pgtable;
io_map_base = hyp_idmap_start;
+ __hyp_va_bits = *hyp_va_bits;
return 0;
out_destroy_pgtable:
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 9b36218b48de..33d2ace68665 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -830,8 +830,10 @@ static void limit_nv_id_regs(struct kvm *kvm)
NV_FTR(PFR0, RAS) |
NV_FTR(PFR0, EL3) |
NV_FTR(PFR0, EL2) |
- NV_FTR(PFR0, EL1));
- /* 64bit EL1/EL2/EL3 only */
+ NV_FTR(PFR0, EL1) |
+ NV_FTR(PFR0, EL0));
+ /* 64bit only at any EL */
+ val |= FIELD_PREP(NV_FTR(PFR0, EL0), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
@@ -963,14 +965,15 @@ static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0,
kvm->arch.sysreg_masks->mask[i].res1 = res1;
}
-int kvm_init_nv_sysregs(struct kvm *kvm)
+int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
u64 res0, res1;
lockdep_assert_held(&kvm->arch.config_lock);
if (kvm->arch.sysreg_masks)
- return 0;
+ goto out;
kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
GFP_KERNEL_ACCOUNT);
@@ -1021,8 +1024,8 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= HCR_NV2;
if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
- if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+ if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
res0 |= (HCR_API | HCR_APK);
if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP))
res0 |= BIT(39);
@@ -1078,8 +1081,8 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
/* HFG[RW]TR_EL2 */
res0 = res1 = 0;
- if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+ if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey |
HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey |
HFGxTR_EL2_APIBKey);
@@ -1271,6 +1274,25 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= MDCR_EL2_EnSTEPOP;
set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
+ /* CNTHCTL_EL2 */
+ res0 = GENMASK(63, 20);
+ res1 = 0;
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RME, IMP))
+ res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, CNTPOFF)) {
+ res0 |= CNTHCTL_ECV;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, IMP))
+ res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT |
+ CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT);
+ }
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
+ res0 |= GENMASK(11, 8);
+ set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1);
+
+out:
+ for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++)
+ (void)__vcpu_sys_reg(vcpu, sr);
+
return 0;
}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 85117ea8f351..930b677eb9b0 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>
@@ -268,3 +269,203 @@ static int __init finalize_pkvm(void)
return ret;
}
device_initcall_sync(finalize_pkvm);
+
+static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+{
+ struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
+ struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
+
+ if (a->gfn < b->gfn)
+ return -1;
+ if (a->gfn > b->gfn)
+ return 1;
+ return 0;
+}
+
+static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+{
+ struct rb_node *node = root->rb_node, *prev = NULL;
+ struct pkvm_mapping *mapping;
+
+ while (node) {
+ mapping = rb_entry(node, struct pkvm_mapping, node);
+ if (mapping->gfn == gfn)
+ return node;
+ prev = node;
+ node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
+ }
+
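+ /*
+ * On a miss, prev is the last node visited: an in-order neighbour of
+ * where gfn would sit. That is a safe starting point for the range
+ * iterator below, which filters on gfn anyway.
+ */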
+ return prev;
+}
+
+/*
+ * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
+ * of __map inline.
+ */
+#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
+ for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \
+ ((__start) >> PAGE_SHIFT)); \
+ __tmp && ({ \
+ __map = rb_entry(__tmp, struct pkvm_mapping, node); \
+ __tmp = rb_next(__tmp); \
+ true; \
+ }); \
+ ) \
+ if (__map->gfn < ((__start) >> PAGE_SHIFT)) \
+ continue; \
+ else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \
+ break; \
+ else
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ pgt->pkvm_mappings = RB_ROOT;
+ pgt->mmu = mmu;
+
+ return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ struct rb_node *node;
+
+ if (!handle)
+ return;
+
+ node = rb_first(&pgt->pkvm_mappings);
+ while (node) {
+ mapping = rb_entry(node, struct pkvm_mapping, node);
+ kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+ node = rb_next(node);
+ rb_erase(&mapping->node, &pgt->pkvm_mappings);
+ kfree(mapping);
+ }
+}
+
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ u64 phys, enum kvm_pgtable_prot prot,
+ void *mc, enum kvm_pgtable_walk_flags flags)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ struct pkvm_mapping *mapping = NULL;
+ struct kvm_hyp_memcache *cache = mc;
+ u64 gfn = addr >> PAGE_SHIFT;
+ u64 pfn = phys >> PAGE_SHIFT;
+ int ret;
+
+ if (size != PAGE_SIZE)
+ return -EINVAL;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
+ if (ret) {
+ /* Is the gfn already mapped due to a racing vCPU? */
+ if (ret == -EPERM)
+ return -EAGAIN;
+ }
+
+ swap(mapping, cache->mapping);
+ mapping->gfn = gfn;
+ mapping->pfn = pfn;
+ WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ int ret = 0;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+ if (WARN_ON(ret))
+ break;
+ rb_erase(&mapping->node, &pgt->pkvm_mappings);
+ kfree(mapping);
+ }
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ int ret = 0;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+ ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+ if (WARN_ON(ret))
+ break;
+ }
+
+ return ret;
+}
+
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ struct pkvm_mapping *mapping;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+ __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+
+ return 0;
+}
+
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
+{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+ pkvm_handle_t handle = kvm->arch.pkvm.handle;
+ struct pkvm_mapping *mapping;
+ bool young = false;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+ young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
+ mkold);
+
+ return young;
+}
+
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags)
+{
+ return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
+}
+
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags)
+{
+ WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
+}
+
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+{
+ WARN_ON_ONCE(1);
+}
+
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc, bool force_pte)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc)
+{
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 470524b31951..803e11b0dc8f 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -85,7 +85,7 @@ static void kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
- vcpu_set_flag(vcpu, GUEST_HAS_SVE);
+ set_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &vcpu->kvm->arch.flags);
}
/*
@@ -211,10 +211,6 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_vcpu_reset_sve(vcpu);
}
- if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
- vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))
- kvm_vcpu_enable_ptrauth(vcpu);
-
if (vcpu_el1_is_32bit(vcpu))
pstate = VCPU_RESET_PSTATE_SVC;
else if (vcpu_has_nv(vcpu))
diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
index 3ace5b75813b..af5eec681127 100644
--- a/arch/arm64/kvm/stacktrace.c
+++ b/arch/arm64/kvm/stacktrace.c
@@ -19,6 +19,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
#include <asm/stacktrace/nvhe.h>
static struct stack_info stackinfo_get_overflow(void)
@@ -50,7 +51,7 @@ static struct stack_info stackinfo_get_hyp(void)
struct kvm_nvhe_stacktrace_info *stacktrace_info
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
unsigned long low = (unsigned long)stacktrace_info->stack_base;
- unsigned long high = low + PAGE_SIZE;
+ unsigned long high = low + NVHE_STACK_SIZE;
return (struct stack_info) {
.low = low,
@@ -60,8 +61,8 @@ static struct stack_info stackinfo_get_hyp(void)
static struct stack_info stackinfo_get_hyp_kern_va(void)
{
- unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
- unsigned long high = low + PAGE_SIZE;
+ unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_base);
+ unsigned long high = low + NVHE_STACK_SIZE;
return (struct stack_info) {
.low = low,
@@ -145,7 +146,7 @@ static void unwind(struct unwind_state *state,
*/
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
{
- unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
+ unsigned long va_mask = GENMASK_ULL(__hyp_va_bits - 1, 0);
unsigned long hyp_offset = (unsigned long)arg;
/* Mask tags and convert to kern addr */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e4749ecbcd79..f6cd1ea7fb55 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -570,17 +570,10 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 oslsr;
-
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
- /* Forward the OSLK bit to OSLSR */
- oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK;
- if (p->regval & OSLAR_EL1_OSLK)
- oslsr |= OSLSR_EL1_OSLK;
-
- __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
+ kvm_debug_handle_oslar(vcpu, p->regval);
return true;
}
@@ -621,43 +614,13 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
}
}
-/*
- * We want to avoid world-switching all the DBG registers all the
- * time:
- *
- * - If we've touched any debug register, it is likely that we're
- * going to touch more of them. It then makes sense to disable the
- * traps and start doing the save/restore dance
- * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
- * then mandatory to save/restore the registers, as the guest
- * depends on them.
- *
- * For this, we use a DIRTY bit, indicating the guest has modified the
- * debug registers, used as follow:
- *
- * On guest entry:
- * - If the dirty bit is set (because we're coming back from trapping),
- * disable the traps, save host registers, restore guest registers.
- * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
- * set the dirty bit, disable the traps, save host registers,
- * restore guest registers.
- * - Otherwise, enable the traps
- *
- * On guest exit:
- * - If the dirty bit is set, save guest registers, restore host
- * registers and clear the dirty bit. This ensure that the host can
- * now use the debug registers.
- */
static bool trap_debug_regs(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
access_rw(vcpu, p, r);
- if (p->is_write)
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
+ kvm_debug_set_guest_ownership(vcpu);
return true;
}
@@ -666,9 +629,6 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
*
* A 32 bit write to a debug register leave top bits alone
* A 32 bit read from a debug register only returns the bottom bits
- *
- * All writes will set the DEBUG_DIRTY flag to ensure the hyp code
- * switches between host and guest values in future.
*/
static void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
@@ -683,8 +643,6 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
val &= ~mask;
val |= (p->regval & (mask >> shift)) << shift;
*dbg_reg = val;
-
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
}
static void dbg_to_reg(struct kvm_vcpu *vcpu,
@@ -698,152 +656,79 @@ static void dbg_to_reg(struct kvm_vcpu *vcpu,
p->regval = (*dbg_reg & mask) >> shift;
}
-static bool trap_bvr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
-
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
-
- return true;
-}
-
-static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
+static u64 *demux_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val;
- return 0;
-}
+ struct kvm_guest_debug_arch *dbg = &vcpu->arch.vcpu_debug_state;
-static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
-{
- *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
- return 0;
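+ /*
+ * Op2 of the DBGB{V,C}Rn_EL1 / DBGW{V,C}Rn_EL1 encodings selects the
+ * register family; CRm carries the breakpoint/watchpoint index n.
+ */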
+ switch (rd->Op2) {
+ case 0b100:
+ return &dbg->dbg_bvr[rd->CRm];
+ case 0b101:
+ return &dbg->dbg_bcr[rd->CRm];
+ case 0b110:
+ return &dbg->dbg_wvr[rd->CRm];
+ case 0b111:
+ return &dbg->dbg_wcr[rd->CRm];
+ default:
+ KVM_BUG_ON(1, vcpu->kvm);
+ return NULL;
+ }
}
-static u64 reset_bvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static bool trap_dbg_wb_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
- return rd->val;
-}
+ u64 *reg = demux_wb_reg(vcpu, rd);
-static bool trap_bcr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+ if (!reg)
+ return false;
if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, reg);
else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+ dbg_to_reg(vcpu, p, rd, reg);
+ kvm_debug_set_guest_ownership(vcpu);
return true;
}
-static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val;
- return 0;
-}
-
-static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
-{
- *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
- return 0;
-}
-
-static u64 reset_bcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
- return rd->val;
-}
-
-static bool trap_wvr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
+static int set_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
-
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
+ u64 *reg = demux_wb_reg(vcpu, rd);
- trace_trap_reg(__func__, rd->CRm, p->is_write,
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
-
- return true;
-}
-
-static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val;
- return 0;
-}
+ if (!reg)
+ return -EINVAL;
-static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
-{
- *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+ *reg = val;
return 0;
}
-static u64 reset_wvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
- return rd->val;
-}
-
-static bool trap_wcr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
+static int get_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 *val)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
+ u64 *reg = demux_wb_reg(vcpu, rd);
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
-
- return true;
-}
+ if (!reg)
+ return -EINVAL;
-static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val;
+ *val = *reg;
return 0;
}
-static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
+static u64 reset_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
{
- *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
- return 0;
-}
+ u64 *reg = demux_wb_reg(vcpu, rd);
-static u64 reset_wcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
+ /*
+ * Bail early if we couldn't find storage for the register; the
+ * KVM_BUG_ON() in demux_wb_reg() will prevent this VM from ever
+ * being run.
+ */
+ if (!reg)
+ return 0;
+
+ *reg = rd->val;
return rd->val;
}
@@ -1350,13 +1235,17 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
{ SYS_DESC(SYS_DBGBVRn_EL1(n)), \
- trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGBCRn_EL1(n)), \
- trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGWVRn_EL1(n)), \
- trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGWCRn_EL1(n)), \
- trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }
#define PMU_SYS_REG(name) \
SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \
@@ -1410,26 +1299,146 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
switch (reg) {
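+ /*
+ * When a guest hypervisor runs with E2H set, its CNTx_*_EL0 accesses
+ * are redirected to the EL2 timers, while the _EL02 aliases always
+ * name the EL0 view - hence the is_hyp_ctxt() checks below.
+ */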
case SYS_CNTP_TVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTV_TVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
case SYS_AARCH32_CNTP_TVAL:
+ case SYS_CNTP_TVAL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_TVAL;
break;
+
+ case SYS_CNTV_TVAL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTHP_TVAL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTHV_TVAL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
case SYS_CNTP_CTL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTV_CTL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
case SYS_AARCH32_CNTP_CTL:
+ case SYS_CNTP_CTL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CTL;
break;
+
+ case SYS_CNTV_CTL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTHP_CTL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTHV_CTL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
case SYS_CNTP_CVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTV_CVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
case SYS_AARCH32_CNTP_CVAL:
+ case SYS_CNTP_CVAL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CVAL;
break;
+
+ case SYS_CNTV_CVAL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTHP_CVAL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTHV_CVAL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
+ if (is_hyp_ctxt(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
case SYS_AARCH32_CNTPCT:
+ case SYS_AARCH32_CNTPCTSS:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CNT;
break;
+
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ if (is_hyp_ctxt(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
+ case SYS_AARCH32_CNTVCT:
+ case SYS_AARCH32_CNTVCTSS:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
return undef_access(vcpu, p, r);
@@ -1599,7 +1608,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
- if (!cpus_have_final_cap(ARM64_HAS_WFXT))
+ if (!cpus_have_final_cap(ARM64_HAS_WFXT) ||
+ has_broken_cntvoff())
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
break;
case SYS_ID_AA64ISAR3_EL1:
@@ -1807,6 +1817,9 @@ static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
/* Hide SPE from guests */
val &= ~ID_AA64DFR0_EL1_PMSVer_MASK;
+ /* Hide BRBE from guests */
+ val &= ~ID_AA64DFR0_EL1_BRBE_MASK;
+
return val;
}
@@ -2924,11 +2937,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(15),
{ SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer },
+
/* PMEVCNTRn_EL0 */
PMU_PMEVCNTR_EL0(0),
PMU_PMEVCNTR_EL0(1),
@@ -3080,9 +3099,24 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer },
+ EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
+ EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),
+
+ { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
+ EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
+ EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
+ { SYS_DESC(SYS_CNTP_TVAL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTP_CTL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTP_CVAL_EL02), access_arch_timer },
+
+ { SYS_DESC(SYS_CNTV_TVAL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CTL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CVAL_EL02), access_arch_timer },
+
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
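
These descriptor tables are kept sorted by register encoding, which is why the new CNTHP/CNTHV and EL02 rows slot in at these exact positions; a misordered table is rejected at boot. Condensed from the check referenced below (message text approximate):

	for (i = 1; i < n; i++) {
		if (cmp_sys_reg(&table[i - 1], &table[i]) >= 0) {
			kvm_err("sys_reg table %pS entry %d out of order\n",
				&table[i - 1], i - 1);
			return false;
		}
	}
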
@@ -3594,18 +3628,20 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
* None of the other registers share their location, so treat them as
* if they were 64bit.
*/
-#define DBG_BCR_BVR_WCR_WVR(n) \
- /* DBGBVRn */ \
- { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
- /* DBGBCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
- /* DBGWVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
- /* DBGWCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
-
-#define DBGBXVR(n) \
- { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n }
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), \
+ trap_dbg_wb_reg, NULL, n }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_dbg_wb_reg, NULL, n }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_dbg_wb_reg, NULL, n }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_dbg_wb_reg, NULL, n }
+
+#define DBGBXVR(n) \
+ { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), \
+ trap_dbg_wb_reg, NULL, n }
/*
* Trapped cp14 registers. We generally ignore most of the external
@@ -3902,9 +3938,11 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+ { SYS_DESC(SYS_AARCH32_CNTVCT), access_arch_timer },
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
{ SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer },
+ { SYS_DESC(SYS_AARCH32_CNTVCTSS), access_arch_timer },
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
@@ -4419,6 +4457,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
reset_vcpu_ftr_id_reg(vcpu, r);
else
r->reset(vcpu, r);
+
+ if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS)
+ (void)__vcpu_sys_reg(vcpu, r->reg);
}
set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
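
The discarded read works because memory-backed registers above __SANITISED_REG_START__ are sanitised in their accessor: the per-VM RES0/RES1 masks are folded into the backing store before the value is returned, so one throwaway read fixes up the just-reset value. Schematically, with hypothetical mask helpers:

	u64 sanitised_read(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg)
	{
		u64 val = vcpu->arch.ctxt.sys_regs[reg];

		val |= vm_res1_bits(vcpu->kvm, reg);	/* hypothetical */
		val &= ~vm_res0_bits(vcpu->kvm, reg);	/* hypothetical */
		vcpu->arch.ctxt.sys_regs[reg] = val;
		return val;
	}
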
@@ -4995,6 +5036,14 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
HAFGRTR_EL2_RES1);
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) {
+ kvm->arch.fgu[HDFGRTR_GROUP] |= (HDFGRTR_EL2_nBRBDATA |
+ HDFGRTR_EL2_nBRBCTL |
+ HDFGRTR_EL2_nBRBIDR);
+ kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_nBRBINJ |
+ HFGITR_EL2_nBRBIALL);
+ }
+
set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
out:
mutex_unlock(&kvm->arch.config_lock);
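
Note the polarity: the n-prefixed bits are negative trap controls, so recording them in the fine-grained-undef (FGU) mask ultimately means driving them to their trapping state when the FGT registers are programmed for the guest. A condensed sketch of that final step (not the literal hyp code):

	static u64 apply_fgu(u64 reg_val, u64 fgu, u64 pos_bits, u64 neg_bits)
	{
		reg_val |= fgu & pos_bits;	/* positive polarity: set to trap */
		reg_val &= ~(fgu & neg_bits);	/* nXXX bits: clear to trap */
		return reg_val;
	}
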
@@ -5022,7 +5071,7 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
}
if (vcpu_has_nv(vcpu)) {
- int ret = kvm_init_nv_sysregs(kvm);
+ int ret = kvm_init_nv_sysregs(vcpu);
if (ret)
return ret;
}
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index 064a58c19f48..f85415db7713 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -46,38 +46,6 @@ TRACE_EVENT(kvm_hvc_arm64,
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
-TRACE_EVENT(kvm_arm_setup_debug,
- TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
- TP_ARGS(vcpu, guest_debug),
-
- TP_STRUCT__entry(
- __field(struct kvm_vcpu *, vcpu)
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
-);
-
-TRACE_EVENT(kvm_arm_clear_debug,
- TP_PROTO(__u32 guest_debug),
- TP_ARGS(guest_debug),
-
- TP_STRUCT__entry(
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("flags: 0x%08x", __entry->guest_debug)
-);
-
/*
* The dreg32 name is a leftover from a distant past. This will really
* output a 64bit value...
@@ -99,49 +67,6 @@ TRACE_EVENT(kvm_arm_set_dreg32,
TP_printk("%s: 0x%llx", __entry->name, __entry->value)
);
-TRACE_DEFINE_SIZEOF(__u64);
-
-TRACE_EVENT(kvm_arm_set_regset,
- TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
- TP_ARGS(type, len, control, value),
- TP_STRUCT__entry(
- __field(const char *, name)
- __field(int, len)
- __array(u64, ctrls, 16)
- __array(u64, values, 16)
- ),
- TP_fast_assign(
- __entry->name = type;
- __entry->len = len;
- memcpy(__entry->ctrls, control, len << 3);
- memcpy(__entry->values, value, len << 3);
- ),
- TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
- __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
- __print_array(__entry->values, __entry->len, sizeof(__u64)))
-);
-
-TRACE_EVENT(trap_reg,
- TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
- TP_ARGS(fn, reg, is_write, write_value),
-
- TP_STRUCT__entry(
- __field(const char *, fn)
- __field(int, reg)
- __field(bool, is_write)
- __field(u64, write_value)
- ),
-
- TP_fast_assign(
- __entry->fn = fn;
- __entry->reg = reg;
- __entry->is_write = is_write;
- __entry->write_value = write_value;
- ),
-
- TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
-);
-
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index f267bc2486a1..d7233ab982d0 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -6,6 +6,7 @@
#include <linux/kstrtox.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/string_choices.h>
#include <kvm/arm_vgic.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -663,9 +664,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (info->has_v4) {
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
- kvm_info("GICv4%s support %sabled\n",
+ kvm_info("GICv4%s support %s\n",
kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
- gicv4_enable ? "en" : "dis");
+ str_enabled_disabled(gicv4_enable));
}
kvm_vgic_global_state.vcpu_base = 0;
@@ -734,7 +735,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -746,7 +748,8 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
if (has_vhe())
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index eb17f59e543c..1e65f2fb45bd 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -105,6 +105,7 @@ WORKAROUND_CLEAN_CACHE
WORKAROUND_DEVICE_LOAD_ACQUIRE
WORKAROUND_NVIDIA_CARMEL_CNP
WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_QCOM_ORYON_CNTVOFF
WORKAROUND_REPEAT_TLBI
WORKAROUND_SPECULATIVE_AT
WORKAROUND_SPECULATIVE_SSBS
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 94ca9cdb0b16..762ee084b37c 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2064,6 +2064,18 @@ Field 17:16 ZEN
Res0 15:0
EndSysreg
+Sysreg TRFCR_EL1 3 0 1 2 1
+Res0 63:7
+UnsignedEnum 6:5 TS
+ 0b0001 VIRTUAL
+ 0b0010 GUEST_PHYSICAL
+ 0b0011 PHYSICAL
+EndEnum
+Res0 4:2
+Field 1 ExTRE
+Field 0 E0TRE
+EndSysreg
+
Sysreg SMPRI_EL1 3 0 1 2 4
Res0 63:4
Field 3:0 PRIORITY
@@ -2613,6 +2625,22 @@ Field 1 ICIALLU
Field 0 ICIALLUIS
EndSysreg
+Sysreg TRFCR_EL2 3 4 1 2 1
+Res0 63:7
+UnsignedEnum 6:5 TS
+ 0b0000 USE_TRFCR_EL1_TS
+ 0b0001 VIRTUAL
+ 0b0010 GUEST_PHYSICAL
+ 0b0011 PHYSICAL
+EndEnum
+Res0 4
+Field 3 CX
+Res0 2
+Field 1 E2TRE
+Field 0 E0HTRE
+EndSysreg
+
Sysreg HDFGRTR_EL2 3 4 3 1 4
Field 63 PMBIDR_EL1
Field 62 nPMSNEVFR_EL1
@@ -3023,6 +3051,10 @@ Sysreg ZCR_EL12 3 5 1 2 0
Mapping ZCR_EL1
EndSysreg
+Sysreg TRFCR_EL12 3 5 1 2 1
+Mapping TRFCR_EL1
+EndSysreg
+
Sysreg SMCR_EL12 3 5 1 2 6
Mapping SMCR_EL1
EndSysreg
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index dd8c74f893db..2c1a60577728 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
+#include <linux/kvm_host.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -268,10 +269,28 @@ struct etm4_enable_arg {
*/
static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata)
{
+ u64 trfcr;
+
/* If the CPU doesn't support FEAT_TRF, nothing to do */
if (!drvdata->trfcr)
return;
- cpu_prohibit_trace();
+
+ trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE);
+
+ write_trfcr(trfcr);
+ kvm_tracing_set_el1_configuration(trfcr);
+}
+
+static u64 etm4x_get_kern_user_filter(struct etmv4_drvdata *drvdata)
+{
+ u64 trfcr = drvdata->trfcr;
+
+ if (drvdata->config.mode & ETM_MODE_EXCL_KERN)
+ trfcr &= ~TRFCR_EL1_ExTRE;
+ if (drvdata->config.mode & ETM_MODE_EXCL_USER)
+ trfcr &= ~TRFCR_EL1_E0TRE;
+
+ return trfcr;
}
/*
@@ -286,18 +305,28 @@ static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata)
*/
static void etm4x_allow_trace(struct etmv4_drvdata *drvdata)
{
- u64 trfcr = drvdata->trfcr;
+ u64 trfcr, guest_trfcr;
/* If the CPU doesn't support FEAT_TRF, nothing to do */
- if (!trfcr)
+ if (!drvdata->trfcr)
return;
- if (drvdata->config.mode & ETM_MODE_EXCL_KERN)
- trfcr &= ~TRFCR_ELx_ExTRE;
- if (drvdata->config.mode & ETM_MODE_EXCL_USER)
- trfcr &= ~TRFCR_ELx_E0TRE;
+ if (drvdata->config.mode & ETM_MODE_EXCL_HOST)
+ trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE);
+ else
+ trfcr = etm4x_get_kern_user_filter(drvdata);
write_trfcr(trfcr);
+
+ /* Set filters for guests and pass to KVM */
+ if (drvdata->config.mode & ETM_MODE_EXCL_GUEST)
+ guest_trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE);
+ else
+ guest_trfcr = etm4x_get_kern_user_filter(drvdata);
+
+ /* TRFCR_EL1 doesn't have CX so mask it out. */
+ guest_trfcr &= ~TRFCR_EL2_CX;
+ kvm_tracing_set_el1_configuration(guest_trfcr);
}
#ifdef CONFIG_ETM4X_IMPDEF_FEATURE
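
Putting the two paths above together: the host value lands in TRFCR_EL1 via write_trfcr(), and a parallel guest value, with CX stripped since TRFCR_EL1 has no such bit, goes to KVM. For a session opened with exclude_guest, one would expect roughly:

	/* Illustrative values for an exclude_guest session: */
	u64 host  = TRFCR_EL1_TS_VIRTUAL | TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE;
	u64 guest = host & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE);

	write_trfcr(host);				/* trace host EL1 + EL0 */
	kvm_tracing_set_el1_configuration(guest & ~TRFCR_EL2_CX); /* guest off */
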
@@ -655,6 +684,12 @@ static int etm4_parse_event_config(struct coresight_device *csdev,
if (attr->exclude_user)
config->mode = ETM_MODE_EXCL_USER;
+ if (attr->exclude_host)
+ config->mode |= ETM_MODE_EXCL_HOST;
+
+ if (attr->exclude_guest)
+ config->mode |= ETM_MODE_EXCL_GUEST;
+
/* Always start from the default config */
etm4_set_default_config(config);
@@ -1141,9 +1176,9 @@ static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata)
* tracing at the kernel EL and EL0, forcing to use the
* virtual time as the timestamp.
*/
- trfcr = (TRFCR_ELx_TS_VIRTUAL |
- TRFCR_ELx_ExTRE |
- TRFCR_ELx_E0TRE);
+ trfcr = (TRFCR_EL1_TS_VIRTUAL |
+ TRFCR_EL1_ExTRE |
+ TRFCR_EL1_E0TRE);
/* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
if (is_kernel_in_hyp_mode())
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index a9f19629f3f8..c767f8ae4cf1 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -2319,11 +2319,11 @@ static ssize_t ts_source_show(struct device *dev,
goto out;
}
- switch (drvdata->trfcr & TRFCR_ELx_TS_MASK) {
- case TRFCR_ELx_TS_VIRTUAL:
- case TRFCR_ELx_TS_GUEST_PHYSICAL:
- case TRFCR_ELx_TS_PHYSICAL:
- val = FIELD_GET(TRFCR_ELx_TS_MASK, drvdata->trfcr);
+ switch (drvdata->trfcr & TRFCR_EL1_TS_MASK) {
+ case TRFCR_EL1_TS_VIRTUAL:
+ case TRFCR_EL1_TS_GUEST_PHYSICAL:
+ case TRFCR_EL1_TS_PHYSICAL:
+ val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr);
break;
default:
val = -1;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 9e9165f62e81..1119762b5cec 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -817,7 +817,7 @@ enum etm_impdef_type {
* @s_ex_level: Secure ELs where tracing is supported.
*/
struct etmv4_config {
- u32 mode;
+ u64 mode;
u32 pe_sel;
u32 cfg;
u32 eventctrl0;
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 05f891ca6b5c..76403530f33e 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -42,6 +42,9 @@ extern const struct device_type coresight_dev_type[];
#define ETM_MODE_EXCL_KERN BIT(30)
#define ETM_MODE_EXCL_USER BIT(31)
+#define ETM_MODE_EXCL_HOST BIT(32)
+#define ETM_MODE_EXCL_GUEST BIT(33)
+
struct cs_pair_attribute {
struct device_attribute attr;
u32 lo_off;
diff --git a/drivers/hwtracing/coresight/coresight-self-hosted-trace.h b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h
index 53840a2c41f2..303d71911870 100644
--- a/drivers/hwtracing/coresight/coresight-self-hosted-trace.h
+++ b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h
@@ -21,13 +21,4 @@ static inline void write_trfcr(u64 val)
isb();
}
-static inline u64 cpu_prohibit_trace(void)
-{
- u64 trfcr = read_trfcr();
-
- /* Prohibit tracing at EL0 & the kernel EL */
- write_trfcr(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE));
- /* Return the original value of the TRFCR */
- return trfcr;
-}
#endif /* __CORESIGHT_SELF_HOSTED_TRACE_H */
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 919804b12a67..fff67aac8418 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -17,6 +17,7 @@
#include <asm/barrier.h>
#include <asm/cpufeature.h>
+#include <linux/kvm_host.h>
#include <linux/vmalloc.h>
#include "coresight-self-hosted-trace.h"
@@ -221,6 +222,7 @@ static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
*/
trblimitr |= TRBLIMITR_EL1_E;
write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
+ kvm_enable_trbe();
/* Synchronize the TRBE enable event */
isb();
@@ -239,6 +241,7 @@ static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
*/
trblimitr &= ~TRBLIMITR_EL1_E;
write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
+ kvm_disable_trbe();
if (trbe_needs_drain_after_disable(cpudata))
trbe_drain_buffer();
@@ -253,8 +256,8 @@ static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
static void trbe_reset_local(struct trbe_cpudata *cpudata)
{
- trbe_drain_and_disable_local(cpudata);
write_sysreg_s(0, SYS_TRBLIMITR_EL1);
+ trbe_drain_buffer();
write_sysreg_s(0, SYS_TRBPTR_EL1);
write_sysreg_s(0, SYS_TRBBASER_EL1);
write_sysreg_s(0, SYS_TRBSR_EL1);
@@ -1110,6 +1113,16 @@ static bool is_perf_trbe(struct perf_output_handle *handle)
return true;
}
+static u64 cpu_prohibit_trace(void)
+{
+ u64 trfcr = read_trfcr();
+
+ /* Prohibit tracing at EL0 & the kernel EL */
+ write_trfcr(trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE));
+ /* Return the original value of the TRFCR */
+ return trfcr;
+}
+
static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
{
struct perf_output_handle **handle_ptr = dev;
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cbbc9a6dc571..ce6521ad04d1 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -22,6 +22,12 @@
#define CNTHCTL_EVNTDIR (1 << 3)
#define CNTHCTL_EVNTI (0xF << 4)
#define CNTHCTL_ECV (1 << 12)
+#define CNTHCTL_EL1TVT (1 << 13)
+#define CNTHCTL_EL1TVCT (1 << 14)
+#define CNTHCTL_EL1NVPCT (1 << 15)
+#define CNTHCTL_EL1NVVCT (1 << 16)
+#define CNTHCTL_CNTVMASK (1 << 18)
+#define CNTHCTL_CNTPMASK (1 << 19)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index fd650a8789b9..681cf0c8b9df 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -98,6 +98,7 @@ int __init kvm_timer_hyp_init(bool has_gic);
int kvm_timer_enable(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu);
void kvm_timer_sync_user(struct kvm_vcpu *vcpu);
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
void kvm_timer_update_run(struct kvm_vcpu *vcpu);
@@ -150,9 +151,31 @@ void kvm_timer_cpu_down(void);
/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0))
+DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key);
+
+static inline bool has_broken_cntvoff(void)
+{
+ return static_branch_unlikely(&broken_cntvoff_key);
+}
+
static inline bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
+static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
+{
+ u64 offset = 0;
+
+ if (!ctxt)
+ return 0;
+
+ if (ctxt->offset.vm_offset)
+ offset += *ctxt->offset.vm_offset;
+ if (ctxt->offset.vcpu_offset)
+ offset += *ctxt->offset.vcpu_offset;
+
+ return offset;
+}
+
#endif
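
timer_get_offset() simply sums whichever offsets are wired into the context; the guest-visible count is then derived from the physical counter. Illustratively, with kvm_phys_timer_read() as used elsewhere in the timer code:

	/* e.g. guest CNTVCT = host physical count - (CNTVOFF + vcpu offset) */
	static u64 guest_counter(struct arch_timer_context *ctxt)
	{
		return kvm_phys_timer_read() - timer_get_offset(ctxt);
	}
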
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index cd8420e8c3ad..150416682e2c 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -11,6 +11,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
+#include <linux/kasan-tags.h>
#include <asm/gpr-num.h>
@@ -108,6 +109,9 @@
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
+
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -123,6 +127,37 @@
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@@ -162,6 +197,84 @@
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
+#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -170,8 +283,6 @@
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -202,15 +313,38 @@
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
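
Since bit 9 means S on a fault but NS on success, and bits 8:7 switch between PTW and SH, a consumer has to branch on F before touching anything else. A minimal decode sketch (assumes FIELD_GET from linux/bitfield.h):

	static u64 par_to_pa_or_fst(u64 par)
	{
		if (par & SYS_PAR_EL1_F)	/* translation aborted */
			return FIELD_GET(SYS_PAR_EL1_FST, par);

		return FIELD_GET(SYS_PAR_EL1_PA, par) << 12;	/* PA[51:12] */
	}
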
/*** Statistical Profiling Extension ***/
#define PMSEVFR_EL1_RES0_IMP \
@@ -274,6 +408,8 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
+
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
@@ -286,7 +422,6 @@
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5)
#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
@@ -369,6 +504,7 @@
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
@@ -381,13 +517,15 @@
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
-#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
+#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
@@ -449,24 +587,49 @@
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m) (m & 0x7)
+#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2)
/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
+#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
+#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
@@ -477,6 +640,183 @@
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
+
+/* TLBI instructions */
+#define TLBI_Op0 1
+
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */
+#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses) */
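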
+
+#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */
+#define TLBI_CRm_nROS 1 /* non-Range, Outer-Shareable */
+#define TLBI_CRm_RIS 2 /* Range, Inner-Shareable */
+#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Shareable */
+#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS 5 /* Range, Outer-Shareable */
+#define TLBI_CRm_RNS 6 /* Range, Non-Shareable */
+#define TLBI_CRm_nRNS 7 /* non-Range, Non-Shareable */
+
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
@@ -555,16 +895,14 @@
/* Position the attr at the correct index */
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1
-#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2
-
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
@@ -572,6 +910,7 @@
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -582,11 +921,13 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -610,6 +951,19 @@
#define SYS_GCR_EL1_RRND (BIT(16))
#define SYS_GCR_EL1_EXCL_MASK 0xffffUL
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK
+#endif
+
#define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
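
Working the arithmetic through for the HW_TAGS case: 0xF0 & 0xf = 0x0 and 0xFD & 0xf = 0xD, so __MTE_TAG_INCL = GENMASK(13, 0) = 0x3fff and KERNEL_GCR_EL1_EXCL = 0xffff & ~0x3fff = 0xc000, leaving only tags 0xE and 0xF excluded from random generation. A hypothetical compile-time check of those constants:

	static_assert(__MTE_TAG_MIN == 0x0 && __MTE_TAG_MAX == 0xd);
	static_assert(KERNEL_GCR_EL1_EXCL == 0xc000);	/* excludes 0xe, 0xf */
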
/* RGSR_EL1 Definitions */
@@ -626,15 +980,6 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#define TRFCR_ELx_TS_SHIFT 5
-#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX BIT(3)
-#define TRFCR_ELx_ExTRE BIT(1)
-#define TRFCR_ELx_E0TRE BIT(0)
-
/* GIC Hypervisor interface registers */
/* ICH_MISR_EL2 bit definitions */
#define ICH_MISR_EOI (1 << 0)
@@ -716,6 +1061,22 @@
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
+/* Initial value for Permission Overlay Extension for EL0 */
+#define POR_EL0_INIT POE_RXW
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
@@ -789,15 +1150,21 @@
/*
* For registers without architectural names, or simply unsupported by
* GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
*/
#define read_sysreg_s(r) ({ \
u64 __val; \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
+ u32 __maybe_unused __check_r = (u32)(r); \
asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
} while (0)
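
Concretely, the cast makes the register argument an evaluated C expression rather than something that only ever reaches the assembler through __stringify(), so a bad encoding now fails in the compiler with a readable diagnostic. Hypothetical misuse:

	/* Typo: SYS_MIDR_EL_1 is undefined. Previously this surfaced as an
	 * obscure assembler error; with __check_r it is a plain C error. */
	u64 midr = read_sysreg_s(SYS_MIDR_EL_1);
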
@@ -827,6 +1194,8 @@
par; \
})
+#define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val
+
#define SYS_FIELD_GET(reg, field, val) \
FIELD_GET(reg##_##field##_MASK, val)
@@ -834,7 +1203,8 @@
FIELD_PREP(reg##_##field##_MASK, val)
#define SYS_FIELD_PREP_ENUM(reg, field, val) \
- FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+ FIELD_PREP(reg##_##field##_MASK, \
+ SYS_FIELD_VALUE(reg, field, val))
#endif
diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h
new file mode 100644
index 000000000000..4f85f562512c
--- /dev/null
+++ b/tools/include/linux/kasan-tags.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_TAGS_H
+#define _LINUX_KASAN_TAGS_H
+
+#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */
+#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */
+#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */
+#endif
+
+#endif /* _LINUX_KASAN_TAGS_H */
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index 447d61cae4db..cef8f7323ceb 100644
--- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -147,7 +147,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+ return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
int main(void)
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 3dd85ce8551c..217541fe6536 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -666,7 +666,7 @@ int main(void)
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
- aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();