summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marc Zyngier <maz@kernel.org> 2023-04-21 09:44:58 +0100
committer Marc Zyngier <maz@kernel.org> 2023-04-21 09:44:58 +0100
commit 36fe1b29b3cae48f781011abd5a0b9e938f5b35f (patch)
tree 74ae4b78cee40bdee6fa6653eeb84e6ffec07842
parent 6dcf7316e05eccded11fc640813c8a8879f271a6 (diff)
parent bcf3e7da3ad3bfea38ac6ba9f56b99b2877af51f (diff)
Merge branch kvm-arm64/spec-ptw into kvmarm-master/next
* kvm-arm64/spec-ptw: : . : On taking an exception from EL1&0 to EL2(&0), the page table walker is : allowed to carry on with speculative walks started from EL1&0 while : running at EL2 (see R_LFHQG). Given that the PTW may be actively using : the EL1&0 system registers, the only safe way to deal with it is to : issue a DSB before changing any of it. : : We already did the right thing for SPE and TRBE, but ignored the PTW : for unknown reasons (probably because the architecture wasn't crystal : clear at the time). : : This requires a bit of surgery in the nvhe code, though most of these : patches are comments so that my future self can understand the purpose : of these barriers. The VHE code is largely unaffected, thanks to the : DSB in the context switch. : . KVM: arm64: vhe: Drop extra isb() on guest exit KVM: arm64: vhe: Synchronise with page table walker on MMU update KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc() KVM: arm64: nvhe: Synchronise with page table walker on TLBI KVM: arm64: nvhe: Synchronise with page table walker on vcpu run Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/switch.c | 18
-rw-r--r-- arch/arm64/kvm/hyp/nvhe/tlb.c | 38
-rw-r--r-- arch/arm64/kvm/hyp/vhe/switch.c | 7
-rw-r--r-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 12
6 files changed, 69 insertions, 15 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 2673bde62fad..d756b939f296 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1)
/* Now drain all buffered data to memory */
psb_csync();
- dsb(nsh);
}
static void __debug_restore_spe(u64 pmscr_el1)
@@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1)
isb();
/* Drain the trace buffer to memory */
tsb_csync();
- dsb(nsh);
}
static void __debug_restore_trace(u64 trfcr_el1)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 552653fa18be..2e9ec4a2a4a3 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void)
params->vttbr = kvm_get_vttbr(mmu);
params->vtcr = host_mmu.arch.vtcr;
params->hcr_el2 |= HCR_VM;
+
+ /*
+ * The CMO below not only cleans the updated params to the
+ * PoC, but also provides the DSB that ensures ongoing
+ * page-table walks that have started before we trapped to EL2
+ * have completed.
+ */
kvm_flush_dcache_to_poc(params, sizeof(*params));
write_sysreg(params->hcr_el2, hcr_el2);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c2cb46ca4fb6..71fa16a0dc77 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
*/
__debug_save_host_buffers_nvhe(vcpu);
+ /*
+ * We're about to restore some new MMU state. Make sure
+ * ongoing page-table walks that have started before we
+ * trapped to EL2 have completed. This also synchronises the
+ * above disabling of SPE and TRBE.
+ *
+ * See DDI0487I.a D8.1.5 "Out-of-context translation regimes",
+ * rule R_LFHQG and subsequent information statements.
+ */
+ dsb(nsh);
+
__kvm_adjust_pc(vcpu);
/*
@@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__timer_disable_traps(vcpu);
__hyp_vgic_save_state(vcpu);
+ /*
+ * Same thing as before the guest run: we're about to switch
+ * the MMU context, so let's make sure we don't have any
+ * ongoing EL1&0 translations.
+ */
+ dsb(nsh);
+
__deactivate_traps(vcpu);
__load_host_stage2();
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index d296d617f589..978179133f4b 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -15,8 +15,31 @@ struct tlb_inv_context {
};
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ /*
+ * We have two requirements:
+ *
+ * - ensure that the page table updates are visible to all
+ * CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN
+ * being either ish or nsh, depending on the invalidation
+ * type.
+ *
+ * - complete any speculative page table walk started before
+ * we trapped to EL2 so that we can mess with the MM
+ * registers out of context, for which dsb(nsh) is enough
+ *
+ * The composition of these two barriers is a dsb(DOMAIN), and
+ * the 'nsh' parameter tracks the distinction between
+ * Inner-Shareable and Non-Shareable, as specified by the
+ * callers.
+ */
+ if (nsh)
+ dsb(nsh);
+ else
+ dsb(ish);
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
@@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
{
struct tlb_inv_context cxt;
- dsb(ishst);
-
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
- dsb(ishst);
-
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
@@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ __tlb_switch_to_guest(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
@@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
void __kvm_flush_vm_context(void)
{
- dsb(ishst);
+ /* Same remark as in __tlb_switch_to_guest() */
+ dsb(ish);
__tlbi(alle1is);
/*
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index cd3f3117bf16..3d868e84c7a0 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/*
* When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
+ * parameters, such as traps. We rely on the isb() in kvm_call_hyp*()
+ * to make sure these changes take effect before running the host or
+ * additional guests.
*/
- isb();
-
return ret;
}
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 7b44f6b3b547..b35a178e7e0d 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -13,6 +13,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
/*
* VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
@@ -70,6 +71,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
__sysreg_save_user_state(host_ctxt);
/*
+ * When running a normal EL1 guest, we only load a new vcpu
+ * after a context switch, which involves a DSB, so all
+ * speculative EL1&0 walks will have already completed.
+ * If running NV, the vcpu may transition between vEL1 and
+ * vEL2 without a context switch, so make sure we complete
+ * those walks before loading a new context.
+ */
+ if (vcpu_has_nv(vcpu))
+ dsb(nsh);
+
+ /*
* Load guest EL1 and user state
*
* We must restore the 32-bit state before the sysregs, thanks