summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c22
-rw-r--r--arch/arm64/kvm/hyp/entry.S13
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h50
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h5
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h467
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h116
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/ffa.h2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h223
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h6
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h39
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/memory.h50
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h28
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/trap_handler.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c82
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c214
-rw-r--r--arch/arm64/kvm/hyp/nvhe/gen-hyprel.c6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S12
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S90
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c374
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c911
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c16
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c14
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c403
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c7
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c55
-rw-r--r--arch/arm64/kvm/hyp/nvhe/stacktrace.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c125
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c404
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/timer-sr.c16
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c120
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c165
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c127
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/vhe/debug-sr.c5
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c442
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c166
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c176
41 files changed, 3097 insertions, 1893 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index a38dea6186c9..d61e44642f98 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,7 +3,7 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-incdir := $(srctree)/$(src)/include
+incdir := $(src)/include
subdir-asflags-y := -I$(incdir)
subdir-ccflags-y := -I$(incdir)
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index f98cbe2626a1..449fa58cf3b6 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
u32 cpsr_cond;
int cond;
- /* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_esr(vcpu) >> 30)
+ /*
+ * These are the exception classes that could fire with a
+ * conditional instruction.
+ */
+ switch (kvm_vcpu_trap_get_class(vcpu)) {
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_FP_ASIMD:
+ case ESR_ELx_EC_CP10_ID:
+ case ESR_ELx_EC_CP14_64:
+ case ESR_ELx_EC_SVC32:
+ break;
+ default:
return true;
+ }
/* Is condition field valid? */
cond = kvm_vcpu_get_condition(vcpu);
@@ -84,7 +98,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
}
/**
- * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
* @vcpu: The VCPU pointer
*
* When exceptions occur while instructions are executed in Thumb IF-THEN
@@ -120,7 +134,7 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
}
/**
- * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
* @vcpu: The vcpu pointer
*/
void kvm_skip_instr32(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index f3aa7738b477..9f4e8d68ab50 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
alternative_else_nop_endif
mrs x1, isr_el1
cbz x1, 1f
+
+ // Ensure that __guest_enter() always provides a context
+ // synchronization event so that callers don't need ISBs for anything
+ // that would usually be synchonized by the ERET.
+ isb
mov x0, #ARM_EXCEPTION_IRQ
ret
@@ -83,6 +88,14 @@ alternative_else_nop_endif
eret
sb
+SYM_INNER_LABEL(__guest_exit_restore_elr_and_panic, SYM_L_GLOBAL)
+ // x2-x29,lr: vcpu regs
+ // vcpu x0-x1 on the stack
+
+ adr_this_cpu x0, kvm_hyp_ctxt, x1
+ ldr x0, [x0, #CPU_ELR_EL2]
+ msr elr_el2, x0
+
SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
// x2-x29,lr: vcpu regs
// vcpu x0-x1 on the stack
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b7d..e950875e31ce 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 961bbef104a6..502a5b73ee70 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -88,15 +88,26 @@
default: write_debug(ptr[0], reg, 0); \
}
+static struct kvm_guest_debug_arch *__vcpu_debug_regs(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.debug_owner) {
+ case VCPU_DEBUG_FREE:
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case VCPU_DEBUG_GUEST_OWNED:
+ return &vcpu->arch.vcpu_debug_state;
+ case VCPU_DEBUG_HOST_OWNED:
+ return &vcpu->arch.external_debug_state;
+ }
+
+ return NULL;
+}
+
static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
save_debug(dbg->dbg_bcr, dbgbcr, brps);
save_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -109,13 +120,8 @@ static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
-
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
restore_debug(dbg->dbg_bcr, dbgbcr, brps);
restore_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -132,13 +138,13 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ host_dbg = host_data_ptr(host_debug_state.regs);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(host_dbg, host_ctxt);
__debug_restore_state(guest_dbg, guest_ctxt);
@@ -151,18 +157,16 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ host_dbg = host_data_ptr(host_debug_state.regs);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt);
-
- vcpu_clear_flag(vcpu, DEBUG_DIRTY);
}
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9e13c1bc2ad5..17df94570f03 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -14,6 +14,7 @@
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
+ int ret;
u64 par, tmp;
/*
@@ -27,7 +28,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
+ ret = system_supports_poe() ? __kvm_at(OP_AT_S1E1A, far) :
+ __kvm_at(OP_AT_S1E1R, far);
+ if (!ret)
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a038320cdb08..23bbe28eaaf9 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -39,12 +39,6 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs are owned by the guest */
-static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
-}
-
/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
@@ -79,14 +73,48 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
clr |= ~hfg & __ ## reg ## _nMASK; \
} while(0)
-#define update_fgt_traps_cs(vcpu, reg, clr, set) \
+#define reg_to_fgt_group_id(reg) \
+ ({ \
+ enum fgt_group_id id; \
+ switch(reg) { \
+ case HFGRTR_EL2: \
+ case HFGWTR_EL2: \
+ id = HFGxTR_GROUP; \
+ break; \
+ case HFGITR_EL2: \
+ id = HFGITR_GROUP; \
+ break; \
+ case HDFGRTR_EL2: \
+ case HDFGWTR_EL2: \
+ id = HDFGRTR_GROUP; \
+ break; \
+ case HAFGRTR_EL2: \
+ id = HAFGRTR_GROUP; \
+ break; \
+ default: \
+ BUILD_BUG_ON(1); \
+ } \
+ \
+ id; \
+ })
+
+#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \
+ do { \
+ u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \
+ set |= hfg & __ ## reg ## _MASK; \
+ clr |= hfg & __ ## reg ## _nMASK; \
+ } while(0)
+
+#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \
do { \
- struct kvm_cpu_context *hctxt = \
- &this_cpu_ptr(&kvm_host_data)->host_ctxt; \
u64 c = 0, s = 0; \
\
ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
- compute_clr_set(vcpu, reg, c, s); \
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \
+ compute_clr_set(vcpu, reg, c, s); \
+ \
+ compute_undef_clr_set(vcpu, kvm, reg, c, s); \
+ \
s |= set; \
c |= clr; \
if (c || s) { \
@@ -97,8 +125,8 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
} \
} while(0)
-#define update_fgt_traps(vcpu, reg) \
- update_fgt_traps_cs(vcpu, reg, 0, 0)
+#define update_fgt_traps(hctxt, vcpu, kvm, reg) \
+ update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0)
/*
* Validate the fine grain trap masks.
@@ -121,9 +149,8 @@ static inline bool cpu_has_amu(void)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
- u64 r_val, w_val;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
CHECK_FGT_MASKS(HFGRTR_EL2);
CHECK_FGT_MASKS(HFGWTR_EL2);
@@ -136,72 +163,74 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
- ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
- ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
-
- if (cpus_have_final_cap(ARM64_SME)) {
- tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
-
- r_clr |= tmp;
- w_clr |= tmp;
- }
-
- /*
- * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD.
- */
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- w_set |= HFGxTR_EL2_TCR_EL1_MASK;
-
- if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
- compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
- compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
- }
+ update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2);
+ update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0,
+ cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ?
+ HFGxTR_EL2_TCR_EL1_MASK : 0);
+ update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2);
+ update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2);
- /* The default to trap everything not handled or supported in KVM. */
- tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
- HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
-
- r_val = __HFGRTR_EL2_nMASK & ~tmp;
- r_val |= r_set;
- r_val &= ~r_clr;
+ if (cpu_has_amu())
+ update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2);
+}
- w_val = __HFGWTR_EL2_nMASK & ~tmp;
- w_val |= w_set;
- w_val &= ~w_clr;
+#define __deactivate_fgt(htcxt, vcpu, kvm, reg) \
+ do { \
+ if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) || \
+ kvm->arch.fgu[reg_to_fgt_group_id(reg)]) \
+ write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
+ SYS_ ## reg); \
+ } while(0)
- write_sysreg_s(r_val, SYS_HFGRTR_EL2);
- write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
- update_fgt_traps(vcpu, HFGITR_EL2);
- update_fgt_traps(vcpu, HDFGRTR_EL2);
- update_fgt_traps(vcpu, HDFGWTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2);
+ if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
+ else
+ __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2);
+ __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2);
if (cpu_has_amu())
- update_fgt_traps(vcpu, HAFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2);
}
-static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
+static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ u64 r = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1;
- if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ if (!system_supports_mpam())
return;
- write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
- write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
+ /* trap guest access to MPAMIDR_EL1 */
+ if (system_supports_mpam_hcr()) {
+ write_sysreg_s(MPAMHCR_EL2_TRAP_MPAMIDR_EL1, SYS_MPAMHCR_EL2);
+ } else {
+ /* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */
+ r |= MPAM2_EL2_TIDR;
+ }
- if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ write_sysreg_s(r, SYS_MPAM2_EL2);
+}
+
+static inline void __deactivate_traps_mpam(void)
+{
+ if (!system_supports_mpam())
return;
- write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
- write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
- write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+ write_sysreg_s(0, SYS_MPAM2_EL2);
- if (cpu_has_amu())
- write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
+ if (system_supports_mpam_hcr())
+ write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -220,17 +249,17 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
- u64 hcrx = HCRX_GUEST_FLAGS;
+ u64 hcrx = vcpu->arch.hcrx_el2;
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
u64 clr = 0, set = 0;
@@ -244,17 +273,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
}
__activate_traps_hfgxtr(vcpu);
+ __activate_traps_mpam(vcpu);
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
- write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
@@ -263,12 +293,11 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
__deactivate_traps_hfgxtr(vcpu);
+ __deactivate_traps_mpam();
}
-static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
{
- u64 hcr = vcpu->arch.hcr_el2;
-
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
@@ -297,7 +326,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
-static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
{
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
@@ -315,23 +344,129 @@ static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
+ /*
+ * The vCPU's saved SVE state layout always matches the max VL of the
+ * vCPU. Start off with the max VL so we can load the SVE state.
+ */
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr);
- write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ true);
+
+ /*
+ * The effective VL for a VM could differ from the max VL when running a
+ * nested guest, as the guest hypervisor could select a smaller VL. Slap
+ * that into hardware before wrapping up.
+ */
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2);
+
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR);
+}
+
+static inline void __hyp_sve_save_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+}
+
+static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs())
+ return;
+
+ if (vcpu_has_sve(vcpu)) {
+ /* A guest hypervisor may restrict the effective max VL. */
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
+ zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
+ else
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu));
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+}
+
+static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs())
+ return;
+
+ /*
+ * When the guest owns the FP regs, we know that guest+hyp traps for
+ * any FPSIMD/SVE/SME features exposed to the guest have been disabled
+ * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+ * prior to __guest_entry(). As __guest_entry() guarantees a context
+ * synchronization event, we don't need an ISB here to avoid taking
+ * traps for anything that was exposed to the guest.
+ */
+ if (vcpu_has_sve(vcpu)) {
+ zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
+
+ /*
+ * The guest's state is always saved using the guest's max VL.
+ * Ensure that the host has the guest's max VL active such that
+ * the host can save the guest's state lazily, but don't
+ * artificially restrict the host to the guest's max VL.
+ */
+ if (has_vhe()) {
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+ } else {
+ zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+ }
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state as not to reveal that
+ * fpsimd was used by a guest nor leak upper sve bits.
+ */
+ if (system_supports_sve()) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_EL1_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
+ }
+
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
+ *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
+}
+
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
* If FP/SIMD is not implemented, handle the trap and inject an undefined
* instruction exception to the guest. Similarly for trapped SVE accesses.
*/
-static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
- u64 reg;
if (!system_supports_fpsimd())
return false;
@@ -342,10 +477,19 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Only handle traps the vCPU can support here: */
switch (esr_ec) {
case ESR_ELx_EC_FP_ASIMD:
+ /* Forward traps to the guest hypervisor as required */
+ if (guest_hyp_fpsimd_traps_enabled(vcpu))
+ return false;
break;
+ case ESR_ELx_EC_SYS64:
+ if (WARN_ON_ONCE(!is_hyp_ctxt(vcpu)))
+ return false;
+ fallthrough;
case ESR_ELx_EC_SVE:
if (!sve_guest)
return false;
+ if (guest_hyp_sve_traps_enabled(vcpu))
+ return false;
break;
default:
return false;
@@ -354,24 +498,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
+ cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ else
+ cpacr_clear_set(0, CPACR_EL1_FPEN);
isb();
/* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ if (is_protected_kvm_enabled() && host_owns_fp_regs())
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
@@ -379,11 +514,14 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
else
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
+ write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
+
/* Skip restoring fpexc32 for AArch64 guests */
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
return true;
}
@@ -443,61 +581,25 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u64 esr)
+/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */
+static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt)
{
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
+ u64 offset = 0;
- return false;
-}
-
-#define __ptrauth_save_key(ctxt, key) \
- do { \
- u64 __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
-} while(0)
+ if (ctxt->offset.vm_offset)
+ offset += *kern_hyp_va(ctxt->offset.vm_offset);
+ if (ctxt->offset.vcpu_offset)
+ offset += *kern_hyp_va(ctxt->offset.vcpu_offset);
-DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
+ return offset;
+}
-static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu))
- return false;
-
- ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
- __ptrauth_save_key(ctxt, APIA);
- __ptrauth_save_key(ctxt, APIB);
- __ptrauth_save_key(ctxt, APDA);
- __ptrauth_save_key(ctxt, APDB);
- __ptrauth_save_key(ctxt, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
+ return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt);
}
-static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
+static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
u32 sysreg;
@@ -507,18 +609,19 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
* We only get here for 64bit guests, 32bit guests will hit
* the long and winding road all the way to the standard
* handling. Yes, it sucks to be irrelevant.
+ *
+ * Also, we only deal with non-hypervisor context here (either
+ * an EL1 guest, or a non-HYP context of an EL2 guest).
*/
+ if (is_hyp_ctxt(vcpu))
+ return false;
+
sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
switch (sysreg) {
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
if (vcpu_has_nv(vcpu)) {
- if (is_hyp_ctxt(vcpu)) {
- ctxt = vcpu_hptimer(vcpu);
- break;
- }
-
/* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
if (!vcpu_el2_e2h_is_set(vcpu))
@@ -530,16 +633,23 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
ctxt = vcpu_ptimer(vcpu);
break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ if (vcpu_has_nv(vcpu)) {
+ /* Check for guest hypervisor trapping */
+ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ if (val & CNTHCTL_EL1TVCT)
+ return false;
+ }
+
+ ctxt = vcpu_vtimer(vcpu);
+ break;
default:
return false;
}
- val = arch_timer_read_cntpct_el0();
-
- if (ctxt->offset.vm_offset)
- val -= *kern_hyp_va(ctxt->offset.vm_offset);
- if (ctxt->offset.vcpu_offset)
- val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
+ val = compute_counter_value(ctxt);
vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
__kvm_skip_instr(vcpu);
@@ -570,7 +680,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
return true;
}
-static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
handle_tx2_tvm(vcpu))
@@ -584,16 +694,13 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
- return kvm_hyp_handle_ptrauth(vcpu, exit_code);
-
- if (kvm_hyp_handle_cntpct(vcpu))
+ if (kvm_handle_cntxct(vcpu))
return true;
return false;
}
-static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
@@ -602,19 +709,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
+ u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
-static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
+#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
+#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault
-static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
@@ -644,23 +750,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
-
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
-
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
* Returns true if the hypervisor handled the exit, and control should go back
* to the guest, or false if it hasn't.
*/
-static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
- exit_handler_fn fn;
-
- fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
-
+ exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
if (fn)
return fn(vcpu, exit_code);
@@ -690,20 +789,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
* the guest, false when we should restore the host state and return to the
* main run loop.
*/
-static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- /*
- * Save PSTATE early so that we can evaluate the vcpu mode
- * early on.
- */
- synchronize_vcpu_pstate(vcpu, exit_code);
-
- /*
- * Check whether we want to repaint the state one way or
- * another.
- */
- early_exit_filter(vcpu, exit_code);
-
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
@@ -733,7 +821,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
goto exit;
/* Check if there's an exit handler and allow it to handle the exit. */
- if (kvm_hyp_handle_exit(vcpu, exit_code))
+ if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
goto guest;
exit:
/* Return to the host kernel and handle the exit */
@@ -747,7 +835,7 @@ guest:
static inline void __kvm_unexpected_el2_exception(void)
{
- extern char __guest_exit_panic[];
+ extern char __guest_exit_restore_elr_and_panic[];
unsigned long addr, fixup;
struct kvm_exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
@@ -769,7 +857,8 @@ static inline void __kvm_unexpected_el2_exception(void)
}
/* Trigger a panic after restoring the hyp context. */
- write_sysreg(__guest_exit_panic, elr_el2);
+ this_cpu_ptr(&kvm_hyp_ctxt)->sys_regs[ELR_EL2] = elr_el2;
+ write_sysreg(__guest_exit_restore_elr_and_panic, elr_el2);
}
#endif /* __ARM64_KVM_HYP_SWITCH_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index bb6b571ec627..76ff095c6b6e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -16,9 +16,40 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
+
+static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return vcpu;
+}
+
+static inline bool ctxt_is_guest(struct kvm_cpu_context *ctxt)
+{
+ return host_data_ptr(host_ctxt) != ctxt;
+}
+
+static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
+
+ if (ctxt_is_guest(ctxt) && kvm_host_owns_debug_regs(vcpu))
+ return &vcpu->arch.external_mdscr_el1;
+
+ return &ctxt_sys_reg(ctxt, MDSCR_EL1);
+}
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+ *ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL0) = read_sysreg_s(SYS_POR_EL0);
}
static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -29,14 +60,44 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
-
- if (!vcpu)
- vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+ struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
return kvm_has_mte(kern_hyp_va(vcpu->kvm));
}
+static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_s1pie(kern_hyp_va(vcpu->kvm));
+}
+
+static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!cpus_have_final_cap(ARM64_HAS_TCR2))
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_tcr2(kern_hyp_va(vcpu->kvm));
+}
+
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!system_supports_poe())
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -44,8 +105,17 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
- if (cpus_have_final_cap(ARM64_HAS_TCR2))
+ if (ctxt_has_tcrx(ctxt)) {
ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2);
+
+ if (ctxt_has_s1pie(ctxt)) {
+ ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
+ ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
+ }
+
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL1) = read_sysreg_el1(SYS_POR);
+ }
ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
@@ -55,10 +125,6 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
- if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
- ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
- ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
- }
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
@@ -88,7 +154,11 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+ write_sysreg(*ctxt_mdscr_el1(ctxt), mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_s(ctxt_sys_reg(ctxt, POR_EL0), SYS_POR_EL0);
}
static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
@@ -97,9 +167,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
}
-static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
+ u64 mpidr)
{
- write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(mpidr, vmpidr_el2);
if (has_vhe() ||
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -120,8 +191,17 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
- if (cpus_have_final_cap(ARM64_HAS_TCR2))
+ if (ctxt_has_tcrx(ctxt)) {
write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2);
+
+ if (ctxt_has_s1pie(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
+ }
+
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, POR_EL1), SYS_POR);
+ }
write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
@@ -131,10 +211,6 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
- if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
- write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
- write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
- }
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
@@ -222,7 +298,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}
@@ -239,7 +315,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
index d9fd5e6c7d3c..146e0aebfa1c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
@@ -9,7 +9,7 @@
#include <asm/kvm_host.h>
#define FFA_MIN_FUNC_NUM 0x60
-#define FFA_MAX_FUNC_NUM 0x7F
+#define FFA_MAX_FUNC_NUM 0xFF
int hyp_ffa_init(void *pages);
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
deleted file mode 100644
index 51f043649146..000000000000
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2021 Google LLC
- * Author: Fuad Tabba <tabba@google.com>
- */
-
-#ifndef __ARM64_KVM_FIXED_CONFIG_H__
-#define __ARM64_KVM_FIXED_CONFIG_H__
-
-#include <asm/sysreg.h>
-
-/*
- * This file contains definitions for features to be allowed or restricted for
- * guest virtual machines, depending on the mode KVM is running in and on the
- * type of guest that is running.
- *
- * The ALLOW masks represent a bitmask of feature fields that are allowed
- * without any restrictions as long as they are supported by the system.
- *
- * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
- * features that are restricted to support at most the specified feature.
- *
- * If a feature field is not present in either, than it is not supported.
- *
- * The approach taken for protected VMs is to allow features that are:
- * - Needed by common Linux distributions (e.g., floating point)
- * - Trivial to support, e.g., supporting the feature does not introduce or
- * require tracking of additional state in KVM
- * - Cannot be trapped or prevent the guest from using anyway
- */
-
-/*
- * Allow for protected VMs:
- * - Floating-point and Advanced SIMD
- * - Data Independent Timing
- * - Spectre/Meltdown Mitigation
- */
-#define PVM_ID_AA64PFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - AArch64 guests only (no support for AArch32 guests):
- * AArch32 adds complexity in trap handling, emulation, condition codes,
- * etc...
- * - RAS (v1)
- * Supported by KVM
- */
-#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
- )
-
-/*
- * Allow for protected VMs:
- * - Branch Target Identification
- * - Speculative Store Bypassing
- */
-#define PVM_ID_AA64PFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
- )
-
-#define PVM_ID_AA64PFR2_ALLOW 0ULL
-
-/*
- * Allow for protected VMs:
- * - Mixed-endian
- * - Distinction between Secure and Non-secure Memory
- * - Mixed-endian at EL0 only
- * - Non-context synchronizing exception entry and exit
- */
-#define PVM_ID_AA64MMFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - 40-bit IPA
- * - 16-bit ASID
- */
-#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
- )
-
-/*
- * Allow for protected VMs:
- * - Hardware translation table updates to Access flag and Dirty state
- * - Number of VMID bits from CPU
- * - Hierarchical Permission Disables
- * - Privileged Access Never
- * - SError interrupt exceptions from speculative reads
- * - Enhanced Translation Synchronization
- * - Control for cache maintenance permission
- */
-#define PVM_ID_AA64MMFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
- )
-
-/*
- * Allow for protected VMs:
- * - Common not Private translations
- * - User Access Override
- * - IESB bit in the SCTLR_ELx registers
- * - Unaligned single-copy atomicity and atomic functions
- * - ESR_ELx.EC value on an exception by read access to feature ID space
- * - TTL field in address operations.
- * - Break-before-make sequences when changing translation block size
- * - E0PDx mechanism
- */
-#define PVM_ID_AA64MMFR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
- )
-
-#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
-
-/*
- * No support for Scalable Vectors for protected VMs:
- * Requires additional support from KVM, e.g., context-switching and
- * trapping at EL2
- */
-#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
-
-/*
- * No support for debug, including breakpoints, and watchpoints for protected
- * VMs:
- * The Arm architecture mandates support for at least the Armv8 debug
- * architecture, which would include at least 2 hardware breakpoints and
- * watchpoints. Providing that support to protected guests adds
- * considerable state and complexity. Therefore, the reserved value of 0 is
- * used for debug-related fields.
- */
-#define PVM_ID_AA64DFR0_ALLOW (0ULL)
-#define PVM_ID_AA64DFR1_ALLOW (0ULL)
-
-/*
- * No support for implementation defined features.
- */
-#define PVM_ID_AA64AFR0_ALLOW (0ULL)
-#define PVM_ID_AA64AFR1_ALLOW (0ULL)
-
-/*
- * No restrictions on instructions implemented in AArch64.
- */
-#define PVM_ID_AA64ISAR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
- )
-
-/* Restrict pointer authentication to the basic version. */
-#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
- )
-
-#define PVM_ID_AA64ISAR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
- )
-
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
-bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
-bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
-int kvm_check_pvm_sysreg_table(void);
-
-#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 97c527ef53c2..3766333bace9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>
-#define HYP_NO_ORDER USHRT_MAX
+#define HYP_NO_ORDER ((u8)(~0))
struct hyp_pool {
/*
@@ -19,11 +19,11 @@ struct hyp_pool {
struct list_head free_area[NR_PAGE_ORDERS];
phys_addr_t range_start;
phys_addr_t range_end;
- unsigned short max_order;
+ u8 max_order;
};
/* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void *hyp_alloc_pages(struct hyp_pool *pool, u8 order);
void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0972faccc2af..978f38c386ee 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -11,40 +11,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/virt.h>
+#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/spinlock.h>
-/*
- * SW bits 0-1 are reserved to track the memory ownership state of each page:
- * 00: The page is owned exclusively by the page-table owner.
- * 01: The page is owned by the page-table owner, but is shared
- * with another entity.
- * 10: The page is shared with, but not owned by the page-table owner.
- * 11: Reserved for future use (lending).
- */
-enum pkvm_page_state {
- PKVM_PAGE_OWNED = 0ULL,
- PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
- PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
- __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
- KVM_PGTABLE_PROT_SW1,
-
- /* Meta-states which aren't encoded directly in the PTE's SW bits */
- PKVM_NOPAGE,
-};
-
-#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
-static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
- enum pkvm_page_state state)
-{
- return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
-}
-
-static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
-{
- return prot & PKVM_PAGE_STATE_PROT_MASK;
-}
-
struct host_mmu {
struct kvm_arch arch;
struct kvm_pgtable pgt;
@@ -69,6 +39,13 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+ enum kvm_pgtable_prot prot);
+int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
+int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index ab205c4d6774..34233d586060 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,9 +7,47 @@
#include <linux/types.h>
+/*
+ * Bits 0-1 are reserved to track the memory ownership state of each page:
+ * 00: The page is owned exclusively by the page-table owner.
+ * 01: The page is owned by the page-table owner, but is shared
+ * with another entity.
+ * 10: The page is shared with, but not owned by the page-table owner.
+ * 11: Reserved for future use (lending).
+ */
+enum pkvm_page_state {
+ PKVM_PAGE_OWNED = 0ULL,
+ PKVM_PAGE_SHARED_OWNED = BIT(0),
+ PKVM_PAGE_SHARED_BORROWED = BIT(1),
+ __PKVM_PAGE_RESERVED = BIT(0) | BIT(1),
+
+ /* Meta-states which aren't encoded directly in the PTE's SW bits */
+ PKVM_NOPAGE = BIT(2),
+};
+#define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED)
+
+#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
+static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
+ enum pkvm_page_state state)
+{
+ prot &= ~PKVM_PAGE_STATE_PROT_MASK;
+ prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state);
+ return prot;
+}
+
+static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
+{
+ return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot);
+}
+
struct hyp_page {
- unsigned short refcount;
- unsigned short order;
+ u16 refcount;
+ u8 order;
+
+ /* Host (non-meta) state. Guarded by the host stage-2 lock. */
+ enum pkvm_page_state host_state : 8;
+
+ u32 host_share_guest_count;
};
extern u64 __hyp_vmemmap;
@@ -29,7 +67,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
-#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
+
+static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
+{
+ BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64));
+ return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
+}
+
#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 82b3d62538a6..e42bf68c8848 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -20,6 +20,12 @@ struct pkvm_hyp_vcpu {
/* Backpointer to the host's (untrusted) vCPU instance. */
struct kvm_vcpu *host_vcpu;
+
+ /*
+ * If this hyp vCPU is loaded, then this is a backpointer to the
+ * per-cpu pointer tracking us. Otherwise, NULL if not loaded.
+ */
+ struct pkvm_hyp_vcpu **loaded_hyp_vcpu;
};
/*
@@ -47,12 +53,24 @@ struct pkvm_hyp_vm {
struct pkvm_hyp_vcpu *vcpus[];
};
+extern hyp_spinlock_t vm_table_lock;
+
static inline struct pkvm_hyp_vm *
pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
{
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
+static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ return vcpu_is_protected(&hyp_vcpu->vcpu);
+}
+
+static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
+{
+ return kvm_vm_is_protected(&hyp_vm->kvm);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
@@ -64,5 +82,15 @@ int __pkvm_teardown_vm(pkvm_handle_t handle);
struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
unsigned int vcpu_idx);
void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void);
+
+struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle);
+struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle);
+void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm);
+
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu);
+int kvm_check_pvm_sysreg_table(void);
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 45a84f0ade04..1e6d995968a1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -15,6 +15,4 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
-void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
-
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 2250253a6429..b43426a493df 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
+CFLAGS_switch.nvhe.o += -Wno-override-init
+
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
@@ -89,24 +91,11 @@ quiet_cmd_hyprel = HYPREL $@
quiet_cmd_hypcopy = HYPCOPY $@
cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@
-# Remove ftrace, Shadow Call Stack, and CFI CFLAGS.
-# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations.
-KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS))
+# Remove ftrace and Shadow Call Stack CFLAGS.
+# This is equivalent to the 'notrace' and '__noscs' annotations.
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile'
# when profile optimization is applied. gen-hyprel does not support SHT_REL and
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
-
-# KVM nVHE code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
-
-# Skip objtool checking for this directory because nVHE code is compiled with
-# non-standard build rules.
-OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 4558c02eb352..2f4a4f5036bb 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -31,8 +31,8 @@ static void __debug_save_spe(u64 *pmscr_el1)
return;
/* Yes; save the control register and disable data generation */
- *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
- write_sysreg_s(0, SYS_PMSCR_EL1);
+ *pmscr_el1 = read_sysreg_el1(SYS_PMSCR);
+ write_sysreg_el1(0, SYS_PMSCR);
isb();
/* Now drain all buffered data to memory */
@@ -48,45 +48,58 @@ static void __debug_restore_spe(u64 pmscr_el1)
isb();
/* Re-enable data generation */
- write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+ write_sysreg_el1(pmscr_el1, SYS_PMSCR);
}
-static void __debug_save_trace(u64 *trfcr_el1)
+static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
{
- *trfcr_el1 = 0;
+ *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
+ write_sysreg_el1(new_trfcr, SYS_TRFCR);
+}
- /* Check if the TRBE is enabled */
- if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
- return;
- /*
- * Prohibit trace generation while we are in guest.
- * Since access to TRFCR_EL1 is trapped, the guest can't
- * modify the filtering set by the host.
- */
- *trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1);
- write_sysreg_s(0, SYS_TRFCR_EL1);
- isb();
- /* Drain the trace buffer to memory */
- tsb_csync();
+static bool __trace_needs_drain(void)
+{
+ if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE))
+ return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E;
+
+ return host_data_test_flag(TRBE_ENABLED);
}
-static void __debug_restore_trace(u64 trfcr_el1)
+static bool __trace_needs_switch(void)
{
- if (!trfcr_el1)
- return;
+ return host_data_test_flag(TRBE_ENABLED) ||
+ host_data_test_flag(EL1_TRACING_CONFIGURED);
+}
- /* Restore trace filter controls */
- write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1);
+static void __trace_switch_to_guest(void)
+{
+ /* Unsupported with TRBE so disable */
+ if (host_data_test_flag(TRBE_ENABLED))
+ *host_data_ptr(trfcr_while_in_guest) = 0;
+
+ __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
+ *host_data_ptr(trfcr_while_in_guest));
+
+ if (__trace_needs_drain()) {
+ isb();
+ tsb_csync();
+ }
+}
+
+static void __trace_switch_to_host(void)
+{
+ __trace_do_switch(host_data_ptr(trfcr_while_in_guest),
+ *host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
- /* Disable and flush Self-Hosted Trace generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
+ if (host_data_test_flag(HAS_SPE))
+ __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
+
+ if (__trace_needs_switch())
+ __trace_switch_to_guest();
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -96,18 +109,13 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
+ if (host_data_test_flag(HAS_SPE))
+ __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
+ if (__trace_needs_switch())
+ __trace_switch_to_host();
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 320f2eaa14a9..e433dfab882a 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -67,6 +67,9 @@ struct kvm_ffa_buffers {
*/
static struct kvm_ffa_buffers hyp_buffers;
static struct kvm_ffa_buffers host_buffers;
+static u32 hyp_ffa_version;
+static bool has_version_negotiated;
+static hyp_spinlock_t version_lock;
static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
{
@@ -177,6 +180,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
+static void ffa_rx_release(struct arm_smccc_res *res)
+{
+ arm_smccc_1_1_smc(FFA_RX_RELEASE,
+ 0, 0,
+ 0, 0, 0, 0, 0,
+ res);
+}
+
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -415,9 +426,9 @@ out:
return;
}
-static __always_inline void do_ffa_mem_xfer(const u64 func_id,
- struct arm_smccc_res *res,
- struct kvm_cpu_context *ctxt)
+static void __do_ffa_mem_xfer(const u64 func_id,
+ struct arm_smccc_res *res,
+ struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
DECLARE_REG(u32, fraglen, ctxt, 2);
@@ -429,9 +440,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
u32 offset, nr_ranges;
int ret = 0;
- BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
- func_id != FFA_FN64_MEM_LEND);
-
if (addr_mbz || npages_mbz || fraglen > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
@@ -450,11 +458,16 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
+ if (len > ffa_desc_buf.len) {
+ ret = FFA_RET_NO_MEMORY;
+ goto out_unlock;
+ }
+
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
ep_mem_access = (void *)buf +
- ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0);
+ ffa_mem_desc_offset(buf, 0, hyp_ffa_version);
offset = ep_mem_access->composite_off;
if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) {
ret = FFA_RET_INVALID_PARAMETERS;
@@ -501,6 +514,13 @@ err_unshare:
goto out_unlock;
}
+#define do_ffa_mem_xfer(fid, res, ctxt) \
+ do { \
+ BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \
+ (fid) != FFA_FN64_MEM_LEND); \
+ __do_ffa_mem_xfer((fid), (res), (ctxt)); \
+ } while (0);
+
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -533,7 +553,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a2;
ep_mem_access = (void *)buf +
- ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0);
+ ffa_mem_desc_offset(buf, 0, hyp_ffa_version);
offset = ep_mem_access->composite_off;
/*
* We can trust the SPMD to get this right, but let's at least
@@ -543,16 +563,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
+ ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
+ ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
@@ -563,6 +586,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
@@ -600,7 +624,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
- case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
@@ -640,6 +663,132 @@ out_handled:
return true;
}
+static int hyp_ffa_post_init(void)
+{
+ size_t min_rxtx_sz;
+ struct arm_smccc_res res;
+
+ arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
+ if (res.a0 != FFA_SUCCESS)
+ return -EOPNOTSUPP;
+
+ if (res.a2 != HOST_FFA_ID)
+ return -EINVAL;
+
+ arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
+ 0, 0, 0, 0, 0, 0, &res);
+ if (res.a0 != FFA_SUCCESS)
+ return -EOPNOTSUPP;
+
+ switch (res.a2) {
+ case FFA_FEAT_RXTX_MIN_SZ_4K:
+ min_rxtx_sz = SZ_4K;
+ break;
+ case FFA_FEAT_RXTX_MIN_SZ_16K:
+ min_rxtx_sz = SZ_16K;
+ break;
+ case FFA_FEAT_RXTX_MIN_SZ_64K:
+ min_rxtx_sz = SZ_64K;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (min_rxtx_sz > PAGE_SIZE)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void do_ffa_version(struct arm_smccc_res *res,
+ struct kvm_cpu_context *ctxt)
+{
+ DECLARE_REG(u32, ffa_req_version, ctxt, 1);
+
+ if (FFA_MAJOR_VERSION(ffa_req_version) != 1) {
+ res->a0 = FFA_RET_NOT_SUPPORTED;
+ return;
+ }
+
+ hyp_spin_lock(&version_lock);
+ if (has_version_negotiated) {
+ res->a0 = hyp_ffa_version;
+ goto unlock;
+ }
+
+ /*
+ * If the client driver tries to downgrade the version, we need to ask
+ * first if TEE supports it.
+ */
+ if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
+ arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0,
+ 0, 0, 0, 0, 0,
+ res);
+ if (res->a0 == FFA_RET_NOT_SUPPORTED)
+ goto unlock;
+
+ hyp_ffa_version = ffa_req_version;
+ }
+
+ if (hyp_ffa_post_init())
+ res->a0 = FFA_RET_NOT_SUPPORTED;
+ else {
+ has_version_negotiated = true;
+ res->a0 = hyp_ffa_version;
+ }
+unlock:
+ hyp_spin_unlock(&version_lock);
+}
+
+static void do_ffa_part_get(struct arm_smccc_res *res,
+ struct kvm_cpu_context *ctxt)
+{
+ DECLARE_REG(u32, uuid0, ctxt, 1);
+ DECLARE_REG(u32, uuid1, ctxt, 2);
+ DECLARE_REG(u32, uuid2, ctxt, 3);
+ DECLARE_REG(u32, uuid3, ctxt, 4);
+ DECLARE_REG(u32, flags, ctxt, 5);
+ u32 count, partition_sz, copy_sz;
+
+ hyp_spin_lock(&host_buffers.lock);
+ if (!host_buffers.rx) {
+ ffa_to_smccc_res(res, FFA_RET_BUSY);
+ goto out_unlock;
+ }
+
+ arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1,
+ uuid2, uuid3, flags, 0, 0,
+ res);
+
+ if (res->a0 != FFA_SUCCESS)
+ goto out_unlock;
+
+ count = res->a2;
+ if (!count)
+ goto out_unlock;
+
+ if (hyp_ffa_version > FFA_VERSION_1_0) {
+ /* Get the number of partitions deployed in the system */
+ if (flags & 0x1)
+ goto out_unlock;
+
+ partition_sz = res->a3;
+ } else {
+ /* FFA_VERSION_1_0 lacks the size in the response */
+ partition_sz = FFA_1_0_PARTITON_INFO_SZ;
+ }
+
+ copy_sz = partition_sz * count;
+ if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
+ ffa_to_smccc_res(res, FFA_RET_ABORTED);
+ goto out_unlock;
+ }
+
+ memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
+out_unlock:
+ hyp_spin_unlock(&host_buffers.lock);
+}
+
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
struct arm_smccc_res res;
@@ -660,6 +809,11 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
if (!is_ffa_call(func_id))
return false;
+ if (!has_version_negotiated && func_id != FFA_VERSION) {
+ ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS);
+ goto out_handled;
+ }
+
switch (func_id) {
case FFA_FEATURES:
if (!do_ffa_features(&res, host_ctxt))
@@ -686,6 +840,12 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
case FFA_MEM_FRAG_TX:
do_ffa_mem_frag_tx(&res, host_ctxt);
goto out_handled;
+ case FFA_VERSION:
+ do_ffa_version(&res, host_ctxt);
+ goto out_handled;
+ case FFA_PARTITION_INFO_GET:
+ do_ffa_part_get(&res, host_ctxt);
+ goto out_handled;
}
if (ffa_call_supported(func_id))
@@ -700,13 +860,12 @@ out_handled:
int hyp_ffa_init(void *pages)
{
struct arm_smccc_res res;
- size_t min_rxtx_sz;
void *tx, *rx;
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
return 0;
- arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res);
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
@@ -726,34 +885,10 @@ int hyp_ffa_init(void *pages)
if (FFA_MAJOR_VERSION(res.a0) != 1)
return -EOPNOTSUPP;
- arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
- if (res.a0 != FFA_SUCCESS)
- return -EOPNOTSUPP;
-
- if (res.a2 != HOST_FFA_ID)
- return -EINVAL;
-
- arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
- 0, 0, 0, 0, 0, 0, &res);
- if (res.a0 != FFA_SUCCESS)
- return -EOPNOTSUPP;
-
- switch (res.a2) {
- case FFA_FEAT_RXTX_MIN_SZ_4K:
- min_rxtx_sz = SZ_4K;
- break;
- case FFA_FEAT_RXTX_MIN_SZ_16K:
- min_rxtx_sz = SZ_16K;
- break;
- case FFA_FEAT_RXTX_MIN_SZ_64K:
- min_rxtx_sz = SZ_64K;
- break;
- default:
- return -EINVAL;
- }
-
- if (min_rxtx_sz > PAGE_SIZE)
- return -EOPNOTSUPP;
+ if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1))
+ hyp_ffa_version = res.a0;
+ else
+ hyp_ffa_version = FFA_VERSION_1_1;
tx = pages;
pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
@@ -776,5 +911,6 @@ int hyp_ffa_init(void *pages)
.lock = __HYP_SPIN_LOCK_UNLOCKED,
};
+ version_lock = __HYP_SPIN_LOCK_UNLOCKED;
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
index 6bc88a756cb7..b63f4e1c1033 100644
--- a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
+++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
@@ -50,6 +50,9 @@
#ifndef R_AARCH64_ABS64
#define R_AARCH64_ABS64 257
#endif
+#ifndef R_AARCH64_ABS32
+#define R_AARCH64_ABS32 258
+#endif
#ifndef R_AARCH64_PREL64
#define R_AARCH64_PREL64 260
#endif
@@ -383,6 +386,9 @@ static void emit_rela_section(Elf64_Shdr *sh_rela)
case R_AARCH64_ABS64:
emit_rela_abs64(rela, sh_orig_name);
break;
+ /* Allow 32-bit absolute relocation, for kCFI type hashes. */
+ case R_AARCH64_ABS32:
+ break;
/* Allow position-relative data relocations. */
case R_AARCH64_PREL64:
case R_AARCH64_PREL32:
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 7693a6757cd7..58f0cb2298cc 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -110,7 +110,7 @@ SYM_FUNC_END(__host_enter)
* u64 elr, u64 par);
*/
SYM_FUNC_START(__hyp_do_panic)
- /* Prepare and exit to the host's panic funciton. */
+ /* Prepare and exit to the host's panic function. */
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
@@ -188,21 +188,15 @@ SYM_FUNC_END(__host_hvc)
/*
* Test whether the SP has overflowed, without corrupting a GPR.
- * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit
+ * nVHE hypervisor stacks are aligned so that the NVHE_STACK_SHIFT bit
* of SP should always be 1.
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
- tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@
+ tbz x0, #NVHE_STACK_SHIFT, .L__hyp_sp_overflow\@
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
- /* If a guest is loaded, panic out of it. */
- stp x0, x1, [sp, #-16]!
- get_loaded_vcpu x0, x1
- cbnz x0, __guest_exit_panic
- add sp, sp, #16
-
/*
* The panic may not be clean if the exception is taken before the host
* context has been saved by __host_exit or after the hyp context has
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 2994878d68ea..f8af11189572 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -5,6 +5,7 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/cfi_types.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
@@ -23,28 +24,25 @@
.align 11
SYM_CODE_START(__kvm_hyp_init)
- ventry __invalid // Synchronous EL2t
- ventry __invalid // IRQ EL2t
- ventry __invalid // FIQ EL2t
- ventry __invalid // Error EL2t
+ ventry . // Synchronous EL2t
+ ventry . // IRQ EL2t
+ ventry . // FIQ EL2t
+ ventry . // Error EL2t
- ventry __invalid // Synchronous EL2h
- ventry __invalid // IRQ EL2h
- ventry __invalid // FIQ EL2h
- ventry __invalid // Error EL2h
+ ventry . // Synchronous EL2h
+ ventry . // IRQ EL2h
+ ventry . // FIQ EL2h
+ ventry . // Error EL2h
ventry __do_hyp_init // Synchronous 64-bit EL1
- ventry __invalid // IRQ 64-bit EL1
- ventry __invalid // FIQ 64-bit EL1
- ventry __invalid // Error 64-bit EL1
+ ventry . // IRQ 64-bit EL1
+ ventry . // FIQ 64-bit EL1
+ ventry . // Error 64-bit EL1
- ventry __invalid // Synchronous 32-bit EL1
- ventry __invalid // IRQ 32-bit EL1
- ventry __invalid // FIQ 32-bit EL1
- ventry __invalid // Error 32-bit EL1
-
-__invalid:
- b .
+ ventry . // Synchronous 32-bit EL1
+ ventry . // IRQ 32-bit EL1
+ ventry . // FIQ 32-bit EL1
+ ventry . // Error 32-bit EL1
/*
* Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.
@@ -76,6 +74,17 @@ __do_hyp_init:
SYM_CODE_END(__kvm_hyp_init)
/*
+ * Initialize EL2 CPU state to sane values.
+ *
+ * HCR_EL2.E2H must have been initialized already.
+ */
+SYM_CODE_START_LOCAL(__kvm_init_el2_state)
+ init_el2_state // Clobbers x0..x2
+ finalise_el2_state
+ ret
+SYM_CODE_END(__kvm_init_el2_state)
+
+/*
* Initialize the hypervisor in EL2.
*
* Only uses x0..x2 so as to not clobber callee-saved SMCCC registers
@@ -101,9 +110,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
// TPIDR_EL2 is used to preserve x0 across the macro maze...
isb
msr tpidr_el2, x0
- init_el2_state
- finalise_el2_state
+ str lr, [x0, #NVHE_INIT_TMP]
+
+ bl __kvm_init_el2_state
+
mrs x0, tpidr_el2
+ ldr lr, [x0, #NVHE_INIT_TMP]
1:
ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
@@ -129,7 +141,7 @@ alternative_else_nop_endif
/* Invalidate the stale TLBs from Bootloader */
tlbi alle2
- tlbi vmalls12e1
+ tlbi alle1
dsb sy
mov_q x0, INIT_SCTLR_EL2_MMU_ON
@@ -198,10 +210,9 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
2: msr SPsel, #1 // We want to use SP_EL{1,2}
- /* Initialize EL2 CPU state to sane values. */
- init_el2_state // Clobbers x0..x2
- finalise_el2_state
- __init_el2_nvhe_prepare_eret
+ init_el2_hcr 0
+
+ bl __kvm_init_el2_state
/* Enable MMU, set vectors and stack. */
mov x0, x28
@@ -265,33 +276,38 @@ alternative_else_nop_endif
SYM_CODE_END(__kvm_handle_stub_hvc)
-SYM_FUNC_START(__pkvm_init_switch_pgd)
+/*
+ * void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp,
+ * void (*fn)(void));
+ *
+ * SYM_TYPED_FUNC_START() allows C to call this ID-mapped function indirectly
+ * using a physical pointer without triggering a kCFI failure.
+ */
+SYM_TYPED_FUNC_START(__pkvm_init_switch_pgd)
/* Turn the MMU off */
pre_disable_mmu_workaround
- mrs x2, sctlr_el2
- bic x3, x2, #SCTLR_ELx_M
- msr sctlr_el2, x3
+ mrs x3, sctlr_el2
+ bic x4, x3, #SCTLR_ELx_M
+ msr sctlr_el2, x4
isb
tlbi alle2
/* Install the new pgtables */
- ldr x3, [x0, #NVHE_INIT_PGD_PA]
- phys_to_ttbr x4, x3
+ phys_to_ttbr x5, x0
alternative_if ARM64_HAS_CNP
- orr x4, x4, #TTBR_CNP_BIT
+ orr x5, x5, #TTBR_CNP_BIT
alternative_else_nop_endif
- msr ttbr0_el2, x4
+ msr ttbr0_el2, x5
/* Set the new stack pointer */
- ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA]
- mov sp, x0
+ mov sp, x1
/* And turn the MMU back on! */
dsb nsh
isb
- set_sctlr_el2 x2
- ret x1
+ set_sctlr_el2 x3
+ ret x2
SYM_FUNC_END(__pkvm_init_switch_pgd)
.popsection
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed87..2c37680d954c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -5,6 +5,7 @@
*/
#include <hyp/adjust_pc.h>
+#include <hyp/switch.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -23,26 +24,115 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ /*
+ * On saving/restoring guest sve state, always use the maximum VL for
+ * the guest. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) guest VL.
+ */
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ /*
+ * On saving/restoring host sve state, always use the maximum VL for
+ * the host. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) host VL.
+ *
+ * Note that this constrains the PE to the maximum shared VL
+ * that was discovered, if we wish to use larger VLs this will
+ * need to be revisited.
+ */
+ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ bool has_fpmr;
+
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
+ if (has_fpmr)
+ __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
+
+ if (has_fpmr)
+ write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;
+
+ if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+ hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
+ else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+ hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
+}
+
+static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
+ host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
+ else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
+ host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+ flush_debug_state(hyp_vcpu);
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
-
- hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
+ /* Limit guest vector length to the maximum supported by the host. */
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
- hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
+ hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
+ (HCR_TWI | HCR_TWE);
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
-
- hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
- hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
@@ -56,35 +146,74 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+ sync_debug_state(hyp_vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
- host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
+static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2);
+ DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
+ if (!hyp_vcpu)
+ return;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
+ /* Propagate WFx trapping flags */
+ hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
+ hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
+ }
+}
+
+static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
+{
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (hyp_vcpu)
+ pkvm_put_hyp_vcpu(hyp_vcpu);
+}
+
static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
int ret;
- host_vcpu = kern_hyp_va(host_vcpu);
-
if (unlikely(is_protected_kvm_enabled())) {
- struct pkvm_hyp_vcpu *hyp_vcpu;
- struct kvm *host_kvm;
+ struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+
+ /*
+ * KVM (and pKVM) doesn't support SME guests for now, and
+ * ensures that SME features aren't enabled in pstate when
+ * loading a vcpu. Therefore, if SME features enabled the host
+ * is misbehaving.
+ */
+ if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+ ret = -EINVAL;
+ goto out;
+ }
- host_kvm = kern_hyp_va(host_vcpu->kvm);
- hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
- host_vcpu->vcpu_idx);
if (!hyp_vcpu) {
ret = -EINVAL;
goto out;
@@ -95,12 +224,145 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
sync_hyp_vcpu(hyp_vcpu);
- pkvm_put_hyp_vcpu(hyp_vcpu);
} else {
+ struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);
+
/* The host is fully trusted, run its vCPU directly. */
- ret = __kvm_vcpu_run(host_vcpu);
+ fpsimd_lazy_switch_to_guest(vcpu);
+ ret = __kvm_vcpu_run(vcpu);
+ fpsimd_lazy_switch_to_host(vcpu);
}
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+ return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache,
+ host_vcpu->arch.pkvm_memcache.nr_pages,
+ &host_vcpu->arch.pkvm_memcache);
+}
+static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, pfn, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = pkvm_refill_memcache(hyp_vcpu);
+ if (ret)
+ goto out;
+
+ ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_unshare_guest(gfn, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, gfn, host_ctxt, 1);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ DECLARE_REG(u64, gfn, host_ctxt, 2);
+ DECLARE_REG(bool, mkold, host_ctxt, 3);
+ struct pkvm_hyp_vm *hyp_vm;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ goto out;
+
+ ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm);
+ put_pkvm_hyp_vm(hyp_vm);
+out:
+ cpu_reg(host_ctxt, 1) = ret;
+}
+
+static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(u64, gfn, host_ctxt, 1);
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ int ret = -EINVAL;
+
+ if (!is_protected_kvm_enabled())
+ goto out;
+
+ hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
+ if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ goto out;
+
+ ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu);
out:
cpu_reg(host_ctxt, 1) = ret;
}
@@ -152,6 +414,22 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
}
+static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+ struct pkvm_hyp_vm *hyp_vm;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ hyp_vm = get_np_pkvm_hyp_vm(handle);
+ if (!hyp_vm)
+ return;
+
+ __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
+ put_pkvm_hyp_vm(hyp_vm);
+}
+
static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -178,38 +456,23 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
-static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
-}
-
-static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
-}
-
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
}
-static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
-}
-
-static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
}
-static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -280,13 +543,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
-static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
-{
- DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
-
- __pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
-}
-
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
@@ -320,7 +576,6 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
static const hcall_t host_hcall[] = {
/* ___kvm_hyp_init */
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_cpu_set_vector),
@@ -331,6 +586,12 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__pkvm_host_unshare_hyp),
+ HANDLE_FUNC(__pkvm_host_share_guest),
+ HANDLE_FUNC(__pkvm_host_unshare_guest),
+ HANDLE_FUNC(__pkvm_host_relax_perms_guest),
+ HANDLE_FUNC(__pkvm_host_wrprotect_guest),
+ HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
+ HANDLE_FUNC(__pkvm_host_mkyoung_guest),
HANDLE_FUNC(__kvm_adjust_pc),
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
@@ -340,14 +601,14 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__vgic_v3_read_vmcr),
- HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_save_aprs),
- HANDLE_FUNC(__vgic_v3_restore_aprs),
- HANDLE_FUNC(__pkvm_vcpu_init_traps),
+ HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
+ HANDLE_FUNC(__pkvm_vcpu_load),
+ HANDLE_FUNC(__pkvm_vcpu_put),
+ HANDLE_FUNC(__pkvm_tlb_flush_vmid),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
@@ -419,15 +680,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
- case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
- isb();
- sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
- break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
handle_host_mem_abort(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 861c76021a25..19c3c631708c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc)
memset(addr, 0, PAGE_SIZE);
p = hyp_virt_to_page(addr);
- memset(p, 0, sizeof(*p));
p->refcount = 1;
+ p->order = 0;
return addr;
}
@@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
+ struct hyp_page *page;
void *addr;
/* Dump all pgtable pages in the hyp_pool */
@@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
/* Drain the hyp_pool into the memcache */
addr = hyp_alloc_pages(&vm->pool, 0);
while (addr) {
- memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+ page = hyp_virt_to_page(addr);
+ page->refcount = 0;
+ page->order = 0;
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -382,19 +385,28 @@ bool addr_is_memory(phys_addr_t phys)
return !!find_mem_range(phys, &range);
}
-static bool addr_is_allowed_memory(phys_addr_t phys)
+static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
+{
+ return range->start <= addr && addr < range->end;
+}
+
+static int check_range_allowed_memory(u64 start, u64 end)
{
struct memblock_region *reg;
struct kvm_mem_range range;
- reg = find_mem_range(phys, &range);
+ /*
+ * Callers can't check the state of a range that overlaps memory and
+ * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
+ */
+ reg = find_mem_range(start, &range);
+ if (!is_in_mem_range(end - 1, &range))
+ return -EINVAL;
- return reg && !(reg->flags & MEMBLOCK_NOMAP);
-}
+ if (!reg || reg->flags & MEMBLOCK_NOMAP)
+ return -EPERM;
-static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
-{
- return range->start <= addr && addr < range->end;
+ return 0;
}
static bool range_is_memory(u64 start, u64 end)
@@ -454,8 +466,10 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
if (kvm_pte_valid(pte))
return -EAGAIN;
- if (pte)
+ if (pte) {
+ WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
return -EPERM;
+ }
do {
u64 granule = kvm_granule_size(level);
@@ -477,10 +491,33 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
+static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
+{
+ phys_addr_t end = addr + size;
+
+ for (; addr < end; addr += PAGE_SIZE)
+ hyp_phys_to_page(addr)->host_state = state;
+}
+
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
- addr, size, &host_s2_pool, owner_id);
+ int ret;
+
+ if (!addr_is_memory(addr))
+ return -EPERM;
+
+ ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+ addr, size, &host_s2_pool, owner_id);
+ if (ret)
+ return ret;
+
+ /* Don't forget to update the vmemmap tracking for the host */
+ if (owner_id == PKVM_ID_HOST)
+ __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
+ else
+ __host_update_page_state(addr, size, PKVM_NOPAGE);
+
+ return 0;
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
@@ -533,46 +570,19 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ if (!__get_fault_info(esr, &fault)) {
+ /*
+ * We've presumably raced with a page-table change which caused
+ * AT to fail, try again.
+ */
+ return;
+ }
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
-struct pkvm_mem_transition {
- u64 nr_pages;
-
- struct {
- enum pkvm_component_id id;
- /* Address in the initiator's address space */
- u64 addr;
-
- union {
- struct {
- /* Address in the completer's address space */
- u64 completer_addr;
- } host;
- struct {
- u64 completer_addr;
- } hyp;
- };
- } initiator;
-
- struct {
- enum pkvm_component_id id;
- } completer;
-};
-
-struct pkvm_mem_share {
- const struct pkvm_mem_transition tx;
- const enum kvm_pgtable_prot completer_prot;
-};
-
-struct pkvm_mem_donation {
- const struct pkvm_mem_transition tx;
-};
-
struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
@@ -598,115 +608,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
-{
- if (!addr_is_allowed_memory(addr))
- return PKVM_NOPAGE;
-
- if (!kvm_pte_valid(pte) && pte)
- return PKVM_NOPAGE;
-
- return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
-}
-
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- struct check_walk_data d = {
- .desired = state,
- .get_page_state = host_get_page_state,
- };
+ u64 end = addr + size;
+ int ret;
+
+ ret = check_range_allowed_memory(addr, end);
+ if (ret)
+ return ret;
hyp_assert_lock_held(&host_mmu.lock);
- return check_page_state_range(&host_mmu.pgt, addr, size, &d);
+ for (; addr < end; addr += PAGE_SIZE) {
+ if (hyp_phys_to_page(addr)->host_state != state)
+ return -EPERM;
+ }
+
+ return 0;
}
static int __host_set_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
-
- return host_stage2_idmap_locked(addr, size, prot);
-}
-
-static int host_request_owned_transition(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
-}
-
-static int host_request_unshare(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
-}
-
-static int host_initiate_share(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
-}
-
-static int host_initiate_unshare(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
-}
-
-static int host_initiate_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
-{
- u8 owner_id = tx->completer.id;
- u64 size = tx->nr_pages * PAGE_SIZE;
-
- *completer_addr = tx->initiator.host.completer_addr;
- return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
-}
-
-static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
-{
- return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
- tx->initiator.id != PKVM_ID_HYP);
-}
-
-static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
- enum pkvm_page_state state)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
-
- if (__host_ack_skip_pgtable_check(tx))
- return 0;
-
- return __host_check_page_state_range(addr, size, state);
-}
+ if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
+ int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
-static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- return __host_ack_transition(addr, tx, PKVM_NOPAGE);
-}
+ if (ret)
+ return ret;
+ }
-static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u8 host_id = tx->completer.id;
+ __host_update_page_state(addr, size, state);
- return host_stage2_set_owner_locked(addr, size, host_id);
+ return 0;
}
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
@@ -729,577 +662,425 @@ static int __hyp_check_page_state_range(u64 addr, u64 size,
return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}
-static int hyp_request_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
+static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
- u64 addr = tx->initiator.addr;
+ if (!kvm_pte_valid(pte))
+ return PKVM_NOPAGE;
- *completer_addr = tx->initiator.hyp.completer_addr;
- return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+ return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
-static int hyp_initiate_donation(u64 *completer_addr,
- const struct pkvm_mem_transition *tx)
+static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
+ u64 size, enum pkvm_page_state state)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
- int ret;
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ struct check_walk_data d = {
+ .desired = state,
+ .get_page_state = guest_get_page_state,
+ };
- *completer_addr = tx->initiator.hyp.completer_addr;
- ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
- return (ret != size) ? -EFAULT : 0;
+ hyp_assert_lock_held(&vm->lock);
+ return check_page_state_range(&vm->pgt, addr, size, &d);
}
-static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+int __pkvm_host_share_hyp(u64 pfn)
{
- return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
- tx->initiator.id != PKVM_ID_HOST);
-}
+ u64 phys = hyp_pfn_to_phys(pfn);
+ void *virt = __hyp_va(phys);
+ enum kvm_pgtable_prot prot;
+ u64 size = PAGE_SIZE;
+ int ret;
-static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
- enum kvm_pgtable_prot perms)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
+ host_lock_component();
+ hyp_lock_component();
- if (perms != PAGE_HYP)
- return -EPERM;
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (ret)
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
+ }
+
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
+ WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
- if (__hyp_ack_skip_pgtable_check(tx))
- return 0;
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+ return ret;
}
-static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
+int __pkvm_host_unshare_hyp(u64 pfn)
{
- u64 size = tx->nr_pages * PAGE_SIZE;
-
- if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
- return -EBUSY;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 virt = (u64)__hyp_va(phys);
+ u64 size = PAGE_SIZE;
+ int ret;
- if (__hyp_ack_skip_pgtable_check(tx))
- return 0;
+ host_lock_component();
+ hyp_lock_component();
- return __hyp_check_page_state_range(addr, size,
- PKVM_PAGE_SHARED_BORROWED);
-}
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ if (ret)
+ goto unlock;
+ ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
+ if (ret)
+ goto unlock;
+ if (hyp_page_count((void *)virt)) {
+ ret = -EBUSY;
+ goto unlock;
+ }
-static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+ WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
- if (__hyp_ack_skip_pgtable_check(tx))
- return 0;
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+ return ret;
}
-static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
- enum kvm_pgtable_prot perms)
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
- void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
+ void *virt = __hyp_va(phys);
enum kvm_pgtable_prot prot;
+ int ret;
- prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
- return pkvm_create_mappings_locked(start, end, prot);
-}
+ host_lock_component();
+ hyp_lock_component();
-static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
-{
- u64 size = tx->nr_pages * PAGE_SIZE;
- int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (ret)
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
+ }
- return (ret != size) ? -EFAULT : 0;
-}
+ prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+ WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+ WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
-static int hyp_complete_donation(u64 addr,
- const struct pkvm_mem_transition *tx)
-{
- void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
- enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return pkvm_create_mappings_locked(start, end, prot);
+ return ret;
}
-static int check_share(struct pkvm_mem_share *share)
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
+ u64 virt = (u64)__hyp_va(phys);
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_owned_transition(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ hyp_lock_component();
+ ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
- break;
- case PKVM_ID_FFA:
- /*
- * We only check the host; the secure side will check the other
- * end when we forward the FFA call.
- */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
+ goto unlock;
+ if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+ ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
}
+ WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+ WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
+
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
+
return ret;
}
-static int __do_share(struct pkvm_mem_share *share)
+int hyp_pin_shared_mem(void *from, void *to)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+ u64 end = PAGE_ALIGN((u64)to);
+ u64 size = end - start;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_share(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ hyp_lock_component();
+ ret = __host_check_page_state_range(__hyp_pa(start), size,
+ PKVM_PAGE_SHARED_OWNED);
if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
- break;
- case PKVM_ID_FFA:
- /*
- * We're not responsible for any secure page-tables, so there's
- * nothing to do here.
- */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ goto unlock;
- return ret;
-}
+ ret = __hyp_check_page_state_range(start, size,
+ PKVM_PAGE_SHARED_BORROWED);
+ if (ret)
+ goto unlock;
-/*
- * do_share():
- *
- * The page owner grants access to another component with a given set
- * of permissions.
- *
- * Initiator: OWNED => SHARED_OWNED
- * Completer: NOPAGE => SHARED_BORROWED
- */
-static int do_share(struct pkvm_mem_share *share)
-{
- int ret;
+ for (cur = start; cur < end; cur += PAGE_SIZE)
+ hyp_page_ref_inc(hyp_virt_to_page(cur));
- ret = check_share(share);
- if (ret)
- return ret;
+unlock:
+ hyp_unlock_component();
+ host_unlock_component();
- return WARN_ON(__do_share(share));
+ return ret;
}
-static int check_unshare(struct pkvm_mem_share *share)
+void hyp_unpin_shared_mem(void *from, void *to)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
- int ret;
-
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_unshare(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+ u64 end = PAGE_ALIGN((u64)to);
- if (ret)
- return ret;
+ host_lock_component();
+ hyp_lock_component();
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_ack_unshare(completer_addr, tx);
- break;
- case PKVM_ID_FFA:
- /* See check_share() */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ for (cur = start; cur < end; cur += PAGE_SIZE)
+ hyp_page_ref_dec(hyp_virt_to_page(cur));
- return ret;
+ hyp_unlock_component();
+ host_unlock_component();
}
-static int __do_unshare(struct pkvm_mem_share *share)
+int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
- const struct pkvm_mem_transition *tx = &share->tx;
- u64 completer_addr;
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_unshare(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
-
- if (ret)
- return ret;
-
- switch (tx->completer.id) {
- case PKVM_ID_HYP:
- ret = hyp_complete_unshare(completer_addr, tx);
- break;
- case PKVM_ID_FFA:
- /* See __do_share() */
- ret = 0;
- break;
- default:
- ret = -EINVAL;
- }
+ host_lock_component();
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ if (!ret)
+ ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ host_unlock_component();
return ret;
}
-/*
- * do_unshare():
- *
- * The page owner revokes access from another component for a range of
- * pages which were previously shared using do_share().
- *
- * Initiator: SHARED_OWNED => OWNED
- * Completer: SHARED_BORROWED => NOPAGE
- */
-static int do_unshare(struct pkvm_mem_share *share)
+int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 size = PAGE_SIZE * nr_pages;
int ret;
- ret = check_unshare(share);
- if (ret)
- return ret;
+ host_lock_component();
+ ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
+ if (!ret)
+ ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
+ host_unlock_component();
- return WARN_ON(__do_unshare(share));
+ return ret;
}
-static int check_donation(struct pkvm_mem_donation *donation)
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+ enum kvm_pgtable_prot prot)
{
- const struct pkvm_mem_transition *tx = &donation->tx;
- u64 completer_addr;
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 phys = hyp_pfn_to_phys(pfn);
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ struct hyp_page *page;
int ret;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_request_owned_transition(&completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_request_donation(&completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
if (ret)
return ret;
- switch (tx->completer.id) {
- case PKVM_ID_HOST:
- ret = host_ack_donation(completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_ack_donation(completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
+ host_lock_component();
+ guest_lock_component(vm);
-static int __do_donate(struct pkvm_mem_donation *donation)
-{
- const struct pkvm_mem_transition *tx = &donation->tx;
- u64 completer_addr;
- int ret;
+ ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
+ if (ret)
+ goto unlock;
- switch (tx->initiator.id) {
- case PKVM_ID_HOST:
- ret = host_initiate_donation(&completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_initiate_donation(&completer_addr, tx);
+ page = hyp_phys_to_page(phys);
+ switch (page->host_state) {
+ case PKVM_PAGE_OWNED:
+ WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
break;
+ case PKVM_PAGE_SHARED_OWNED:
+ if (page->host_share_guest_count)
+ break;
+ /* Only host to np-guest multi-sharing is tolerated */
+ WARN_ON(1);
+ fallthrough;
default:
- ret = -EINVAL;
+ ret = -EPERM;
+ goto unlock;
}
- if (ret)
- return ret;
+ WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
+ pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
+ &vcpu->vcpu.arch.pkvm_memcache, 0));
+ page->host_share_guest_count++;
- switch (tx->completer.id) {
- case PKVM_ID_HOST:
- ret = host_complete_donation(completer_addr, tx);
- break;
- case PKVM_ID_HYP:
- ret = hyp_complete_donation(completer_addr, tx);
- break;
- default:
- ret = -EINVAL;
- }
+unlock:
+ guest_unlock_component(vm);
+ host_unlock_component();
return ret;
}
-/*
- * do_donate():
- *
- * The page owner transfers ownership to another component, losing access
- * as a consequence.
- *
- * Initiator: OWNED => NOPAGE
- * Completer: NOPAGE => OWNED
- */
-static int do_donate(struct pkvm_mem_donation *donation)
+static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
+ enum pkvm_page_state state;
+ struct hyp_page *page;
+ kvm_pte_t pte;
+ u64 phys;
+ s8 level;
int ret;
- ret = check_donation(donation);
+ ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
if (ret)
return ret;
+ if (!kvm_pte_valid(pte))
+ return -ENOENT;
+ if (level != KVM_PGTABLE_LAST_LEVEL)
+ return -E2BIG;
- return WARN_ON(__do_donate(donation));
-}
-
-int __pkvm_host_share_hyp(u64 pfn)
-{
- int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = 1,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- .completer_prot = PAGE_HYP,
- };
+ state = guest_get_page_state(pte, ipa);
+ if (state != PKVM_PAGE_SHARED_BORROWED)
+ return -EPERM;
- host_lock_component();
- hyp_lock_component();
+ phys = kvm_pte_to_phys(pte);
+ ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ if (WARN_ON(ret))
+ return ret;
- ret = do_share(&share);
+ page = hyp_phys_to_page(phys);
+ if (page->host_state != PKVM_PAGE_SHARED_OWNED)
+ return -EPERM;
+ if (WARN_ON(!page->host_share_guest_count))
+ return -EINVAL;
- hyp_unlock_component();
- host_unlock_component();
+ *__phys = phys;
- return ret;
+ return 0;
}
-int __pkvm_host_unshare_hyp(u64 pfn)
+int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ struct hyp_page *page;
+ u64 phys;
int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = 1,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- .completer_prot = PAGE_HYP,
- };
host_lock_component();
- hyp_lock_component();
-
- ret = do_unshare(&share);
-
- hyp_unlock_component();
- host_unlock_component();
-
- return ret;
-}
+ guest_lock_component(vm);
-int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
-{
- int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_donation donation = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = host_addr,
- .host = {
- .completer_addr = hyp_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HYP,
- },
- },
- };
+ ret = __check_host_shared_guest(vm, &phys, ipa);
+ if (ret)
+ goto unlock;
- host_lock_component();
- hyp_lock_component();
+ ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
+ if (ret)
+ goto unlock;
- ret = do_donate(&donation);
+ page = hyp_phys_to_page(phys);
+ page->host_share_guest_count--;
+ if (!page->host_share_guest_count)
+ WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));
- hyp_unlock_component();
+unlock:
+ guest_unlock_component(vm);
host_unlock_component();
return ret;
}
-int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
+static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
+ u64 phys;
int ret;
- u64 host_addr = hyp_pfn_to_phys(pfn);
- u64 hyp_addr = (u64)__hyp_va(host_addr);
- struct pkvm_mem_donation donation = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HYP,
- .addr = hyp_addr,
- .hyp = {
- .completer_addr = host_addr,
- },
- },
- .completer = {
- .id = PKVM_ID_HOST,
- },
- },
- };
+
+ if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
+ return;
host_lock_component();
- hyp_lock_component();
+ guest_lock_component(vm);
- ret = do_donate(&donation);
+ ret = __check_host_shared_guest(vm, &phys, ipa);
- hyp_unlock_component();
+ guest_unlock_component(vm);
host_unlock_component();
- return ret;
+ WARN_ON(ret && ret != -ENOENT);
}
-int hyp_pin_shared_mem(void *from, void *to)
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
- u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
- u64 end = PAGE_ALIGN((u64)to);
- u64 size = end - start;
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 ipa = hyp_pfn_to_phys(gfn);
int ret;
- host_lock_component();
- hyp_lock_component();
-
- ret = __host_check_page_state_range(__hyp_pa(start), size,
- PKVM_PAGE_SHARED_OWNED);
- if (ret)
- goto unlock;
-
- ret = __hyp_check_page_state_range(start, size,
- PKVM_PAGE_SHARED_BORROWED);
- if (ret)
- goto unlock;
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_inc(hyp_virt_to_page(cur));
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
-unlock:
- hyp_unlock_component();
- host_unlock_component();
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
+ guest_unlock_component(vm);
return ret;
}
-void hyp_unpin_shared_mem(void *from, void *to)
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
- u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
- u64 end = PAGE_ALIGN((u64)to);
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ int ret;
- host_lock_component();
- hyp_lock_component();
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
- for (cur = start; cur < end; cur += PAGE_SIZE)
- hyp_page_ref_dec(hyp_virt_to_page(cur));
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ guest_unlock_component(vm);
- hyp_unlock_component();
- host_unlock_component();
+ return ret;
}
-int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
+ u64 ipa = hyp_pfn_to_phys(gfn);
int ret;
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = hyp_pfn_to_phys(pfn),
- },
- .completer = {
- .id = PKVM_ID_FFA,
- },
- },
- };
- host_lock_component();
- ret = do_share(&share);
- host_unlock_component();
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
+ guest_unlock_component(vm);
return ret;
}
-int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
+int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
- int ret;
- struct pkvm_mem_share share = {
- .tx = {
- .nr_pages = nr_pages,
- .initiator = {
- .id = PKVM_ID_HOST,
- .addr = hyp_pfn_to_phys(pfn),
- },
- .completer = {
- .id = PKVM_ID_FFA,
- },
- },
- };
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+ u64 ipa = hyp_pfn_to_phys(gfn);
- host_lock_component();
- ret = do_unshare(&share);
- host_unlock_component();
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
- return ret;
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
+ guest_unlock_component(vm);
+
+ return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index b01a3d1078a8..f41c7440b34b 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -155,7 +155,7 @@ int hyp_back_vmemmap(phys_addr_t back)
start = hyp_memory[i].base;
start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
/*
- * The begining of the hyp_vmemmap region for the current
+ * The beginning of the hyp_vmemmap region for the current
* memblock may already be backed by the page backing the end
* the previous region, so avoid mapping it twice.
*/
@@ -360,10 +360,10 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
prev_base = __io_map_base;
/*
- * Efficient stack verification using the PAGE_SHIFT bit implies
+ * Efficient stack verification using the NVHE_STACK_SHIFT bit implies
* an alignment of our allocation on the order of the size.
*/
- size = PAGE_SIZE * 2;
+ size = NVHE_STACK_SIZE * 2;
addr = ALIGN(__io_map_base, size);
ret = __pkvm_alloc_private_va_range(addr, size);
@@ -373,12 +373,12 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
* at the higher address and leave the lower guard page
* unbacked.
*
- * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1
* and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection.
*/
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
- PAGE_SIZE, phys, PAGE_HYP);
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + NVHE_STACK_SIZE,
+ NVHE_STACK_SIZE, phys, PAGE_HYP);
if (ret)
__io_map_base = prev_base;
}
@@ -408,7 +408,7 @@ static void *admit_host_page(void *arg)
return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
}
-/* Refill our local memcache by poping pages from the one provided by the host. */
+/* Refill our local memcache by popping pages from the one provided by the host. */
int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
struct kvm_hyp_memcache *host_mc)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index e691290d3765..a1eb27a1a747 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
*/
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
phys_addr_t addr = hyp_page_to_phys(p);
@@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
/* Find a buddy page currently available for allocation */
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
@@ -94,7 +94,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
phys_addr_t phys = hyp_page_to_phys(p);
- unsigned short order = p->order;
+ u8 order = p->order;
struct hyp_page *buddy;
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -129,7 +129,7 @@ insert:
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
- unsigned short order)
+ u8 order)
{
struct hyp_page *buddy;
@@ -183,7 +183,7 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
void hyp_split_page(struct hyp_page *p)
{
- unsigned short order = p->order;
+ u8 order = p->order;
unsigned int i;
p->order = 0;
@@ -195,10 +195,10 @@ void hyp_split_page(struct hyp_page *p)
}
}
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
+void *hyp_alloc_pages(struct hyp_pool *pool, u8 order)
{
- unsigned short i = order;
struct hyp_page *p;
+ u8 i = order;
hyp_spin_lock(&pool->lock);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 26dd9a20ad6e..3927fe52a3dd 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -6,7 +6,9 @@
#include <linux/kvm_host.h>
#include <linux/mm.h>
-#include <nvhe/fixed_config.h>
+
+#include <asm/kvm_emulate.h>
+
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
@@ -18,198 +20,163 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
- * Set trap register values based on features in ID_AA64PFR0.
+ * The currently loaded hyp vCPU for each physical CPU. Used only when
+ * protected KVM is enabled, but for both protected and non-protected VMs.
*/
-static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
- u64 hcr_set = HCR_RW;
- u64 hcr_clear = 0;
- u64 cptr_set = 0;
- u64 cptr_clear = 0;
+static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
- /* Protected KVM does not support AArch32 guests. */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
-
- /*
- * Linux guests assume support for floating-point and Advanced SIMD. Do
- * not change the trapping behavior for these from the KVM default.
- */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
- PVM_ID_AA64PFR0_ALLOW));
+static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (has_hvhe())
- hcr_set |= HCR_E2H;
+ vcpu->arch.hcr_el2 |= HCR_E2H;
- /* Trap RAS unless all current versions are supported */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
- ID_AA64PFR0_EL1_RAS_V1P1) {
- hcr_set |= HCR_TERR | HCR_TEA;
- hcr_clear |= HCR_FIEN;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
}
- /* Trap AMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
- hcr_clear |= HCR_AMVOFFEN;
- cptr_set |= CPTR_EL2_TAM;
- }
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
- /* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
- if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
- else
- cptr_set |= CPTR_EL2_TZ;
- }
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (vcpu_has_ptrauth(vcpu))
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
- vcpu->arch.cptr_el2 &= ~cptr_clear;
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
-/*
- * Set trap register values based on features in ID_AA64PFR1.
- */
-static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
- u64 hcr_set = 0;
- u64 hcr_clear = 0;
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.hcr_el2;
- /* Memory Tagging: Trap and Treat as Untagged if not supported. */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
- hcr_set |= HCR_TID5;
- hcr_clear |= HCR_DCT | HCR_ATA;
+ /* No support for AArch32. */
+ val |= HCR_RW;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;
+
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ val |= HCR_TERR | HCR_TEA;
+ val &= ~(HCR_FIEN);
}
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
-}
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
+ val &= ~(HCR_AMVOFFEN);
-/*
- * Set trap register values based on features in ID_AA64DFR0.
- */
-static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
- u64 mdcr_set = 0;
- u64 mdcr_clear = 0;
- u64 cptr_set = 0;
-
- /* Trap/constrain PMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
- mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
- MDCR_EL2_HPMN_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
+ val |= HCR_TID5;
+ val &= ~(HCR_DCT | HCR_ATA);
}
- /* Trap Debug */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
- mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+ val |= HCR_TLOR;
+
+ vcpu->arch.hcr_el2 = val;
+}
- /* Trap OS Double Lock */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
- mdcr_set |= MDCR_EL2_TDOSA;
+static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.mdcr_el2;
- /* Trap SPE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPMS;
- mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
+ val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
}
- /* Trap Trace Filter */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
- mdcr_set |= MDCR_EL2_TTRF;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
+ val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
- /* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
- if (has_hvhe())
- cptr_set |= CPACR_EL1_TTA;
- else
- cptr_set |= CPTR_EL2_TTA;
- }
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
+ val |= MDCR_EL2_TDOSA;
- /* Trap External Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
- mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
+ val |= MDCR_EL2_TPMS;
+ val &= ~MDCR_EL2_E2PB_MASK;
+ }
- vcpu->arch.mdcr_el2 |= mdcr_set;
- vcpu->arch.mdcr_el2 &= ~mdcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
-}
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+ val |= MDCR_EL2_TTRF;
-/*
- * Set trap register values based on features in ID_AA64MMFR0.
- */
-static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
- u64 mdcr_set = 0;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
+ val |= MDCR_EL2_E2TB_MASK;
/* Trap Debug Communications Channel registers */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
- mdcr_set |= MDCR_EL2_TDCC;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ val |= MDCR_EL2_TDCC;
- vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 = val;
}
/*
- * Set trap register values based on features in ID_AA64MMFR1.
+ * Check that cpu features that are neither trapped nor supported are not
+ * enabled for protected VMs.
*/
-static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
- u64 hcr_set = 0;
-
- /* Trap LOR */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
- hcr_set |= HCR_TLOR;
+ struct kvm *kvm = vcpu->kvm;
- vcpu->arch.hcr_el2 |= hcr_set;
-}
-
-/*
- * Set baseline trap register values.
- */
-static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
-{
- const u64 hcr_trap_feat_regs = HCR_TID3;
- const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+ /* Protected KVM does not support AArch32 guests. */
+ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
+ kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
+ return -EINVAL;
/*
- * Always trap:
- * - Feature id registers: to control features exposed to guests
- * - Implementation-defined features
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
*/
- vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
-
- /* Clear res0 and set res1 bits to trap potential new features. */
- vcpu->arch.hcr_el2 &= ~(HCR_RES0);
- vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
- if (!has_hvhe()) {
- vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
- vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
- }
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
+ !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
+ return -EINVAL;
+
+ /* No SME support in KVM right now. Check to catch if it changes. */
+ if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
+ return -EINVAL;
+
+ return 0;
}
/*
- * Initialize trap register values for protected VMs.
+ * Initialize trap register values in protected mode.
*/
-void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
{
- pvm_init_trap_regs(vcpu);
- pvm_init_traps_aa64pfr0(vcpu);
- pvm_init_traps_aa64pfr1(vcpu);
- pvm_init_traps_aa64dfr0(vcpu);
- pvm_init_traps_aa64mmfr0(vcpu);
- pvm_init_traps_aa64mmfr1(vcpu);
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+ int ret;
+
+ vcpu->arch.mdcr_el2 = 0;
+
+ pkvm_vcpu_reset_hcr(vcpu);
+
+ if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu)))
+ return 0;
+
+ ret = pkvm_check_pvm_cpu_features(vcpu);
+ if (ret)
+ return ret;
+
+ pvm_init_traps_hcr(vcpu);
+ pvm_init_traps_mdcr(vcpu);
+
+ return 0;
}
/*
@@ -230,10 +197,10 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
/*
* Spinlock for protecting state related to the VM table. Protects writes
- * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
- * 'last_hyp_vcpu_lookup'.
+ * to 'vm_table', 'nr_table_entries', and other per-vm state on initialization.
+ * Also protects reads and writes to 'last_hyp_vcpu_lookup'.
*/
-static DEFINE_HYP_SPINLOCK(vm_table_lock);
+DEFINE_HYP_SPINLOCK(vm_table_lock);
/*
* The table of VM entries for protected VMs in hyp.
@@ -266,15 +233,30 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
struct pkvm_hyp_vm *hyp_vm;
+ /* Cannot load a new vcpu without putting the old one first. */
+ if (__this_cpu_read(loaded_hyp_vcpu))
+ return NULL;
+
hyp_spin_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
goto unlock;
hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
+
+ /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
+ if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) {
+ hyp_vcpu = NULL;
+ goto unlock;
+ }
+
+ hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu);
hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
hyp_spin_unlock(&vm_table_lock);
+
+ if (hyp_vcpu)
+ __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
return hyp_vcpu;
}
@@ -283,10 +265,91 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
hyp_spin_lock(&vm_table_lock);
+ hyp_vcpu->loaded_hyp_vcpu = NULL;
+ __this_cpu_write(loaded_hyp_vcpu, NULL);
+ hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
+ hyp_spin_unlock(&vm_table_lock);
+}
+
+struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
+{
+ return __this_cpu_read(loaded_hyp_vcpu);
+
+}
+
+struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
+{
+ struct pkvm_hyp_vm *hyp_vm;
+
+ hyp_spin_lock(&vm_table_lock);
+ hyp_vm = get_vm_by_handle(handle);
+ if (hyp_vm)
+ hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
+ hyp_spin_unlock(&vm_table_lock);
+
+ return hyp_vm;
+}
+
+void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
+{
+ hyp_spin_lock(&vm_table_lock);
hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
hyp_spin_unlock(&vm_table_lock);
}
+struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
+{
+ struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);
+
+ if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) {
+ put_pkvm_hyp_vm(hyp_vm);
+ hyp_vm = NULL;
+ }
+
+ return hyp_vm;
+}
+
+static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
+{
+ struct kvm *kvm = &hyp_vm->kvm;
+ unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
+ DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
+
+ if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
+ set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
+
+ /* No restrictions for non-protected VMs. */
+ if (!kvm_vm_is_protected(kvm)) {
+ hyp_vm->kvm.arch.flags = host_arch_flags;
+
+ bitmap_copy(kvm->arch.vcpu_features,
+ host_kvm->arch.vcpu_features,
+ KVM_VCPU_MAX_FEATURES);
+ return;
+ }
+
+ bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
+
+ set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
+
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
+ set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
+
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
+ set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
+
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
+ set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
+
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
+ set_bit(KVM_ARM_VCPU_SVE, allowed_features);
+ kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
+ }
+
+ bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
+ allowed_features, KVM_VCPU_MAX_FEATURES);
+}
+
static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
if (host_vcpu)
@@ -308,6 +371,17 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
+ hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
+ hyp_vm->kvm.arch.flags = 0;
+ pkvm_init_features_from_host(hyp_vm, host_kvm);
+}
+
+static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
+{
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+
+ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
+ vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -333,6 +407,16 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
+ hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+
+ ret = pkvm_vcpu_init_traps(hyp_vcpu);
+ if (ret)
+ goto done;
+
+ pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
done:
if (ret)
unpin_host_vcpu(host_vcpu);
@@ -430,6 +514,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size)
static void __unmap_donated_memory(void *va, size_t size)
{
+ kvm_flush_dcache_to_poc(va, size);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
@@ -571,10 +656,12 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
unlock:
hyp_spin_unlock(&vm_table_lock);
- if (ret)
+ if (ret) {
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ return ret;
+ }
- return ret;
+ return 0;
}
static void
@@ -625,6 +712,14 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
/* Push the metadata pages to the teardown memcache */
for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
+ struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache;
+
+ while (vcpu_mc->nr_pages) {
+ void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);
+
+ push_hyp_memcache(mc, addr, hyp_virt_to_phys);
+ unmap_donated_memory_noclear(addr, PAGE_SIZE);
+ }
teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index d57bcb6ab94d..c3e196fb8b18 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
if (is_cpu_on)
boot_args = this_cpu_ptr(&cpu_on_args);
@@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
if (is_cpu_on)
release_boot_args(boot_args);
+ write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR);
+ write_sysreg(INIT_PSTATE_EL1, SPSR_EL2);
+
__host_enter(host_ctxt);
}
@@ -265,6 +268,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_
case PSCI_1_0_FN_PSCI_FEATURES:
case PSCI_1_0_FN_SET_SUSPEND_MODE:
case PSCI_1_1_FN64_SYSTEM_RESET2:
+ case PSCI_1_3_FN_SYSTEM_OFF2:
+ case PSCI_1_3_FN64_SYSTEM_OFF2:
return psci_forward(host_ctxt);
case PSCI_1_0_FN64_SYSTEM_SUSPEND:
return psci_system_suspend(func_id, host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bc58d1b515af..d62bcb5634a2 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -12,7 +12,6 @@
#include <nvhe/early_alloc.h>
#include <nvhe/ffa.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -67,13 +66,34 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct cpu_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
{
void *start, *end, *virt = hyp_phys_to_virt(phys);
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
- enum kvm_pgtable_prot prot;
int ret, i;
/* Recreate the hyp page-table using the early page allocator */
@@ -125,22 +145,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
- /*
- * Map the host sections RO in the hypervisor, but transfer the
- * ownership from the host to the hypervisor itself to make sure they
- * can't be donated or shared with another entity.
- *
- * The ownership transition requires matching changes in the host
- * stage-2. This will be done later (see finalize_host_mappings()) once
- * the hyp_vmemmap is addressable.
- */
- prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
- ret = pkvm_create_mappings(&kvm_vgic_global_state,
- &kvm_vgic_global_state + 1, prot);
- if (ret)
- return ret;
-
- return 0;
+ return pkvm_create_host_sve_mappings();
}
static void update_nvhe_init_params(void)
@@ -174,7 +179,6 @@ static void hpool_put_page(void *addr)
static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- enum kvm_pgtable_prot prot;
enum pkvm_page_state state;
phys_addr_t phys;
@@ -197,16 +201,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
case PKVM_PAGE_OWNED:
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
case PKVM_PAGE_SHARED_OWNED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
break;
case PKVM_PAGE_SHARED_BORROWED:
- prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
+ hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
break;
default:
return -EINVAL;
}
- return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
+ return 0;
}
static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -257,8 +261,7 @@ static int fix_hyp_pgtable_refcnt(void)
void __noreturn __pkvm_init_finalise(void)
{
- struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
- struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
unsigned long nr_pages, reserved_pages, pfn;
int ret;
@@ -316,7 +319,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
{
struct kvm_nvhe_init_params *params;
void *virt = hyp_phys_to_virt(phys);
- void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
+ typeof(__pkvm_init_switch_pgd) *fn;
int ret;
BUG_ON(kvm_check_pvm_sysreg_table());
@@ -340,7 +343,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
/* Jump in the idmap page to switch to the new page-tables */
params = this_cpu_ptr(&kvm_init_params);
fn = (typeof(fn))__hyp_pa(__pkvm_init_switch_pgd);
- fn(__hyp_pa(params), __pkvm_init_finalise);
+ fn(params->pgd_pa, params->stack_hyp_va, __pkvm_init_finalise);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
index ed6b58b19cfa..5b6eeab1a774 100644
--- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
+++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
@@ -28,7 +28,7 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
- stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
+ stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - NVHE_STACK_SIZE);
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
stacktrace_info->fp = fp;
stacktrace_info->pc = pc;
@@ -54,7 +54,7 @@ static struct stack_info stackinfo_get_hyp(void)
{
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
unsigned long high = params->stack_hyp_va;
- unsigned long low = high - PAGE_SIZE;
+ unsigned long low = high - NVHE_STACK_SIZE;
return (struct stack_info) {
.low = low,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4fc..7d2ba6ef0261 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -26,7 +26,6 @@
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -36,34 +35,71 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_traps(struct kvm_vcpu *vcpu)
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- ___activate_traps(vcpu);
- __activate_traps_common(vcpu);
+ if (!guest_owns_fp_regs())
+ __activate_traps_fpsimd32(vcpu);
- val = vcpu->arch.cptr_el2;
- val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
- if (cpus_have_final_cap(ARM64_SME)) {
- if (has_hvhe())
- val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
- else
- val |= CPTR_EL2_TSM;
+ if (has_hvhe()) {
+ val |= CPACR_EL1_TTA;
+
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ }
+
+ write_sysreg(val, cpacr_el1);
+ } else {
+ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
+
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
+ val |= CPTR_EL2_TSM;
+
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
+
+ write_sysreg(val, cptr_el2);
}
+}
+
+static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ if (has_hvhe()) {
+ u64 val = CPACR_EL1_FPEN;
- if (!guest_owns_fp_regs(vcpu)) {
- if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
- else
- val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
+ if (cpus_have_final_cap(ARM64_SVE))
+ val |= CPACR_EL1_ZEN;
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN;
- __activate_traps_fpsimd32(vcpu);
+ write_sysreg(val, cpacr_el1);
+ } else {
+ u64 val = CPTR_NVHE_EL2_RES1;
+
+ if (!cpus_have_final_cap(ARM64_SVE))
+ val |= CPTR_EL2_TZ;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
+
+ write_sysreg(val, cptr_el2);
}
+}
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
+ __activate_traps_common(vcpu);
+ __activate_cptr_traps(vcpu);
- kvm_write_cptr_el2(val);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -108,7 +144,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
- kvm_reset_cptr_el2(vcpu);
+ __deactivate_cptr_traps(vcpu);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
@@ -174,9 +210,8 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
{
/*
- * Make sure we handle the exit for workarounds and ptrauth
- * before the pKVM handling, as the latter could decide to
- * UNDEF.
+ * Make sure we handle the exit for workarounds before the pKVM
+ * handling, as the latter could decide to UNDEF.
*/
return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
kvm_handle_pvm_sysreg(vcpu, exit_code));
@@ -191,7 +226,6 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -203,34 +237,33 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
- if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
}
-/*
- * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
- * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
- * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
- * hypervisor spots a guest in such a state ensure it is handled, and don't
- * trust the host to spot or fix it. The check below is based on the one in
- * kvm_arch_vcpu_ioctl_run().
- *
- * Returns false if the guest ran in AArch32 when it shouldn't have, and
- * thus should exit to the host, or true if a the guest run loop can continue.
- */
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+
+ synchronize_vcpu_pstate(vcpu, exit_code);
- if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ /*
+ * Some guests (e.g., protected VMs) are not be allowed to run in
+ * AArch32. The ARMv8 architecture does not give the hypervisor a
+ * mechanism to prevent a guest from dropping to AArch32 EL0 if
+ * implemented by the CPU. If the hypervisor spots a guest in such a
+ * state ensure it is handled, and don't trust the host to spot or fix
+ * it. The check below is based on the one in
+ * kvm_arch_vcpu_ioctl_run().
+ */
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
@@ -242,6 +275,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}
+
+ return __fixup_guest_exit(vcpu, exit_code, handlers);
}
/* Switch to the guest for legacy non-VHE systems */
@@ -264,7 +299,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@@ -337,7 +372,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -367,7 +402,7 @@ asmlinkage void __noreturn hyp_panic(void)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index edd969a1f36b..1ddd9ed3cbb3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -11,7 +11,7 @@
#include <hyp/adjust_pc.h>
-#include <nvhe/fixed_config.h>
+#include <nvhe/pkvm.h>
#include "../../sys_regs.h"
@@ -28,222 +28,255 @@ u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
u64 id_aa64smfr0_el1_sys_val;
-/*
- * Inject an unknown/undefined exception to an AArch64 guest while most of its
- * sysregs are live.
- */
-static void inject_undef64(struct kvm_vcpu *vcpu)
-{
- u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
-
- *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
-
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
-
- __kvm_adjust_pc(vcpu);
-
- write_sysreg_el1(esr, SYS_ESR);
- write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
- write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
- write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
-}
-
-/*
- * Returns the restricted features values of the feature register based on the
- * limitations in restrict_fields.
- * A feature id field value of 0b0000 does not impose any restrictions.
- * Note: Use only for unsigned feature field values.
- */
-static u64 get_restricted_features_unsigned(u64 sys_reg_val,
- u64 restrict_fields)
-{
- u64 value = 0UL;
- u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+struct pvm_ftr_bits {
+ bool sign;
+ u8 shift;
+ u8 width;
+ u8 max_val;
+ bool (*vm_supported)(const struct kvm *kvm);
+};
- /*
- * According to the Arm Architecture Reference Manual, feature fields
- * use increasing values to indicate increases in functionality.
- * Iterate over the restricted feature fields and calculate the minimum
- * unsigned value between the one supported by the system, and what the
- * value is being restricted to.
- */
- while (sys_reg_val && restrict_fields) {
- value |= min(sys_reg_val & mask, restrict_fields & mask);
- sys_reg_val &= ~mask;
- restrict_fields &= ~mask;
- mask <<= ARM64_FEATURE_FIELD_BITS;
+#define __MAX_FEAT_FUNC(id, fld, max, func, sgn) \
+ { \
+ .sign = sgn, \
+ .shift = id##_##fld##_SHIFT, \
+ .width = id##_##fld##_WIDTH, \
+ .max_val = id##_##fld##_##max, \
+ .vm_supported = func, \
}
- return value;
-}
-
-/*
- * Functions that return the value of feature id registers for protected VMs
- * based on allowed features, system features, and KVM support.
- */
+#define MAX_FEAT_FUNC(id, fld, max, func) \
+ __MAX_FEAT_FUNC(id, fld, max, func, id##_##fld##_SIGNED)
-static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask = 0;
- u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+#define MAX_FEAT(id, fld, max) \
+ MAX_FEAT_FUNC(id, fld, max, NULL)
- set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+#define MAX_FEAT_ENUM(id, fld, max) \
+ __MAX_FEAT_FUNC(id, fld, max, NULL, false)
- return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
-}
+#define FEAT_END { .width = 0, }
-static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_ptrauth(const struct kvm *kvm)
{
- const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
- u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
-
- if (!kvm_has_mte(kvm))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
-
- return id_aa64pfr1_el1_sys_val & allow_mask;
-}
-
-static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for Scalable Vectors, therefore, hyp has no sanitized
- * copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, including breakpoints, and watchpoints,
- * therefore, pKVM has no sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, therefore, hyp has no sanitized copy of the
- * feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
- return 0;
-}
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
-static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
- return 0;
+ return (cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||
+ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC);
}
-static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_sve(const struct kvm *kvm)
{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
- return 0;
+ return system_supports_sve() && kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_SVE);
}
-static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
-{
- return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
-}
+/*
+ * Definitions for features to be allowed or restricted for protected guests.
+ *
+ * Each field in the masks represents the highest supported value for the
+ * feature. If a feature field is not present, it is not supported. Moreover,
+ * these are used to generate the guest's view of the feature registers.
+ *
+ * The approach for protected VMs is to at least support features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Cannot be trapped or prevent the guest from using anyway
+ */
-static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64pfr0[] = {
+ MAX_FEAT(ID_AA64PFR0_EL1, EL0, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL1, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL3, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, AdvSIMD, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, GIC, IMP),
+ MAX_FEAT_FUNC(ID_AA64PFR0_EL1, SVE, IMP, vm_has_sve),
+ MAX_FEAT(ID_AA64PFR0_EL1, RAS, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, DIT, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV3, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
+static const struct pvm_ftr_bits pvmid_aa64pfr1[] = {
+ MAX_FEAT(ID_AA64PFR1_EL1, BT, IMP),
+ MAX_FEAT(ID_AA64PFR1_EL1, SSBS, SSBS2),
+ MAX_FEAT_ENUM(ID_AA64PFR1_EL1, MTE_frac, NI),
+ FEAT_END
+};
- return id_aa64isar1_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = {
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40),
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGEND, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, SNSMEM, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGENDEL0, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, EXS, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64mmfr1[] = {
+ MAX_FEAT(ID_AA64MMFR1_EL1, HAFDBS, DBM),
+ MAX_FEAT_ENUM(ID_AA64MMFR1_EL1, VMIDBits, 16),
+ MAX_FEAT(ID_AA64MMFR1_EL1, HPDS, HPDS2),
+ MAX_FEAT(ID_AA64MMFR1_EL1, PAN, PAN3),
+ MAX_FEAT(ID_AA64MMFR1_EL1, SpecSEI, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, ETS, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, CMOW, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = {
+ MAX_FEAT(ID_AA64MMFR2_EL1, CnP, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP),
+ MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18),
+ MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2),
+ MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP),
+ FEAT_END
+};
- return id_aa64isar2_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64isar1[] = {
+ MAX_FEAT(ID_AA64ISAR1_EL1, DPB, DPB2),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, APA, PAuth, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, API, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR1_EL1, JSCVT, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FCMA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, LRCPC, LRCPC3),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPI, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FRINTTS, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SB, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX),
+ MAX_FEAT(ID_AA64ISAR1_EL1, BF16, EBF16),
+ MAX_FEAT(ID_AA64ISAR1_EL1, DGH, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, I8MM, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask;
+static const struct pvm_ftr_bits pvmid_aa64isar2[] = {
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, GPA3, IMP, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, APA3, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR2_EL1, ATS1A, IMP),
+ FEAT_END
+};
- set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
- PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+/*
+ * None of the features in ID_AA64DFR0_EL1 nor ID_AA64MMFR4_EL1 are supported.
+ * However, both have Not-Implemented values that are non-zero. Define them
+ * so they can be used when getting the value of these registers.
+ */
+#define ID_AA64DFR0_EL1_NONZERO_NI \
+( \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, MTPMU, NI) \
+)
- return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
-}
+#define ID_AA64MMFR4_EL1_NONZERO_NI \
+ SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI)
-static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+/*
+ * Returns the value of the feature registers based on the system register
+ * value, the vcpu support for the revelant features, and the additional
+ * restrictions for protected VMs.
+ */
+static u64 get_restricted_features(const struct kvm_vcpu *vcpu,
+ u64 sys_reg_val,
+ const struct pvm_ftr_bits restrictions[])
{
- return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
-}
+ u64 val = 0UL;
+ int i;
+
+ for (i = 0; restrictions[i].width != 0; i++) {
+ bool (*vm_supported)(const struct kvm *) = restrictions[i].vm_supported;
+ bool sign = restrictions[i].sign;
+ int shift = restrictions[i].shift;
+ int width = restrictions[i].width;
+ u64 min_signed = (1UL << width) - 1UL;
+ u64 sign_bit = 1UL << (width - 1);
+ u64 mask = GENMASK_ULL(width + shift - 1, shift);
+ u64 sys_val = (sys_reg_val & mask) >> shift;
+ u64 pvm_max = restrictions[i].max_val;
+
+ if (vm_supported && !vm_supported(vcpu->kvm))
+ val |= (sign ? min_signed : 0) << shift;
+ else if (sign && (sys_val >= sign_bit || pvm_max >= sign_bit))
+ val |= max(sys_val, pvm_max) << shift;
+ else
+ val |= min(sys_val, pvm_max) << shift;
+ }
-static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
-{
- return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+ return val;
}
-/* Read a sanitized cpufeature ID register by its encoding */
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
{
switch (id) {
case SYS_ID_AA64PFR0_EL1:
- return get_pvm_id_aa64pfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0);
case SYS_ID_AA64PFR1_EL1:
- return get_pvm_id_aa64pfr1(vcpu);
- case SYS_ID_AA64ZFR0_EL1:
- return get_pvm_id_aa64zfr0(vcpu);
- case SYS_ID_AA64DFR0_EL1:
- return get_pvm_id_aa64dfr0(vcpu);
- case SYS_ID_AA64DFR1_EL1:
- return get_pvm_id_aa64dfr1(vcpu);
- case SYS_ID_AA64AFR0_EL1:
- return get_pvm_id_aa64afr0(vcpu);
- case SYS_ID_AA64AFR1_EL1:
- return get_pvm_id_aa64afr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1);
case SYS_ID_AA64ISAR0_EL1:
- return get_pvm_id_aa64isar0(vcpu);
+ return id_aa64isar0_el1_sys_val;
case SYS_ID_AA64ISAR1_EL1:
- return get_pvm_id_aa64isar1(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar1_el1_sys_val, pvmid_aa64isar1);
case SYS_ID_AA64ISAR2_EL1:
- return get_pvm_id_aa64isar2(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar2_el1_sys_val, pvmid_aa64isar2);
case SYS_ID_AA64MMFR0_EL1:
- return get_pvm_id_aa64mmfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr0_el1_sys_val, pvmid_aa64mmfr0);
case SYS_ID_AA64MMFR1_EL1:
- return get_pvm_id_aa64mmfr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr1_el1_sys_val, pvmid_aa64mmfr1);
case SYS_ID_AA64MMFR2_EL1:
- return get_pvm_id_aa64mmfr2(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr2_el1_sys_val, pvmid_aa64mmfr2);
+ case SYS_ID_AA64DFR0_EL1:
+ return ID_AA64DFR0_EL1_NONZERO_NI;
+ case SYS_ID_AA64MMFR4_EL1:
+ return ID_AA64MMFR4_EL1_NONZERO_NI;
default:
/* Unhandled ID register, RAZ */
return 0;
}
}
+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r)
{
- return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+ struct kvm *kvm = vcpu->kvm;
+ u32 reg = reg_to_encoding(r);
+
+ if (WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)))
+ return 0;
+
+ if (reg >= sys_reg(3, 0, 0, 1, 0) && reg <= sys_reg(3, 0, 0, 7, 7))
+ return kvm->arch.id_regs[IDREG_IDX(reg)];
+
+ return 0;
}
/* Handler to RAZ/WI sysregs */
@@ -271,13 +304,6 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
return false;
}
- /*
- * No support for AArch32 guests, therefore, pKVM has no sanitized copy
- * of AArch32 feature id registers.
- */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
-
return pvm_access_raz_wi(vcpu, p, r);
}
@@ -449,6 +475,30 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
};
/*
+ * Initializes feature registers for protected vms.
+ */
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_arch *ka = &kvm->arch;
+ u32 r;
+
+ hyp_assert_lock_held(&vm_table_lock);
+
+ if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
+ return;
+
+ /*
+ * Initialize only AArch64 id registers since AArch32 isn't supported
+ * for protected VMs.
+ */
+ for (r = sys_reg(3, 0, 0, 4, 0); r <= sys_reg(3, 0, 0, 7, 7); r += sys_reg(0, 0, 0, 0, 1))
+ ka->id_regs[IDREG_IDX(r)] = pvm_calc_id_reg(vcpu, r);
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+}
+
+/*
* Checks that the sysreg table is unique and in-order.
*
* Returns 0 if the table is consistent, or 1 otherwise.
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index 29305022bc04..dba101565de3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
__sysreg_restore_common_state(ctxt);
__sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index 3aaab20ae5b4..ff176f4ce7de 100644
--- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
*/
void __timer_disable_traps(struct kvm_vcpu *vcpu)
{
- u64 val, shift = 0;
+ u64 set, clr, shift = 0;
if (has_hvhe())
shift = 10;
/* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
- write_sysreg(val, cnthctl_el2);
+ set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
+ clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
/*
@@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
set <<= 10;
}
+ /*
+ * Trap the virtual counter/timer if we have a broken cntvoff
+ * implementation.
+ */
+ if (has_broken_cntvoff())
+ set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
sysreg_clear_set(cnthctl_el2, clr, set);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index a60fb13e2192..48da9ca9763f 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -11,13 +11,23 @@
#include <nvhe/mem_protect.h>
struct tlb_inv_context {
- u64 tcr;
+ struct kvm_s2_mmu *mmu;
+ u64 tcr;
+ u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt,
- bool nsh)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+ cxt->mmu = NULL;
+
/*
* We have two requirements:
*
@@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
else
dsb(ish);
+ /*
+ * If we're already in the desired context, then there's nothing to do.
+ */
+ if (vcpu) {
+ /*
+ * We're in guest context. However, for this to work, this needs
+ * to be called from within __kvm_vcpu_run(), which ensures that
+ * __hyp_running_vcpu is set to the current guest vcpu.
+ */
+ if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu))
+ return;
+
+ cxt->mmu = vcpu->arch.hw_mmu;
+ } else {
+ /* We're in host context. */
+ if (mmu == host_s2_mmu)
+ return;
+
+ cxt->mmu = host_s2_mmu;
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
* For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
+ * avoid a Stage-1 walk with the old VMID while we have
+ * the new VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the host S1 MMU is enabled, so
+ * we can simply set the EPD bits to avoid any further
+ * TLB fill. For guests, we ensure that the S1 MMU is
+ * temporarily enabled in the next context.
*/
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, SYS_TCR);
isb();
+
+ if (vcpu) {
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ if (!(val & SCTLR_ELx_M)) {
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ isb();
+ }
+ } else {
+ /* The host S1 MMU is always enabled. */
+ cxt->sctlr = SCTLR_ELx_M;
+ }
}
/*
@@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
* ensuring that we always have an ISB, but not two ISBs back
* to back.
*/
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ if (vcpu)
+ __load_host_stage2();
+ else
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
- __load_host_stage2();
+ struct kvm_s2_mmu *mmu = cxt->mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ if (!mmu)
+ return;
+
+ if (vcpu)
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ else
+ __load_host_stage2();
+
+ /* Ensure write of the old VMID */
+ isb();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
- isb();
- /* Restore the host's TCR_EL1 */
+ if (!(cxt->sctlr & SCTLR_ELx_M)) {
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ isb();
+ }
+
write_sysreg_el1(cxt->tcr, SYS_TCR);
}
}
@@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, true);
+ enter_vmid_context(mmu, &cxt, true);
/*
* We could do so much better if we had the VA as well.
@@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -152,16 +219,17 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
start = round_down(start, stride);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -169,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -183,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
{
- /* Same remark as in __tlb_switch_to_guest() */
+ /* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index ab9d05fcf98b..df5cc74a7dd0 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
-
-#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
-
-#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
-
-#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
- KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
- KVM_PTE_LEAF_ATTR_HI_S2_XN)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
-#define KVM_MAX_OWNER_ID 1
-
-/*
- * Used to indicate a pte for which a 'break-before-make' sequence is in
- * progress.
- */
-#define KVM_INVALID_PTE_LOCKED BIT(10)
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
@@ -77,14 +35,6 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO);
}
-static bool kvm_phys_is_valid(u64 phys)
-{
- u64 parange_max = kvm_get_parange_max();
- u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
-
- return phys < BIT(shift);
-}
-
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
{
u64 granule = kvm_granule_size(ctx->level);
@@ -95,7 +45,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
if (granule > (ctx->end - ctx->addr))
return false;
- if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
+ if (!IS_ALIGNED(phys, granule))
return false;
return IS_ALIGNED(ctx->addr, granule);
@@ -528,7 +478,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
kvm_clear_pte(ctx->ptep);
dsb(ishst);
- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
} else {
if (ctx->end - ctx->addr < granule)
return -EINVAL;
@@ -629,6 +579,9 @@ struct stage2_map_data {
/* Force mappings to page granularity */
bool force_pte;
+
+ /* Walk should update owner_id only */
+ bool annotation;
};
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
@@ -717,15 +670,29 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
kvm_pte_t *ptep)
{
- bool device = prot & KVM_PGTABLE_PROT_DEVICE;
- kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
- KVM_S2_MEMATTR(pgt, NORMAL);
+ kvm_pte_t attr;
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+ switch (prot & (KVM_PGTABLE_PROT_DEVICE |
+ KVM_PGTABLE_PROT_NORMAL_NC)) {
+ case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
+ return -EINVAL;
+ case KVM_PGTABLE_PROT_DEVICE:
+ if (prot & KVM_PGTABLE_PROT_X)
+ return -EINVAL;
+ attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
+ break;
+ case KVM_PGTABLE_PROT_NORMAL_NC:
+ if (prot & KVM_PGTABLE_PROT_X)
+ return -EINVAL;
+ attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
+ break;
+ default:
+ attr = KVM_S2_MEMATTR(pgt, NORMAL);
+ }
+
if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
- else if (device)
- return -EINVAL;
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -829,12 +796,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}
if (stage2_pte_is_counted(ctx->old))
@@ -882,9 +852,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- if (!stage2_unmap_defer_tlb_flush(pgt))
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
- ctx->addr, ctx->level);
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ TLBI_TTL_UNKNOWN);
+ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ ctx->level);
+ }
}
mm_ops->put_page(ctx->ptep);
@@ -893,12 +867,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+ return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
@@ -906,18 +880,7 @@ static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
{
u64 phys = data->phys;
- /*
- * Stage-2 walks to update ownership data are communicated to the map
- * walker using an invalid PA. Avoid offsetting an already invalid PA,
- * which could overflow and make the address valid again.
- */
- if (!kvm_phys_is_valid(phys))
- return phys;
-
- /*
- * Otherwise, work out the correct PA based on how far the walk has
- * gotten.
- */
+ /* Work out the correct PA based on how far the walk has gotten */
return phys + (ctx->addr - ctx->start);
}
@@ -929,6 +892,9 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
return false;
+ if (data->annotation)
+ return true;
+
return kvm_block_mapping_supported(ctx, phys);
}
@@ -944,7 +910,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_leaf_mapping_allowed(ctx, data))
return -E2BIG;
- if (kvm_phys_is_valid(phys))
+ if (!data->annotation)
new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
else
new = kvm_init_invalid_leaf_owner(data->owner_id);
@@ -958,6 +924,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_pte_needs_update(ctx->old, new))
return -EAGAIN;
+ /* If we're only changing software bits, then store them and go! */
+ if (!kvm_pgtable_walk_shared(ctx) &&
+ !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
+ bool old_is_counted = stage2_pte_is_counted(ctx->old);
+
+ if (old_is_counted != stage2_pte_is_counted(new)) {
+ if (old_is_counted)
+ mm_ops->put_page(ctx->ptep);
+ else
+ mm_ops->get_page(ctx->ptep);
+ }
+ WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
+ return 0;
+ }
+
if (!stage2_try_break_pte(ctx, data->mmu))
return -EAGAIN;
@@ -1091,11 +1072,11 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
{
int ret;
struct stage2_map_data map_data = {
- .phys = KVM_PHYS_INVALID,
.mmu = pgt->mmu,
.memcache = mc,
.owner_id = owner_id,
.force_pte = true,
+ .annotation = true,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -1251,19 +1232,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
NULL, NULL, 0);
}
-kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags)
{
- kvm_pte_t pte = 0;
int ret;
ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
- &pte, NULL,
- KVM_PGTABLE_WALK_HANDLE_FAULT |
- KVM_PGTABLE_WALK_SHARED);
+ NULL, NULL, flags);
if (!ret)
dsb(ishst);
-
- return pte;
}
struct stage2_age_data {
@@ -1317,7 +1294,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
}
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot)
+ enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
{
int ret;
s8 level;
@@ -1335,9 +1312,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_X)
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
- ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
- KVM_PGTABLE_WALK_HANDLE_FAULT |
- KVM_PGTABLE_WALK_SHARED);
+ ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
@@ -1349,7 +1324,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable *pgt = ctx->arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
- if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
+ if (!stage2_pte_cacheable(pgt, ctx->old))
return 0;
if (mm_ops->dcache_clean_inval_poc)
@@ -1511,7 +1486,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
*/
new = kvm_init_table_pte(childp, mm_ops);
stage2_make_pte(ctx, new);
- dsb(ishst);
return 0;
}
@@ -1523,8 +1497,11 @@ int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = mc,
};
+ int ret;
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ dsb(ishst);
+ return ret;
}
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184b1..3f9741e51d41 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -268,8 +268,16 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
* starting to mess with the rest of the GIC, and VMCR_EL2 in
* particular. This logic must be called before
* __vgic_v3_restore_state().
+ *
+ * However, if the vgic is disabled (ICH_HCR_EL2.EN==0), no GIC is
+ * provisioned at all. In order to prevent illegal accesses to the
+ * system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
+ * so that the trap bits can take effect. Yes, we *loves* the GIC.
*/
- if (!cpu_if->vgic_sre) {
+ if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
+ write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
+ isb();
+ } else if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
@@ -288,8 +296,9 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
*/
write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
ICC_SRE_EL2);
@@ -297,10 +306,11 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected,
+ * injected. Note that this also applies if we don't expect
+ * any system register access (no vgic at all).
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
@@ -326,11 +336,11 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
* no interrupts were being injected, and we disable it again here.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(0, ICH_HCR_EL2);
}
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +373,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +465,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
-u64 __vgic_v3_read_vmcr(void)
+static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __vgic_v3_write_vmcr(u32 vmcr)
+static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ __vgic_v3_save_aprs(cpu_if);
+ if (cpu_if->vgic_sre)
+ cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
+}
+
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (cpu_if->vgic_sre)
+ __vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
+ __vgic_v3_restore_aprs(cpu_if);
+}
+
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
@@ -983,9 +1012,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
- /* SEIS */
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
- val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
@@ -1013,6 +1039,75 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
+ u32 sysreg, bool is_read)
+{
+ u64 ich_hcr;
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return false;
+
+ ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+
+ switch (sysreg) {
+ case SYS_ICC_IGRPEN0_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP0Rn_EL1(0):
+ case SYS_ICC_AP0Rn_EL1(1):
+ case SYS_ICC_AP0Rn_EL1(2):
+ case SYS_ICC_AP0Rn_EL1(3):
+ case SYS_ICC_BPR0_EL1:
+ case SYS_ICC_EOIR0_EL1:
+ case SYS_ICC_HPPIR0_EL1:
+ case SYS_ICC_IAR0_EL1:
+ return ich_hcr & ICH_HCR_TALL0;
+
+ case SYS_ICC_IGRPEN1_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP1Rn_EL1(0):
+ case SYS_ICC_AP1Rn_EL1(1):
+ case SYS_ICC_AP1Rn_EL1(2):
+ case SYS_ICC_AP1Rn_EL1(3):
+ case SYS_ICC_BPR1_EL1:
+ case SYS_ICC_EOIR1_EL1:
+ case SYS_ICC_HPPIR1_EL1:
+ case SYS_ICC_IAR1_EL1:
+ return ich_hcr & ICH_HCR_TALL1;
+
+ case SYS_ICC_DIR_EL1:
+ if (ich_hcr & ICH_HCR_TDIR)
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_RPR_EL1:
+ case SYS_ICC_CTLR_EL1:
+ case SYS_ICC_PMR_EL1:
+ return ich_hcr & ICH_HCR_TC;
+
+ default:
+ return false;
+ }
+}
+
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
@@ -1022,6 +1117,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
+ if (kern_hyp_va(vcpu->kvm)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
+ return 0;
+
esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
@@ -1036,6 +1134,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+ if (__vgic_v3_check_trap_forwarding(vcpu, sysreg, is_read))
+ return 0;
+
switch (sysreg) {
case SYS_ICC_IAR0_EL1:
case SYS_ICC_IAR1_EL1:
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index 3b9e5464b5b3..afc4aed9231a 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -6,6 +6,8 @@
asflags-y := -D__KVM_VHE_HYPERVISOR__
ccflags-y := -D__KVM_VHE_HYPERVISOR__
+CFLAGS_switch.o += -Wno-override-init
+
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
index 289689b2682d..0100339b09e0 100644
--- a/arch/arm64/kvm/hyp/vhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -19,8 +19,3 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1581df6aec87..647737d6e8d0 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -33,11 +33,124 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+/*
+ * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1
+ * semantics, irrespective of the configuration), but that cannot be
+ * applied to the actual HW as things would otherwise break badly.
+ *
+ * - TGE: we want the guest to use EL1, which is incompatible with
+ * this bit being set
+ *
+ * - API/APK: they are already accounted for by vcpu_load(), and can
+ * only take effect across a load/put cycle (such as ERET)
+ */
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+
+static u64 __compute_hcr(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (!vcpu_has_nv(vcpu))
+ return hcr;
+
+ if (is_hyp_ctxt(vcpu)) {
+ hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ hcr |= HCR_NV1;
+
+ write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ }
+
+ return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+}
+
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ u64 cptr;
+
+ /*
+ * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
+ * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
+ * except for some missing controls, such as TAM.
+ * In this case, CPTR_EL2.TAM has the same position with or without
+ * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
+ * shift value for trapping the AMU accesses.
+ */
+ u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM;
+
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ } else {
+ __activate_traps_fpsimd32(vcpu);
+ }
+
+ if (!vcpu_has_nv(vcpu))
+ goto write;
+
+ /*
+ * The architecture is a bit crap (what a surprise): an EL2 guest
+ * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA,
+ * as they are RES0 in the guest's view. To work around it, trap the
+ * sucker using the very same bit it can't set...
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
+ val |= CPTR_EL2_TCPAC;
+
+ /*
+ * Layer the guest hypervisor's trap configuration on top of our own if
+ * we're in a nested context.
+ */
+ if (is_hyp_ctxt(vcpu))
+ goto write;
+
+ cptr = vcpu_sanitised_cptr_el2(vcpu);
+
+ /*
+ * Pay attention, there's some interesting detail here.
+ *
+ * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two
+ * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest):
+ *
+ * - CPTR_EL2.xEN = x0, traps are enabled
+ * - CPTR_EL2.xEN = x1, traps are disabled
+ *
+ * In other words, bit[0] determines if guest accesses trap or not. In
+ * the interest of simplicity, clear the entire field if the guest
+ * hypervisor has traps enabled to dispel any illusion of something more
+ * complicated taking place.
+ */
+ if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_FPEN;
+ if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_ZEN;
+
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+ val |= cptr & CPACR_EL1_E0POE;
+
+ val |= cptr & CPTR_EL2_TCPAC;
+
+write:
+ write_sysreg(val, cpacr_el1);
+}
+
+static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN;
+
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN_EL1EN;
+
+ write_sysreg(val, cpacr_el1);
+}
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, __compute_hcr(vcpu));
if (has_cntpoff()) {
struct timer_map map;
@@ -59,31 +172,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
}
}
- val = read_sysreg(cpacr_el1);
- val |= CPACR_ELx_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
-
- /*
- * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
- * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2,
- * except for some missing controls, such as TAM.
- * In this case, CPTR_EL2.TAM has the same position with or without
- * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM
- * shift value for trapping the AMU accesses.
- */
-
- val |= CPTR_EL2_TAM;
-
- if (guest_owns_fp_regs(vcpu)) {
- if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
- } else {
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
- __activate_traps_fpsimd32(vcpu);
- }
-
- write_sysreg(val, cpacr_el1);
+ __activate_cptr_traps(vcpu);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
}
@@ -128,7 +217,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
- kvm_reset_cptr_el2(vcpu);
+ __deactivate_cptr_traps(vcpu);
if (!arm64_kernel_unmapped_at_el0())
host_vectors = __this_cpu_read(this_cpu_vector);
@@ -162,6 +251,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
{
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu;
+
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
@@ -171,33 +262,293 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
{
__vcpu_put_deactivate_traps(vcpu);
__vcpu_put_switch_sysregs(vcpu);
+
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
+}
+
+static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg reg)
+{
+ unsigned long ctl;
+ u64 cval, cnt;
+ bool stat;
+
+ switch (reg) {
+ case CNTP_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ cnt = compute_counter_value(vcpu_ptimer(vcpu));
+ break;
+ case CNTV_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
+ cnt = compute_counter_value(vcpu_vtimer(vcpu));
+ break;
+ default:
+ BUG();
+ }
+
+ stat = cval <= cnt;
+ __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat);
+
+ return ctl;
+}
+
+static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr, val;
+
+ /*
+ * Having FEAT_ECV allows for a better quality of timer emulation.
+ * However, this comes at a huge cost in terms of traps. Try and
+ * satisfy the reads from guest's hypervisor context without
+ * returning to the kernel if we can.
+ */
+ if (!is_hyp_ctxt(vcpu))
+ return false;
+
+ esr = kvm_vcpu_get_esr(vcpu);
+ if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) != ESR_ELx_SYS64_ISS_DIR_READ)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_CNTP_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTP_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+
+ if (!has_cntpoff())
+ val -= timer_get_offset(vcpu_hptimer(vcpu));
+ } else {
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ }
+ break;
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ val = compute_counter_value(vcpu_hptimer(vcpu));
+ break;
+ case SYS_CNTV_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CVAL);
+ else
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ val = compute_counter_value(vcpu_hvtimer(vcpu));
+ break;
+ default:
+ return false;
+ }
+
+ vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 spsr, elr, mode;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ *
+ * Unless the trap has to be forwarded further down the line,
+ * of course...
+ */
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) ||
+ (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET))
+ return false;
+
+ spsr = read_sysreg_el1(SYS_SPSR);
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL0t:
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ return false;
+ break;
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ default:
+ return false;
+ }
+
+ /* If ERETAx fails, take the slow path */
+ if (esr_iss_is_eretax(esr)) {
+ if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr)))
+ return false;
+ } else {
+ elr = read_sysreg_el1(SYS_ELR);
+ }
+
+ spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+ write_sysreg_el2(spsr, SYS_SPSR);
+ write_sysreg_el2(elr, SYS_ELR);
+
+ return true;
+}
+
+static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ int ret = -EINVAL;
+ u32 instr;
+ u64 val;
+
+ /*
+ * Ideally, we would never trap on EL2 S1 TLB invalidations using
+ * the EL1 instructions when the guest's HCR_EL2.{E2H,TGE}=={1,1}.
+ * But "thanks" to FEAT_NV2, we don't trap writes to HCR_EL2,
+ * meaning that we can't track changes to the virtual TGE bit. So we
+ * have to leave HCR_EL2.TTLB set on the host. Oopsie...
+ *
+ * Try and handle these invalidation as quickly as possible, without
+ * fully exiting. Note that we don't need to consider any forwarding
+ * here, as having E2H+TGE set is the very definition of being
+ * InHost.
+ *
+ * For the lesser hypervisors out there that have failed to get on
+ * with the VHE program, we can also handle the nVHE style of EL2
+ * invalidation.
+ */
+ if (!(is_hyp_ctxt(vcpu)))
+ return false;
+
+ instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+ if ((kvm_supported_tlbi_s1e1_op(vcpu, instr) &&
+ vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ||
+ kvm_supported_tlbi_s1e2_op (vcpu, instr))
+ ret = __kvm_tlbi_s1e2(NULL, val, instr);
+
+ if (ret)
+ return false;
+
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
+static bool kvm_hyp_handle_cpacr_el1(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ int rt;
+
+ if (!is_hyp_ctxt(vcpu) || esr_sys64_to_sysreg(esr) != SYS_CPACR_EL1)
+ return false;
+
+ rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ) {
+ vcpu_set_reg(vcpu, rt, __vcpu_sys_reg(vcpu, CPTR_EL2));
+ } else {
+ vcpu_write_sys_reg(vcpu, vcpu_get_reg(vcpu, rt), CPTR_EL2);
+ __activate_cptr_traps(vcpu);
+ }
+
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
+static bool kvm_hyp_handle_zcr_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ if (sysreg != SYS_ZCR_EL2)
+ return false;
+
+ if (guest_owns_fp_regs())
+ return false;
+
+ /*
+ * ZCR_EL2 traps are handled in the slow path, with the expectation
+ * that the guest's FP context has already been loaded onto the CPU.
+ *
+ * Load the guest's FP context and unconditionally forward to the
+ * slow path for handling (i.e. return false).
+ */
+ kvm_hyp_handle_fpsimd(vcpu, exit_code);
+ return false;
+}
+
+static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
+ return true;
+
+ if (kvm_hyp_handle_timer(vcpu, exit_code))
+ return true;
+
+ if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
+ return true;
+
+ if (kvm_hyp_handle_zcr_el2(vcpu, exit_code))
+ return true;
+
+ return kvm_hyp_handle_sysreg(vcpu, exit_code);
}
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
- [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg_vhe,
[ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- return hyp_exit_handlers;
-}
+ synchronize_vcpu_pstate(vcpu, exit_code);
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
/*
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
- if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -212,6 +563,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
*vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
*vcpu_cpsr(vcpu) |= mode;
}
+
+ return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
}
/* Switch to the guest for VHE systems running in EL2 */
@@ -221,12 +574,13 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
sysreg_save_host_state_vhe(host_ctxt);
+ fpsimd_lazy_switch_to_guest(vcpu);
+
/*
* Note that ARM erratum 1165522 requires us to configure both stage 1
* and stage 2 translation for the guest context before we clear
@@ -240,11 +594,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
- if (is_hyp_ctxt(vcpu))
- vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
- else
- vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
-
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
@@ -256,9 +605,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
+ fpsimd_lazy_switch_to_host(vcpu);
+
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -301,12 +652,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
return ret;
}
-static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
+static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
@@ -326,7 +677,6 @@ void __noreturn hyp_panic(void)
u64 par = read_sysreg_par();
__hyp_call_panic(spsr, elr, par);
- unreachable();
}
asmlinkage void kvm_unexpected_el2_exception(void)
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 8e1e0d5033b6..90b018e06f2c 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -15,6 +15,131 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>
+static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
+{
+ /* These registers are common with EL1 */
+ __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1);
+ __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR);
+ __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0);
+ __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1);
+ __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR);
+ __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR);
+ __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR);
+ __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
+ __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR);
+
+ /*
+ * In VHE mode those registers are compatible between EL1 and EL2,
+ * and the guest uses the _EL1 versions on the CPU naturally.
+ * So we save them into their _EL2 versions here.
+ * For nVHE mode we trap accesses to those registers, so our
+ * _EL2 copy in sys_regs[] is always up-to-date and we don't need
+ * to save anything here.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ u64 val;
+
+ /*
+ * We don't save CPTR_EL2, as accesses to CPACR_EL1
+ * are always trapped, ensuring that the in-memory
+ * copy is always up-to-date. A small blessing...
+ */
+ __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR);
+ __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0);
+ __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1);
+ __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR);
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0);
+ __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR);
+ }
+
+ /*
+ * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
+ * the interesting CNTHCTL_EL2 bits live. So preserve these
+ * bits when reading back the guest-visible value.
+ */
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+ }
+
+ __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1);
+ __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR);
+ __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR);
+}
+
+static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ /* These registers are common with EL1 */
+ write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1);
+ write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR);
+
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ /*
+ * In VHE mode those registers are compatible between
+ * EL1 and EL2.
+ */
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
+ } else {
+ /*
+ * CNTHCTL_EL2 only affects EL1 when running nVHE, so
+ * no need to restore it.
+ */
+ val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2));
+ write_sysreg_el1(val, SYS_SCTLR);
+ val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2));
+ write_sysreg_el1(val, SYS_CPACR);
+ val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2));
+ write_sysreg_el1(val, SYS_TTBR0);
+ val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2));
+ write_sysreg_el1(val, SYS_TCR);
+ }
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR2_EL2), SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIR_EL2), SYS_PIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIRE0_EL2), SYS_PIRE0);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, POR_EL2), SYS_POR);
+ }
+
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2), SYS_FAR);
+ write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+}
+
/*
* VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
* pstate, which are handled as part of the el2 return state) on every
@@ -66,8 +191,9 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
+ u64 mpidr;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
/*
@@ -89,13 +215,35 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
+
+ if (unlikely(is_hyp_ctxt(vcpu))) {
+ __sysreg_restore_vel2_state(vcpu);
+ } else {
+ if (vcpu_has_nv(vcpu)) {
+ /*
+ * Use the guest hypervisor's VPIDR_EL2 when in a
+ * nested state. The hardware value of MIDR_EL1 gets
+ * restored on put.
+ */
+ write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
+
+ /*
+ * As we're restoring a nested guest, set the value
+ * provided by the guest hypervisor.
+ */
+ mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
+ } else {
+ mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
+ }
+
+ __sysreg_restore_el1_state(guest_ctxt, mpidr);
+ }
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
}
/**
- * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_sysregs - Restore host system registers to the physical CPU
*
* @vcpu: The VCPU pointer
*
@@ -110,14 +258,22 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
+
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ __sysreg_save_vel2_state(vcpu);
+ else
+ __sysreg_save_el1_state(guest_ctxt);
- __sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
__sysreg32_save_state(vcpu);
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
+ /* If leaving a nesting guest, restore MIDR_EL1 default view */
+ if (vcpu_has_nv(vcpu))
+ write_sysreg(read_cpuid_id(), vpidr_el2);
+
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b32e2940df7d..3d50a1bd2bdb 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -17,8 +17,8 @@ struct tlb_inv_context {
u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
@@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
isb();
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
@@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nshst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -169,16 +169,17 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -188,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -202,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
@@ -218,3 +219,150 @@ void __kvm_flush_vm_context(void)
__tlbi(alle1is);
dsb(ish);
}
+
+/*
+ * TLB invalidation emulation for NV. For any given instruction, we
+ * perform the following transformtions:
+ *
+ * - a TLBI targeting EL2 S1 is remapped to EL1 S1
+ * - a non-shareable TLBI is upgraded to being inner-shareable
+ * - an outer-shareable TLBI is also mapped to inner-shareable
+ * - an nXS TLBI is upgraded to XS
+ */
+int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
+{
+ struct tlb_inv_context cxt;
+ int ret = 0;
+
+ /*
+ * The guest will have provided its own DSB ISHST before trapping.
+ * If it hasn't, that's its own problem, and we won't paper over it
+ * (plus, there is plenty of extra synchronisation before we even
+ * get here...).
+ */
+
+ if (mmu)
+ enter_vmid_context(mmu, &cxt);
+
+ switch (sys_encoding) {
+ case OP_TLBI_ALLE2:
+ case OP_TLBI_ALLE2IS:
+ case OP_TLBI_ALLE2OS:
+ case OP_TLBI_VMALLE1:
+ case OP_TLBI_VMALLE1IS:
+ case OP_TLBI_VMALLE1OS:
+ case OP_TLBI_ALLE2NXS:
+ case OP_TLBI_ALLE2ISNXS:
+ case OP_TLBI_ALLE2OSNXS:
+ case OP_TLBI_VMALLE1NXS:
+ case OP_TLBI_VMALLE1ISNXS:
+ case OP_TLBI_VMALLE1OSNXS:
+ __tlbi(vmalle1is);
+ break;
+ case OP_TLBI_VAE2:
+ case OP_TLBI_VAE2IS:
+ case OP_TLBI_VAE2OS:
+ case OP_TLBI_VAE1:
+ case OP_TLBI_VAE1IS:
+ case OP_TLBI_VAE1OS:
+ case OP_TLBI_VAE2NXS:
+ case OP_TLBI_VAE2ISNXS:
+ case OP_TLBI_VAE2OSNXS:
+ case OP_TLBI_VAE1NXS:
+ case OP_TLBI_VAE1ISNXS:
+ case OP_TLBI_VAE1OSNXS:
+ __tlbi(vae1is, va);
+ break;
+ case OP_TLBI_VALE2:
+ case OP_TLBI_VALE2IS:
+ case OP_TLBI_VALE2OS:
+ case OP_TLBI_VALE1:
+ case OP_TLBI_VALE1IS:
+ case OP_TLBI_VALE1OS:
+ case OP_TLBI_VALE2NXS:
+ case OP_TLBI_VALE2ISNXS:
+ case OP_TLBI_VALE2OSNXS:
+ case OP_TLBI_VALE1NXS:
+ case OP_TLBI_VALE1ISNXS:
+ case OP_TLBI_VALE1OSNXS:
+ __tlbi(vale1is, va);
+ break;
+ case OP_TLBI_ASIDE1:
+ case OP_TLBI_ASIDE1IS:
+ case OP_TLBI_ASIDE1OS:
+ case OP_TLBI_ASIDE1NXS:
+ case OP_TLBI_ASIDE1ISNXS:
+ case OP_TLBI_ASIDE1OSNXS:
+ __tlbi(aside1is, va);
+ break;
+ case OP_TLBI_VAAE1:
+ case OP_TLBI_VAAE1IS:
+ case OP_TLBI_VAAE1OS:
+ case OP_TLBI_VAAE1NXS:
+ case OP_TLBI_VAAE1ISNXS:
+ case OP_TLBI_VAAE1OSNXS:
+ __tlbi(vaae1is, va);
+ break;
+ case OP_TLBI_VAALE1:
+ case OP_TLBI_VAALE1IS:
+ case OP_TLBI_VAALE1OS:
+ case OP_TLBI_VAALE1NXS:
+ case OP_TLBI_VAALE1ISNXS:
+ case OP_TLBI_VAALE1OSNXS:
+ __tlbi(vaale1is, va);
+ break;
+ case OP_TLBI_RVAE2:
+ case OP_TLBI_RVAE2IS:
+ case OP_TLBI_RVAE2OS:
+ case OP_TLBI_RVAE1:
+ case OP_TLBI_RVAE1IS:
+ case OP_TLBI_RVAE1OS:
+ case OP_TLBI_RVAE2NXS:
+ case OP_TLBI_RVAE2ISNXS:
+ case OP_TLBI_RVAE2OSNXS:
+ case OP_TLBI_RVAE1NXS:
+ case OP_TLBI_RVAE1ISNXS:
+ case OP_TLBI_RVAE1OSNXS:
+ __tlbi(rvae1is, va);
+ break;
+ case OP_TLBI_RVALE2:
+ case OP_TLBI_RVALE2IS:
+ case OP_TLBI_RVALE2OS:
+ case OP_TLBI_RVALE1:
+ case OP_TLBI_RVALE1IS:
+ case OP_TLBI_RVALE1OS:
+ case OP_TLBI_RVALE2NXS:
+ case OP_TLBI_RVALE2ISNXS:
+ case OP_TLBI_RVALE2OSNXS:
+ case OP_TLBI_RVALE1NXS:
+ case OP_TLBI_RVALE1ISNXS:
+ case OP_TLBI_RVALE1OSNXS:
+ __tlbi(rvale1is, va);
+ break;
+ case OP_TLBI_RVAAE1:
+ case OP_TLBI_RVAAE1IS:
+ case OP_TLBI_RVAAE1OS:
+ case OP_TLBI_RVAAE1NXS:
+ case OP_TLBI_RVAAE1ISNXS:
+ case OP_TLBI_RVAAE1OSNXS:
+ __tlbi(rvaae1is, va);
+ break;
+ case OP_TLBI_RVAALE1:
+ case OP_TLBI_RVAALE1IS:
+ case OP_TLBI_RVAALE1OS:
+ case OP_TLBI_RVAALE1NXS:
+ case OP_TLBI_RVAALE1ISNXS:
+ case OP_TLBI_RVAALE1OSNXS:
+ __tlbi(rvaale1is, va);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ dsb(ish);
+ isb();
+
+ if (mmu)
+ exit_vmid_context(&cxt);
+
+ return ret;
+}