Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe')
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/Makefile | 21
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/debug-sr.c | 76
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/ffa.c | 214
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/gen-hyprel.c | 6
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/host.S | 10
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-init.S | 84
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 369
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 895
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mm.c | 12
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/page_alloc.c | 14
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 403
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/psci-relay.c | 4
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 55
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/stacktrace.c | 4
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 100
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/sys_regs.c | 404
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 2
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/timer-sr.c | 16
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 117 |
19 files changed, 1619 insertions(+), 1187 deletions(-)
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 2250253a6429..b43426a493df 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) +CFLAGS_switch.nvhe.o += -Wno-override-init + hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o @@ -89,24 +91,11 @@ quiet_cmd_hyprel = HYPREL $@ quiet_cmd_hypcopy = HYPCOPY $@ cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@ -# Remove ftrace, Shadow Call Stack, and CFI CFLAGS. -# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations. -KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) # Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile' # when profile optimization is applied. gen-hyprel does not support SHT_REL and # causes a build failure. Remove profile optimization flags. KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS)) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables - -# KVM nVHE code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n - -# Skip objtool checking for this directory because nVHE code is compiled with -# non-standard build rules. -OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b6f..2f4a4f5036bb 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -51,42 +51,55 @@ static void __debug_restore_spe(u64 pmscr_el1) write_sysreg_el1(pmscr_el1, SYS_PMSCR); } -static void __debug_save_trace(u64 *trfcr_el1) +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr) { - *trfcr_el1 = 0; + *saved_trfcr = read_sysreg_el1(SYS_TRFCR); + write_sysreg_el1(new_trfcr, SYS_TRFCR); +} - /* Check if the TRBE is enabled */ - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E)) - return; - /* - * Prohibit trace generation while we are in guest. - * Since access to TRFCR_EL1 is trapped, the guest can't - * modify the filtering set by the host. 
- */ - *trfcr_el1 = read_sysreg_el1(SYS_TRFCR); - write_sysreg_el1(0, SYS_TRFCR); - isb(); - /* Drain the trace buffer to memory */ - tsb_csync(); +static bool __trace_needs_drain(void) +{ + if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE)) + return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E; + + return host_data_test_flag(TRBE_ENABLED); } -static void __debug_restore_trace(u64 trfcr_el1) +static bool __trace_needs_switch(void) { - if (!trfcr_el1) - return; + return host_data_test_flag(TRBE_ENABLED) || + host_data_test_flag(EL1_TRACING_CONFIGURED); +} - /* Restore trace filter controls */ - write_sysreg_el1(trfcr_el1, SYS_TRFCR); +static void __trace_switch_to_guest(void) +{ + /* Unsupported with TRBE so disable */ + if (host_data_test_flag(TRBE_ENABLED)) + *host_data_ptr(trfcr_while_in_guest) = 0; + + __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1), + *host_data_ptr(trfcr_while_in_guest)); + + if (__trace_needs_drain()) { + isb(); + tsb_csync(); + } +} + +static void __trace_switch_to_host(void) +{ + __trace_do_switch(host_data_ptr(trfcr_while_in_guest), + *host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); - /* Disable and flush Self-Hosted Trace generation */ - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + if (host_data_test_flag(HAS_SPE)) + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); + + if (__trace_needs_switch()) + __trace_switch_to_guest(); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -96,18 +109,13 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + if (host_data_test_flag(HAS_SPE)) + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); + if (__trace_needs_switch()) + __trace_switch_to_host(); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) { __debug_switch_to_host_common(vcpu); } - -u64 __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 320f2eaa14a9..e433dfab882a 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -67,6 +67,9 @@ struct kvm_ffa_buffers { */ static struct kvm_ffa_buffers hyp_buffers; static struct kvm_ffa_buffers host_buffers; +static u32 hyp_ffa_version; +static bool has_version_negotiated; +static hyp_spinlock_t version_lock; static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) { @@ -177,6 +180,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) res); } +static void ffa_rx_release(struct arm_smccc_res *res) +{ + arm_smccc_1_1_smc(FFA_RX_RELEASE, + 0, 0, + 0, 0, 0, 0, 0, + res); +} + static void do_ffa_rxtx_map(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { @@ -415,9 +426,9 @@ out: return; } -static __always_inline void do_ffa_mem_xfer(const u64 func_id, - struct arm_smccc_res *res, - struct kvm_cpu_context *ctxt) +static void __do_ffa_mem_xfer(const u64 func_id, + struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) { DECLARE_REG(u32, len, ctxt, 1); 
DECLARE_REG(u32, fraglen, ctxt, 2); @@ -429,9 +440,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, u32 offset, nr_ranges; int ret = 0; - BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE && - func_id != FFA_FN64_MEM_LEND); - if (addr_mbz || npages_mbz || fraglen > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { ret = FFA_RET_INVALID_PARAMETERS; @@ -450,11 +458,16 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } + if (len > ffa_desc_buf.len) { + ret = FFA_RET_NO_MEMORY; + goto out_unlock; + } + buf = hyp_buffers.tx; memcpy(buf, host_buffers.tx, fraglen); ep_mem_access = (void *)buf + - ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + ffa_mem_desc_offset(buf, 0, hyp_ffa_version); offset = ep_mem_access->composite_off; if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) { ret = FFA_RET_INVALID_PARAMETERS; @@ -501,6 +514,13 @@ err_unshare: goto out_unlock; } +#define do_ffa_mem_xfer(fid, res, ctxt) \ + do { \ + BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \ + (fid) != FFA_FN64_MEM_LEND); \ + __do_ffa_mem_xfer((fid), (res), (ctxt)); \ + } while (0); + static void do_ffa_mem_reclaim(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { @@ -533,7 +553,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a2; ep_mem_access = (void *)buf + - ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + ffa_mem_desc_offset(buf, 0, hyp_ffa_version); offset = ep_mem_access->composite_off; /* * We can trust the SPMD to get this right, but let's at least @@ -543,16 +563,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, if (WARN_ON(offset > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) { ret = FFA_RET_ABORTED; + ffa_rx_release(res); goto out_unlock; } if (len > ffa_desc_buf.len) { ret = FFA_RET_NO_MEMORY; + ffa_rx_release(res); goto out_unlock; } buf = ffa_desc_buf.buf; memcpy(buf, hyp_buffers.rx, fraglen); + ffa_rx_release(res); for (fragoff = fraglen; fragoff < len; fragoff += fraglen) { ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff); @@ -563,6 +586,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a3; memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen); + ffa_rx_release(res); } ffa_mem_reclaim(res, handle_lo, handle_hi, flags); @@ -600,7 +624,6 @@ static bool ffa_call_supported(u64 func_id) case FFA_MSG_POLL: case FFA_MSG_WAIT: /* 32-bit variants of 64-bit calls */ - case FFA_MSG_SEND_DIRECT_REQ: case FFA_MSG_SEND_DIRECT_RESP: case FFA_RXTX_MAP: case FFA_MEM_DONATE: @@ -640,6 +663,132 @@ out_handled: return true; } +static int hyp_ffa_post_init(void) +{ + size_t min_rxtx_sz; + struct arm_smccc_res res; + + arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + if (res.a2 != HOST_FFA_ID) + return -EINVAL; + + arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, + 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + switch (res.a2) { + case FFA_FEAT_RXTX_MIN_SZ_4K: + min_rxtx_sz = SZ_4K; + break; + case FFA_FEAT_RXTX_MIN_SZ_16K: + min_rxtx_sz = SZ_16K; + break; + case FFA_FEAT_RXTX_MIN_SZ_64K: + min_rxtx_sz = SZ_64K; + break; + default: + return -EINVAL; + } + + if (min_rxtx_sz > PAGE_SIZE) + return -EOPNOTSUPP; + + return 0; +} + +static void do_ffa_version(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, ffa_req_version, ctxt, 1); + + if (FFA_MAJOR_VERSION(ffa_req_version) != 1) { + res->a0 = FFA_RET_NOT_SUPPORTED; + return; + } + + 
hyp_spin_lock(&version_lock); + if (has_version_negotiated) { + res->a0 = hyp_ffa_version; + goto unlock; + } + + /* + * If the client driver tries to downgrade the version, we need to ask + * first if TEE supports it. + */ + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { + arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, + 0, 0, 0, 0, 0, + res); + if (res->a0 == FFA_RET_NOT_SUPPORTED) + goto unlock; + + hyp_ffa_version = ffa_req_version; + } + + if (hyp_ffa_post_init()) + res->a0 = FFA_RET_NOT_SUPPORTED; + else { + has_version_negotiated = true; + res->a0 = hyp_ffa_version; + } +unlock: + hyp_spin_unlock(&version_lock); +} + +static void do_ffa_part_get(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, uuid0, ctxt, 1); + DECLARE_REG(u32, uuid1, ctxt, 2); + DECLARE_REG(u32, uuid2, ctxt, 3); + DECLARE_REG(u32, uuid3, ctxt, 4); + DECLARE_REG(u32, flags, ctxt, 5); + u32 count, partition_sz, copy_sz; + + hyp_spin_lock(&host_buffers.lock); + if (!host_buffers.rx) { + ffa_to_smccc_res(res, FFA_RET_BUSY); + goto out_unlock; + } + + arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, + uuid2, uuid3, flags, 0, 0, + res); + + if (res->a0 != FFA_SUCCESS) + goto out_unlock; + + count = res->a2; + if (!count) + goto out_unlock; + + if (hyp_ffa_version > FFA_VERSION_1_0) { + /* Get the number of partitions deployed in the system */ + if (flags & 0x1) + goto out_unlock; + + partition_sz = res->a3; + } else { + /* FFA_VERSION_1_0 lacks the size in the response */ + partition_sz = FFA_1_0_PARTITON_INFO_SZ; + } + + copy_sz = partition_sz * count; + if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { + ffa_to_smccc_res(res, FFA_RET_ABORTED); + goto out_unlock; + } + + memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz); +out_unlock: + hyp_spin_unlock(&host_buffers.lock); +} + bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { struct arm_smccc_res res; @@ -660,6 +809,11 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; + if (!has_version_negotiated && func_id != FFA_VERSION) { + ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); + goto out_handled; + } + switch (func_id) { case FFA_FEATURES: if (!do_ffa_features(&res, host_ctxt)) @@ -686,6 +840,12 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) case FFA_MEM_FRAG_TX: do_ffa_mem_frag_tx(&res, host_ctxt); goto out_handled; + case FFA_VERSION: + do_ffa_version(&res, host_ctxt); + goto out_handled; + case FFA_PARTITION_INFO_GET: + do_ffa_part_get(&res, host_ctxt); + goto out_handled; } if (ffa_call_supported(func_id)) @@ -700,13 +860,12 @@ out_handled: int hyp_ffa_init(void *pages) { struct arm_smccc_res res; - size_t min_rxtx_sz; void *tx, *rx; if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) return 0; - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; @@ -726,34 +885,10 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != FFA_SUCCESS) - return -EOPNOTSUPP; - - if (res.a2 != HOST_FFA_ID) - return -EINVAL; - - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, - 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != FFA_SUCCESS) - return -EOPNOTSUPP; - - switch (res.a2) { - case FFA_FEAT_RXTX_MIN_SZ_4K: - min_rxtx_sz = 
SZ_4K; - break; - case FFA_FEAT_RXTX_MIN_SZ_16K: - min_rxtx_sz = SZ_16K; - break; - case FFA_FEAT_RXTX_MIN_SZ_64K: - min_rxtx_sz = SZ_64K; - break; - default: - return -EINVAL; - } - - if (min_rxtx_sz > PAGE_SIZE) - return -EOPNOTSUPP; + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) + hyp_ffa_version = res.a0; + else + hyp_ffa_version = FFA_VERSION_1_1; tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; @@ -776,5 +911,6 @@ int hyp_ffa_init(void *pages) .lock = __HYP_SPIN_LOCK_UNLOCKED, }; + version_lock = __HYP_SPIN_LOCK_UNLOCKED; return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c index 6bc88a756cb7..b63f4e1c1033 100644 --- a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c +++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c @@ -50,6 +50,9 @@ #ifndef R_AARCH64_ABS64 #define R_AARCH64_ABS64 257 #endif +#ifndef R_AARCH64_ABS32 +#define R_AARCH64_ABS32 258 +#endif #ifndef R_AARCH64_PREL64 #define R_AARCH64_PREL64 260 #endif @@ -383,6 +386,9 @@ static void emit_rela_section(Elf64_Shdr *sh_rela) case R_AARCH64_ABS64: emit_rela_abs64(rela, sh_orig_name); break; + /* Allow 32-bit absolute relocation, for kCFI type hashes. */ + case R_AARCH64_ABS32: + break; /* Allow position-relative data relocations. */ case R_AARCH64_PREL64: case R_AARCH64_PREL32: diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 135cfb294ee5..58f0cb2298cc 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -188,21 +188,15 @@ SYM_FUNC_END(__host_hvc) /* * Test whether the SP has overflowed, without corrupting a GPR. - * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit + * nVHE hypervisor stacks are aligned so that the NVHE_STACK_SHIFT bit * of SP should always be 1. */ add sp, sp, x0 // sp' = sp + x0 sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp - tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@ + tbz x0, #NVHE_STACK_SHIFT, .L__hyp_sp_overflow\@ sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - /* If a guest is loaded, panic out of it. */ - stp x0, x1, [sp, #-16]! - get_loaded_vcpu x0, x1 - cbnz x0, __guest_exit_panic - add sp, sp, #16 - /* * The panic may not be clean if the exception is taken before the host * context has been saved by __host_exit or after the hyp context has diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 2994878d68ea..fc1866226067 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -5,6 +5,7 @@ */ #include <linux/arm-smccc.h> +#include <linux/cfi_types.h> #include <linux/linkage.h> #include <asm/alternative.h> @@ -23,28 +24,25 @@ .align 11 SYM_CODE_START(__kvm_hyp_init) - ventry __invalid // Synchronous EL2t - ventry __invalid // IRQ EL2t - ventry __invalid // FIQ EL2t - ventry __invalid // Error EL2t + ventry . // Synchronous EL2t + ventry . // IRQ EL2t + ventry . // FIQ EL2t + ventry . // Error EL2t - ventry __invalid // Synchronous EL2h - ventry __invalid // IRQ EL2h - ventry __invalid // FIQ EL2h - ventry __invalid // Error EL2h + ventry . // Synchronous EL2h + ventry . // IRQ EL2h + ventry . // FIQ EL2h + ventry . // Error EL2h ventry __do_hyp_init // Synchronous 64-bit EL1 - ventry __invalid // IRQ 64-bit EL1 - ventry __invalid // FIQ 64-bit EL1 - ventry __invalid // Error 64-bit EL1 + ventry . // IRQ 64-bit EL1 + ventry . // FIQ 64-bit EL1 + ventry . 
// Error 64-bit EL1 - ventry __invalid // Synchronous 32-bit EL1 - ventry __invalid // IRQ 32-bit EL1 - ventry __invalid // FIQ 32-bit EL1 - ventry __invalid // Error 32-bit EL1 - -__invalid: - b . + ventry . // Synchronous 32-bit EL1 + ventry . // IRQ 32-bit EL1 + ventry . // FIQ 32-bit EL1 + ventry . // Error 32-bit EL1 /* * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers. @@ -75,6 +73,13 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +SYM_CODE_START_LOCAL(__kvm_init_el2_state) + /* Initialize EL2 CPU state to sane values. */ + init_el2_state // Clobbers x0..x2 + finalise_el2_state + ret +SYM_CODE_END(__kvm_init_el2_state) + /* * Initialize the hypervisor in EL2. * @@ -101,9 +106,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init) // TPIDR_EL2 is used to preserve x0 across the macro maze... isb msr tpidr_el2, x0 - init_el2_state - finalise_el2_state + str lr, [x0, #NVHE_INIT_TMP] + + bl __kvm_init_el2_state + mrs x0, tpidr_el2 + ldr lr, [x0, #NVHE_INIT_TMP] 1: ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] @@ -129,7 +137,7 @@ alternative_else_nop_endif /* Invalidate the stale TLBs from Bootloader */ tlbi alle2 - tlbi vmalls12e1 + tlbi alle1 dsb sy mov_q x0, INIT_SCTLR_EL2_MMU_ON @@ -198,9 +206,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} - /* Initialize EL2 CPU state to sane values. */ - init_el2_state // Clobbers x0..x2 - finalise_el2_state + bl __kvm_init_el2_state + __init_el2_nvhe_prepare_eret /* Enable MMU, set vectors and stack. */ @@ -265,33 +272,38 @@ alternative_else_nop_endif SYM_CODE_END(__kvm_handle_stub_hvc) -SYM_FUNC_START(__pkvm_init_switch_pgd) +/* + * void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + * void (*fn)(void)); + * + * SYM_TYPED_FUNC_START() allows C to call this ID-mapped function indirectly + * using a physical pointer without triggering a kCFI failure. + */ +SYM_TYPED_FUNC_START(__pkvm_init_switch_pgd) /* Turn the MMU off */ pre_disable_mmu_workaround - mrs x2, sctlr_el2 - bic x3, x2, #SCTLR_ELx_M - msr sctlr_el2, x3 + mrs x3, sctlr_el2 + bic x4, x3, #SCTLR_ELx_M + msr sctlr_el2, x4 isb tlbi alle2 /* Install the new pgtables */ - ldr x3, [x0, #NVHE_INIT_PGD_PA] - phys_to_ttbr x4, x3 + phys_to_ttbr x5, x0 alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT + orr x5, x5, #TTBR_CNP_BIT alternative_else_nop_endif - msr ttbr0_el2, x4 + msr ttbr0_el2, x5 /* Set the new stack pointer */ - ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA] - mov sp, x0 + mov sp, x1 /* And turn the MMU back on! */ dsb nsh isb - set_sctlr_el2 x2 - ret x1 + set_sctlr_el2 x3 + ret x2 SYM_FUNC_END(__pkvm_init_switch_pgd) .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed87..6e12c070832f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -23,26 +23,115 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) +{ + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + /* + * On saving/restoring guest sve state, always use the maximum VL for + * the guest. The layout of the data when saving the sve state depends + * on the VL, so use a consistent (i.e., the maximum) guest VL. 
+ */ + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); + __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); + write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); +} + +static void __hyp_sve_restore_host(void) +{ + struct cpu_sve_state *sve_state = *host_data_ptr(sve_state); + + /* + * On saving/restoring host sve state, always use the maximum VL for + * the host. The layout of the data when saving the sve state depends + * on the VL, so use a consistent (i.e., the maximum) host VL. + * + * Note that this constrains the PE to the maximum shared VL + * that was discovered, if we wish to use larger VLs this will + * need to be revisited. + */ + write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); + __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), + &sve_state->fpsr, + true); + write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); +} + +static void fpsimd_sve_flush(void) +{ + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; +} + +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) +{ + bool has_fpmr; + + if (!guest_owns_fp_regs()) + return; + + cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + isb(); + + if (vcpu_has_sve(vcpu)) + __hyp_sve_save_guest(vcpu); + else + __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs); + + has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); + if (has_fpmr) + __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + + if (system_supports_sve()) + __hyp_sve_restore_host(); + else + __fpsimd_restore_state(*host_data_ptr(fpsimd_state)); + + if (has_fpmr) + write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR); + + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; +} + +static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state; +} + +static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + fpsimd_sve_flush(); + flush_debug_state(hyp_vcpu); + hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); - hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl; + /* Limit guest vector length to the maximum supported by the host. 
*/ + hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; - - hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2; hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; - hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; + hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); + hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & + (HCR_TWI | HCR_TWE); hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; - - hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; @@ -56,35 +145,74 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; unsigned int i; + fpsimd_sve_sync(&hyp_vcpu->vcpu); + sync_debug_state(hyp_vcpu); + host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2; - host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2; host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; } +static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2); + DECLARE_REG(u64, hcr_el2, host_ctxt, 3); + struct pkvm_hyp_vcpu *hyp_vcpu; + + if (!is_protected_kvm_enabled()) + return; + + hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx); + if (!hyp_vcpu) + return; + + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) { + /* Propagate WFx trapping flags */ + hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI); + hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI); + } +} + +static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt) +{ + struct pkvm_hyp_vcpu *hyp_vcpu; + + if (!is_protected_kvm_enabled()) + return; + + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); + if (hyp_vcpu) + pkvm_put_hyp_vcpu(hyp_vcpu); +} + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); int ret; - host_vcpu = kern_hyp_va(host_vcpu); - if (unlikely(is_protected_kvm_enabled())) { - struct pkvm_hyp_vcpu *hyp_vcpu; - struct kvm *host_kvm; + struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); + + /* + * KVM (and pKVM) doesn't support SME guests for now, and + * ensures that SME features aren't enabled in pstate when + * loading a vcpu. Therefore, if SME features enabled the host + * is misbehaving. + */ + if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) { + ret = -EINVAL; + goto out; + } - host_kvm = kern_hyp_va(host_vcpu->kvm); - hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, - host_vcpu->vcpu_idx); if (!hyp_vcpu) { ret = -EINVAL; goto out; @@ -95,12 +223,141 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); sync_hyp_vcpu(hyp_vcpu); - pkvm_put_hyp_vcpu(hyp_vcpu); } else { /* The host is fully trusted, run its vCPU directly. 
*/ - ret = __kvm_vcpu_run(host_vcpu); + ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); } +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache, + host_vcpu->arch.pkvm_memcache.nr_pages, + &host_vcpu->arch.pkvm_memcache); +} +static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + DECLARE_REG(u64, gfn, host_ctxt, 2); + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); + struct pkvm_hyp_vcpu *hyp_vcpu; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + goto out; + + ret = pkvm_refill_memcache(hyp_vcpu); + if (ret) + goto out; + + ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot); +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(u64, gfn, host_ctxt, 2); + struct pkvm_hyp_vm *hyp_vm; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vm = get_np_pkvm_hyp_vm(handle); + if (!hyp_vm) + goto out; + + ret = __pkvm_host_unshare_guest(gfn, hyp_vm); + put_pkvm_hyp_vm(hyp_vm); +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, gfn, host_ctxt, 1); + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2); + struct pkvm_hyp_vcpu *hyp_vcpu; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + goto out; + + ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot); +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(u64, gfn, host_ctxt, 2); + struct pkvm_hyp_vm *hyp_vm; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vm = get_np_pkvm_hyp_vm(handle); + if (!hyp_vm) + goto out; + + ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm); + put_pkvm_hyp_vm(hyp_vm); +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(u64, gfn, host_ctxt, 2); + DECLARE_REG(bool, mkold, host_ctxt, 3); + struct pkvm_hyp_vm *hyp_vm; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vm = get_np_pkvm_hyp_vm(handle); + if (!hyp_vm) + goto out; + + ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm); + put_pkvm_hyp_vm(hyp_vm); +out: + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, gfn, host_ctxt, 1); + struct pkvm_hyp_vcpu *hyp_vcpu; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + goto out; + + ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu); out: cpu_reg(host_ctxt, 1) = ret; } @@ -152,6 +409,22 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); } +static void 
handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + struct pkvm_hyp_vm *hyp_vm; + + if (!is_protected_kvm_enabled()) + return; + + hyp_vm = get_np_pkvm_hyp_vm(handle); + if (!hyp_vm) + return; + + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); + put_pkvm_hyp_vm(hyp_vm); +} + static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); @@ -178,38 +451,23 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); } -static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); -} - -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if)); } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -280,13 +538,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } -static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); - - __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); -} - static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); @@ -320,7 +571,6 @@ typedef void (*hcall_t)(struct kvm_cpu_context *); static const hcall_t host_hcall[] = { /* ___kvm_hyp_init */ - HANDLE_FUNC(__kvm_get_mdcr_el2), HANDLE_FUNC(__pkvm_init), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_cpu_set_vector), @@ -331,6 +581,12 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_host_unshare_hyp), + HANDLE_FUNC(__pkvm_host_share_guest), + HANDLE_FUNC(__pkvm_host_unshare_guest), + HANDLE_FUNC(__pkvm_host_relax_perms_guest), + HANDLE_FUNC(__pkvm_host_wrprotect_guest), + HANDLE_FUNC(__pkvm_host_test_clear_young_guest), + HANDLE_FUNC(__pkvm_host_mkyoung_guest), HANDLE_FUNC(__kvm_adjust_pc), HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), @@ -340,14 +596,14 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), - HANDLE_FUNC(__pkvm_vcpu_init_traps), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), 
HANDLE_FUNC(__pkvm_teardown_vm), + HANDLE_FUNC(__pkvm_vcpu_load), + HANDLE_FUNC(__pkvm_vcpu_put), + HANDLE_FUNC(__pkvm_tlb_flush_vmid), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) @@ -420,13 +676,10 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) handle_host_smc(host_ctxt); break; case ESR_ELx_EC_SVE: - if (has_hvhe()) - sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN | - CPACR_EL1_ZEN_EL0EN)); - else - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); + cpacr_clear_set(0, CPACR_EL1_ZEN); isb(); - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, + SYS_ZCR_EL2); break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 861c76021a25..7ad7b133b81a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc) memset(addr, 0, PAGE_SIZE); p = hyp_virt_to_page(addr); - memset(p, 0, sizeof(*p)); p->refcount = 1; + p->order = 0; return addr; } @@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) { + struct hyp_page *page; void *addr; /* Dump all pgtable pages in the hyp_pool */ @@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) /* Drain the hyp_pool into the memcache */ addr = hyp_alloc_pages(&vm->pool, 0); while (addr) { - memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); + page = hyp_virt_to_page(addr); + page->refcount = 0; + page->order = 0; push_hyp_memcache(mc, addr, hyp_virt_to_phys); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); addr = hyp_alloc_pages(&vm->pool, 0); @@ -382,19 +385,28 @@ bool addr_is_memory(phys_addr_t phys) return !!find_mem_range(phys, &range); } -static bool addr_is_allowed_memory(phys_addr_t phys) +static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) +{ + return range->start <= addr && addr < range->end; +} + +static int check_range_allowed_memory(u64 start, u64 end) { struct memblock_region *reg; struct kvm_mem_range range; - reg = find_mem_range(phys, &range); + /* + * Callers can't check the state of a range that overlaps memory and + * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range. 
+ */ + reg = find_mem_range(start, &range); + if (!is_in_mem_range(end - 1, &range)) + return -EINVAL; - return reg && !(reg->flags & MEMBLOCK_NOMAP); -} + if (!reg || reg->flags & MEMBLOCK_NOMAP) + return -EPERM; -static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) -{ - return range->start <= addr && addr < range->end; + return 0; } static bool range_is_memory(u64 start, u64 end) @@ -454,8 +466,10 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) if (kvm_pte_valid(pte)) return -EAGAIN; - if (pte) + if (pte) { + WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE); return -EPERM; + } do { u64 granule = kvm_granule_size(level); @@ -477,10 +491,33 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); } +static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state) +{ + phys_addr_t end = addr + size; + + for (; addr < end; addr += PAGE_SIZE) + hyp_phys_to_page(addr)->host_state = state; +} + int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, - addr, size, &host_s2_pool, owner_id); + int ret; + + if (!addr_is_memory(addr)) + return -EPERM; + + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, + addr, size, &host_s2_pool, owner_id); + if (ret) + return ret; + + /* Don't forget to update the vmemmap tracking for the host */ + if (owner_id == PKVM_ID_HOST) + __host_update_page_state(addr, size, PKVM_PAGE_OWNED); + else + __host_update_page_state(addr, size, PKVM_NOPAGE); + + return 0; } static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) @@ -533,46 +570,19 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) int ret = 0; esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + if (!__get_fault_info(esr, &fault)) { + /* + * We've presumably raced with a page-table change which caused + * AT to fail, try again. 
+ */ + return; + } addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } -struct pkvm_mem_transition { - u64 nr_pages; - - struct { - enum pkvm_component_id id; - /* Address in the initiator's address space */ - u64 addr; - - union { - struct { - /* Address in the completer's address space */ - u64 completer_addr; - } host; - struct { - u64 completer_addr; - } hyp; - }; - } initiator; - - struct { - enum pkvm_component_id id; - } completer; -}; - -struct pkvm_mem_share { - const struct pkvm_mem_transition tx; - const enum kvm_pgtable_prot completer_prot; -}; - -struct pkvm_mem_donation { - const struct pkvm_mem_transition tx; -}; - struct check_walk_data { enum pkvm_page_state desired; enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); @@ -598,115 +608,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, return kvm_pgtable_walk(pgt, addr, size, &walker); } -static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) -{ - if (!addr_is_allowed_memory(addr)) - return PKVM_NOPAGE; - - if (!kvm_pte_valid(pte) && pte) - return PKVM_NOPAGE; - - return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); -} - static int __host_check_page_state_range(u64 addr, u64 size, enum pkvm_page_state state) { - struct check_walk_data d = { - .desired = state, - .get_page_state = host_get_page_state, - }; + u64 end = addr + size; + int ret; + + ret = check_range_allowed_memory(addr, end); + if (ret) + return ret; hyp_assert_lock_held(&host_mmu.lock); - return check_page_state_range(&host_mmu.pgt, addr, size, &d); + for (; addr < end; addr += PAGE_SIZE) { + if (hyp_phys_to_page(addr)->host_state != state) + return -EPERM; + } + + return 0; } static int __host_set_page_state_range(u64 addr, u64 size, enum pkvm_page_state state) { - enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); - - return host_stage2_idmap_locked(addr, size, prot); -} - -static int host_request_owned_transition(u64 *completer_addr, - const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - u64 addr = tx->initiator.addr; - - *completer_addr = tx->initiator.host.completer_addr; - return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); -} - -static int host_request_unshare(u64 *completer_addr, - const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - u64 addr = tx->initiator.addr; - - *completer_addr = tx->initiator.host.completer_addr; - return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); -} - -static int host_initiate_share(u64 *completer_addr, - const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - u64 addr = tx->initiator.addr; - - *completer_addr = tx->initiator.host.completer_addr; - return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); -} - -static int host_initiate_unshare(u64 *completer_addr, - const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - u64 addr = tx->initiator.addr; + if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) { + int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT); - *completer_addr = tx->initiator.host.completer_addr; - return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); -} - -static int host_initiate_donation(u64 *completer_addr, - const struct pkvm_mem_transition *tx) -{ - u8 owner_id = tx->completer.id; - u64 size = tx->nr_pages * PAGE_SIZE; - - *completer_addr = tx->initiator.host.completer_addr; 
- return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); -} - -static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) -{ - return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || - tx->initiator.id != PKVM_ID_HYP); -} - -static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, - enum pkvm_page_state state) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - - if (__host_ack_skip_pgtable_check(tx)) - return 0; - - return __host_check_page_state_range(addr, size, state); -} - -static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) -{ - return __host_ack_transition(addr, tx, PKVM_NOPAGE); -} + if (ret) + return ret; + } -static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - u8 host_id = tx->completer.id; + __host_update_page_state(addr, size, state); - return host_stage2_set_owner_locked(addr, size, host_id); + return 0; } static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) @@ -729,576 +662,418 @@ static int __hyp_check_page_state_range(u64 addr, u64 size, return check_page_state_range(&pkvm_pgtable, addr, size, &d); } -static int hyp_request_donation(u64 *completer_addr, - const struct pkvm_mem_transition *tx) +static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) { - u64 size = tx->nr_pages * PAGE_SIZE; - u64 addr = tx->initiator.addr; + if (!kvm_pte_valid(pte)) + return PKVM_NOPAGE; - *completer_addr = tx->initiator.hyp.completer_addr; - return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); } -static int hyp_initiate_donation(u64 *completer_addr, - const struct pkvm_mem_transition *tx) +static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr, + u64 size, enum pkvm_page_state state) { - u64 size = tx->nr_pages * PAGE_SIZE; - int ret; + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + struct check_walk_data d = { + .desired = state, + .get_page_state = guest_get_page_state, + }; - *completer_addr = tx->initiator.hyp.completer_addr; - ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); - return (ret != size) ? 
-EFAULT : 0; + hyp_assert_lock_held(&vm->lock); + return check_page_state_range(&vm->pgt, addr, size, &d); } -static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +int __pkvm_host_share_hyp(u64 pfn) { - return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || - tx->initiator.id != PKVM_ID_HOST); -} + u64 phys = hyp_pfn_to_phys(pfn); + void *virt = __hyp_va(phys); + enum kvm_pgtable_prot prot; + u64 size = PAGE_SIZE; + int ret; -static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, - enum kvm_pgtable_prot perms) -{ - u64 size = tx->nr_pages * PAGE_SIZE; + host_lock_component(); + hyp_lock_component(); - if (perms != PAGE_HYP) - return -EPERM; + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); + if (ret) + goto unlock; + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); + if (ret) + goto unlock; + } + + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED)); - if (__hyp_ack_skip_pgtable_check(tx)) - return 0; +unlock: + hyp_unlock_component(); + host_unlock_component(); - return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); + return ret; } -static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) +int __pkvm_host_unshare_hyp(u64 pfn) { - u64 size = tx->nr_pages * PAGE_SIZE; - - if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) - return -EBUSY; + u64 phys = hyp_pfn_to_phys(pfn); + u64 virt = (u64)__hyp_va(phys); + u64 size = PAGE_SIZE; + int ret; - if (__hyp_ack_skip_pgtable_check(tx)) - return 0; + host_lock_component(); + hyp_lock_component(); - return __hyp_check_page_state_range(addr, size, - PKVM_PAGE_SHARED_BORROWED); -} + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); + if (ret) + goto unlock; + ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; + if (hyp_page_count((void *)virt)) { + ret = -EBUSY; + goto unlock; + } -static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; + WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED)); - if (__hyp_ack_skip_pgtable_check(tx)) - return 0; +unlock: + hyp_unlock_component(); + host_unlock_component(); - return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); + return ret; } -static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, - enum kvm_pgtable_prot perms) +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) { - void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + u64 phys = hyp_pfn_to_phys(pfn); + u64 size = PAGE_SIZE * nr_pages; + void *virt = __hyp_va(phys); enum kvm_pgtable_prot prot; + int ret; - prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); - return pkvm_create_mappings_locked(start, end, prot); -} + host_lock_component(); + hyp_lock_component(); -static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) -{ - u64 size = tx->nr_pages * PAGE_SIZE; - int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size); + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); + if (ret) + goto unlock; + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); + if (ret) + goto unlock; + } - return (ret 
!= size) ? -EFAULT : 0; -} + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); + WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP)); -static int hyp_complete_donation(u64 addr, - const struct pkvm_mem_transition *tx) -{ - void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); - enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); +unlock: + hyp_unlock_component(); + host_unlock_component(); - return pkvm_create_mappings_locked(start, end, prot); + return ret; } -static int check_share(struct pkvm_mem_share *share) +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) { - const struct pkvm_mem_transition *tx = &share->tx; - u64 completer_addr; + u64 phys = hyp_pfn_to_phys(pfn); + u64 size = PAGE_SIZE * nr_pages; + u64 virt = (u64)__hyp_va(phys); int ret; - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_request_owned_transition(&completer_addr, tx); - break; - default: - ret = -EINVAL; - } + host_lock_component(); + hyp_lock_component(); + ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED); if (ret) - return ret; - - switch (tx->completer.id) { - case PKVM_ID_HYP: - ret = hyp_ack_share(completer_addr, tx, share->completer_prot); - break; - case PKVM_ID_FFA: - /* - * We only check the host; the secure side will check the other - * end when we forward the FFA call. - */ - ret = 0; - break; - default: - ret = -EINVAL; + goto unlock; + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE); + if (ret) + goto unlock; } + WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); + WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST)); + +unlock: + hyp_unlock_component(); + host_unlock_component(); + return ret; } -static int __do_share(struct pkvm_mem_share *share) +int hyp_pin_shared_mem(void *from, void *to) { - const struct pkvm_mem_transition *tx = &share->tx; - u64 completer_addr; + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + u64 size = end - start; int ret; - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_initiate_share(&completer_addr, tx); - break; - default: - ret = -EINVAL; - } + host_lock_component(); + hyp_lock_component(); + ret = __host_check_page_state_range(__hyp_pa(start), size, + PKVM_PAGE_SHARED_OWNED); if (ret) - return ret; - - switch (tx->completer.id) { - case PKVM_ID_HYP: - ret = hyp_complete_share(completer_addr, tx, share->completer_prot); - break; - case PKVM_ID_FFA: - /* - * We're not responsible for any secure page-tables, so there's - * nothing to do here. - */ - ret = 0; - break; - default: - ret = -EINVAL; - } + goto unlock; - return ret; -} + ret = __hyp_check_page_state_range(start, size, + PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; -/* - * do_share(): - * - * The page owner grants access to another component with a given set - * of permissions. 
- * - * Initiator: OWNED => SHARED_OWNED - * Completer: NOPAGE => SHARED_BORROWED - */ -static int do_share(struct pkvm_mem_share *share) -{ - int ret; + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_inc(hyp_virt_to_page(cur)); - ret = check_share(share); - if (ret) - return ret; +unlock: + hyp_unlock_component(); + host_unlock_component(); - return WARN_ON(__do_share(share)); + return ret; } -static int check_unshare(struct pkvm_mem_share *share) +void hyp_unpin_shared_mem(void *from, void *to) { - const struct pkvm_mem_transition *tx = &share->tx; - u64 completer_addr; - int ret; - - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_request_unshare(&completer_addr, tx); - break; - default: - ret = -EINVAL; - } + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); - if (ret) - return ret; + host_lock_component(); + hyp_lock_component(); - switch (tx->completer.id) { - case PKVM_ID_HYP: - ret = hyp_ack_unshare(completer_addr, tx); - break; - case PKVM_ID_FFA: - /* See check_share() */ - ret = 0; - break; - default: - ret = -EINVAL; - } + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_dec(hyp_virt_to_page(cur)); - return ret; + hyp_unlock_component(); + host_unlock_component(); } -static int __do_unshare(struct pkvm_mem_share *share) +int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) { - const struct pkvm_mem_transition *tx = &share->tx; - u64 completer_addr; + u64 phys = hyp_pfn_to_phys(pfn); + u64 size = PAGE_SIZE * nr_pages; int ret; - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_initiate_unshare(&completer_addr, tx); - break; - default: - ret = -EINVAL; - } - - if (ret) - return ret; - - switch (tx->completer.id) { - case PKVM_ID_HYP: - ret = hyp_complete_unshare(completer_addr, tx); - break; - case PKVM_ID_FFA: - /* See __do_share() */ - ret = 0; - break; - default: - ret = -EINVAL; - } + host_lock_component(); + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); + if (!ret) + ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); + host_unlock_component(); return ret; } -/* - * do_unshare(): - * - * The page owner revokes access from another component for a range of - * pages which were previously shared using do_share(). 
- * - * Initiator: SHARED_OWNED => OWNED - * Completer: SHARED_BORROWED => NOPAGE - */ -static int do_unshare(struct pkvm_mem_share *share) +int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) { + u64 phys = hyp_pfn_to_phys(pfn); + u64 size = PAGE_SIZE * nr_pages; int ret; - ret = check_unshare(share); - if (ret) - return ret; + host_lock_component(); + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); + if (!ret) + ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED); + host_unlock_component(); - return WARN_ON(__do_unshare(share)); + return ret; } -static int check_donation(struct pkvm_mem_donation *donation) +int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, + enum kvm_pgtable_prot prot) { - const struct pkvm_mem_transition *tx = &donation->tx; - u64 completer_addr; + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + u64 phys = hyp_pfn_to_phys(pfn); + u64 ipa = hyp_pfn_to_phys(gfn); + struct hyp_page *page; int ret; - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_request_owned_transition(&completer_addr, tx); - break; - case PKVM_ID_HYP: - ret = hyp_request_donation(&completer_addr, tx); - break; - default: - ret = -EINVAL; - } + if (prot & ~KVM_PGTABLE_PROT_RWX) + return -EINVAL; + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); if (ret) return ret; - switch (tx->completer.id) { - case PKVM_ID_HOST: - ret = host_ack_donation(completer_addr, tx); - break; - case PKVM_ID_HYP: - ret = hyp_ack_donation(completer_addr, tx); - break; - default: - ret = -EINVAL; - } - - return ret; -} + host_lock_component(); + guest_lock_component(vm); -static int __do_donate(struct pkvm_mem_donation *donation) -{ - const struct pkvm_mem_transition *tx = &donation->tx; - u64 completer_addr; - int ret; + ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE); + if (ret) + goto unlock; - switch (tx->initiator.id) { - case PKVM_ID_HOST: - ret = host_initiate_donation(&completer_addr, tx); - break; - case PKVM_ID_HYP: - ret = hyp_initiate_donation(&completer_addr, tx); + page = hyp_phys_to_page(phys); + switch (page->host_state) { + case PKVM_PAGE_OWNED: + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED)); break; + case PKVM_PAGE_SHARED_OWNED: + if (page->host_share_guest_count) + break; + /* Only host to np-guest multi-sharing is tolerated */ + WARN_ON(1); + fallthrough; default: - ret = -EINVAL; + ret = -EPERM; + goto unlock; } - if (ret) - return ret; + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys, + pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED), + &vcpu->vcpu.arch.pkvm_memcache, 0)); + page->host_share_guest_count++; - switch (tx->completer.id) { - case PKVM_ID_HOST: - ret = host_complete_donation(completer_addr, tx); - break; - case PKVM_ID_HYP: - ret = hyp_complete_donation(completer_addr, tx); - break; - default: - ret = -EINVAL; - } +unlock: + guest_unlock_component(vm); + host_unlock_component(); return ret; } -/* - * do_donate(): - * - * The page owner transfers ownership to another component, losing access - * as a consequence. 
- * - * Initiator: OWNED => NOPAGE - * Completer: NOPAGE => OWNED - */ -static int do_donate(struct pkvm_mem_donation *donation) +static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa) { + enum pkvm_page_state state; + struct hyp_page *page; + kvm_pte_t pte; + u64 phys; + s8 level; int ret; - ret = check_donation(donation); + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); if (ret) return ret; + if (level != KVM_PGTABLE_LAST_LEVEL) + return -E2BIG; + if (!kvm_pte_valid(pte)) + return -ENOENT; - return WARN_ON(__do_donate(donation)); -} - -int __pkvm_host_share_hyp(u64 pfn) -{ - int ret; - u64 host_addr = hyp_pfn_to_phys(pfn); - u64 hyp_addr = (u64)__hyp_va(host_addr); - struct pkvm_mem_share share = { - .tx = { - .nr_pages = 1, - .initiator = { - .id = PKVM_ID_HOST, - .addr = host_addr, - .host = { - .completer_addr = hyp_addr, - }, - }, - .completer = { - .id = PKVM_ID_HYP, - }, - }, - .completer_prot = PAGE_HYP, - }; + state = guest_get_page_state(pte, ipa); + if (state != PKVM_PAGE_SHARED_BORROWED) + return -EPERM; - host_lock_component(); - hyp_lock_component(); + phys = kvm_pte_to_phys(pte); + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); + if (WARN_ON(ret)) + return ret; - ret = do_share(&share); + page = hyp_phys_to_page(phys); + if (page->host_state != PKVM_PAGE_SHARED_OWNED) + return -EPERM; + if (WARN_ON(!page->host_share_guest_count)) + return -EINVAL; - hyp_unlock_component(); - host_unlock_component(); + *__phys = phys; - return ret; + return 0; } -int __pkvm_host_unshare_hyp(u64 pfn) +int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm) { + u64 ipa = hyp_pfn_to_phys(gfn); + struct hyp_page *page; + u64 phys; int ret; - u64 host_addr = hyp_pfn_to_phys(pfn); - u64 hyp_addr = (u64)__hyp_va(host_addr); - struct pkvm_mem_share share = { - .tx = { - .nr_pages = 1, - .initiator = { - .id = PKVM_ID_HOST, - .addr = host_addr, - .host = { - .completer_addr = hyp_addr, - }, - }, - .completer = { - .id = PKVM_ID_HYP, - }, - }, - .completer_prot = PAGE_HYP, - }; host_lock_component(); - hyp_lock_component(); + guest_lock_component(vm); - ret = do_unshare(&share); + ret = __check_host_shared_guest(vm, &phys, ipa); + if (ret) + goto unlock; - hyp_unlock_component(); + ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE); + if (ret) + goto unlock; + + page = hyp_phys_to_page(phys); + page->host_share_guest_count--; + if (!page->host_share_guest_count) + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED)); + +unlock: + guest_unlock_component(vm); host_unlock_component(); return ret; } -int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) +int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) { + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + u64 ipa = hyp_pfn_to_phys(gfn); + u64 phys; int ret; - u64 host_addr = hyp_pfn_to_phys(pfn); - u64 hyp_addr = (u64)__hyp_va(host_addr); - struct pkvm_mem_donation donation = { - .tx = { - .nr_pages = nr_pages, - .initiator = { - .id = PKVM_ID_HOST, - .addr = host_addr, - .host = { - .completer_addr = hyp_addr, - }, - }, - .completer = { - .id = PKVM_ID_HYP, - }, - }, - }; + + if (prot & ~KVM_PGTABLE_PROT_RWX) + return -EINVAL; host_lock_component(); - hyp_lock_component(); + guest_lock_component(vm); - ret = do_donate(&donation); + ret = __check_host_shared_guest(vm, &phys, ipa); + if (!ret) + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); - hyp_unlock_component(); + guest_unlock_component(vm); 
host_unlock_component(); return ret; } -int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) +int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) { + u64 ipa = hyp_pfn_to_phys(gfn); + u64 phys; int ret; - u64 host_addr = hyp_pfn_to_phys(pfn); - u64 hyp_addr = (u64)__hyp_va(host_addr); - struct pkvm_mem_donation donation = { - .tx = { - .nr_pages = nr_pages, - .initiator = { - .id = PKVM_ID_HYP, - .addr = hyp_addr, - .hyp = { - .completer_addr = host_addr, - }, - }, - .completer = { - .id = PKVM_ID_HOST, - }, - }, - }; host_lock_component(); - hyp_lock_component(); + guest_lock_component(vm); - ret = do_donate(&donation); + ret = __check_host_shared_guest(vm, &phys, ipa); + if (!ret) + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); - hyp_unlock_component(); + guest_unlock_component(vm); host_unlock_component(); return ret; } -int hyp_pin_shared_mem(void *from, void *to) +int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) { - u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); - u64 end = PAGE_ALIGN((u64)to); - u64 size = end - start; + u64 ipa = hyp_pfn_to_phys(gfn); + u64 phys; int ret; host_lock_component(); - hyp_lock_component(); - - ret = __host_check_page_state_range(__hyp_pa(start), size, - PKVM_PAGE_SHARED_OWNED); - if (ret) - goto unlock; - - ret = __hyp_check_page_state_range(start, size, - PKVM_PAGE_SHARED_BORROWED); - if (ret) - goto unlock; + guest_lock_component(vm); - for (cur = start; cur < end; cur += PAGE_SIZE) - hyp_page_ref_inc(hyp_virt_to_page(cur)); + ret = __check_host_shared_guest(vm, &phys, ipa); + if (!ret) + ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); -unlock: - hyp_unlock_component(); + guest_unlock_component(vm); host_unlock_component(); return ret; } -void hyp_unpin_shared_mem(void *from, void *to) -{ - u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); - u64 end = PAGE_ALIGN((u64)to); - - host_lock_component(); - hyp_lock_component(); - - for (cur = start; cur < end; cur += PAGE_SIZE) - hyp_page_ref_dec(hyp_virt_to_page(cur)); - - hyp_unlock_component(); - host_unlock_component(); -} - -int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) +int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) { + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + u64 ipa = hyp_pfn_to_phys(gfn); + u64 phys; int ret; - struct pkvm_mem_share share = { - .tx = { - .nr_pages = nr_pages, - .initiator = { - .id = PKVM_ID_HOST, - .addr = hyp_pfn_to_phys(pfn), - }, - .completer = { - .id = PKVM_ID_FFA, - }, - }, - }; host_lock_component(); - ret = do_share(&share); - host_unlock_component(); + guest_lock_component(vm); - return ret; -} + ret = __check_host_shared_guest(vm, &phys, ipa); + if (!ret) + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); -int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) -{ - int ret; - struct pkvm_mem_share share = { - .tx = { - .nr_pages = nr_pages, - .initiator = { - .id = PKVM_ID_HOST, - .addr = hyp_pfn_to_phys(pfn), - }, - .completer = { - .id = PKVM_ID_FFA, - }, - }, - }; - - host_lock_component(); - ret = do_unshare(&share); + guest_unlock_component(vm); host_unlock_component(); return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 8850b591d775..f41c7440b34b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -360,10 +360,10 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr) prev_base = __io_map_base; /* - * Efficient stack verification using the PAGE_SHIFT bit implies + * 
Efficient stack verification using the NVHE_STACK_SHIFT bit implies * an alignment of our allocation on the order of the size. */ - size = PAGE_SIZE * 2; + size = NVHE_STACK_SIZE * 2; addr = ALIGN(__io_map_base, size); ret = __pkvm_alloc_private_va_range(addr, size); @@ -373,12 +373,12 @@ int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr) * at the higher address and leave the lower guard page * unbacked. * - * Any valid stack address now has the PAGE_SHIFT bit as 1 + * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1 * and addresses corresponding to the guard page have the - * PAGE_SHIFT bit as 0 - this is used for overflow detection. + * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection. */ - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, - PAGE_SIZE, phys, PAGE_HYP); + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + NVHE_STACK_SIZE, + NVHE_STACK_SIZE, phys, PAGE_HYP); if (ret) __io_map_base = prev_base; } diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index e691290d3765..a1eb27a1a747 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); @@ -94,7 +94,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { phys_addr_t phys = hyp_page_to_phys(p); - unsigned short order = p->order; + u8 order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -129,7 +129,7 @@ insert: static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { struct hyp_page *buddy; @@ -183,7 +183,7 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) void hyp_split_page(struct hyp_page *p) { - unsigned short order = p->order; + u8 order = p->order; unsigned int i; p->order = 0; @@ -195,10 +195,10 @@ void hyp_split_page(struct hyp_page *p) } } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) +void *hyp_alloc_pages(struct hyp_pool *pool, u8 order) { - unsigned short i = order; struct hyp_page *p; + u8 i = order; hyp_spin_lock(&pool->lock); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 26dd9a20ad6e..3927fe52a3dd 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -6,7 +6,9 @@ #include <linux/kvm_host.h> #include <linux/mm.h> -#include <nvhe/fixed_config.h> + +#include <asm/kvm_emulate.h> + #include <nvhe/mem_protect.h> #include <nvhe/memory.h> #include <nvhe/pkvm.h> @@ -18,198 +20,163 @@ unsigned long __icache_flags; /* Used by kvm_get_vttbr(). */ unsigned int kvm_arm_vmid_bits; +unsigned int kvm_host_sve_max_vl; + /* - * Set trap register values based on features in ID_AA64PFR0. + * The currently loaded hyp vCPU for each physical CPU. Used only when + * protected KVM is enabled, but for both protected and non-protected VMs. 
*/ -static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) -{ - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); - u64 hcr_set = HCR_RW; - u64 hcr_clear = 0; - u64 cptr_set = 0; - u64 cptr_clear = 0; +static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); - /* Protected KVM does not support AArch32 guests. */ - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY); - - /* - * Linux guests assume support for floating-point and Advanced SIMD. Do - * not change the trapping behavior for these from the KVM default. - */ - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP), - PVM_ID_AA64PFR0_ALLOW)); - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD), - PVM_ID_AA64PFR0_ALLOW)); +static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; if (has_hvhe()) - hcr_set |= HCR_E2H; + vcpu->arch.hcr_el2 |= HCR_E2H; - /* Trap RAS unless all current versions are supported */ - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) < - ID_AA64PFR0_EL1_RAS_V1P1) { - hcr_set |= HCR_TERR | HCR_TEA; - hcr_clear |= HCR_FIEN; + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) { + /* route synchronous external abort exceptions to EL2 */ + vcpu->arch.hcr_el2 |= HCR_TEA; + /* trap error record accesses */ + vcpu->arch.hcr_el2 |= HCR_TERR; } - /* Trap AMU */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) { - hcr_clear |= HCR_AMVOFFEN; - cptr_set |= CPTR_EL2_TAM; - } + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) + vcpu->arch.hcr_el2 |= HCR_FWB; - /* Trap SVE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) { - if (has_hvhe()) - cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; - else - cptr_set |= CPTR_EL2_TZ; - } + if (cpus_have_final_cap(ARM64_HAS_EVT) && + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) + vcpu->arch.hcr_el2 |= HCR_TID4; + else + vcpu->arch.hcr_el2 |= HCR_TID2; + + if (vcpu_has_ptrauth(vcpu)) + vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); - vcpu->arch.hcr_el2 |= hcr_set; - vcpu->arch.hcr_el2 &= ~hcr_clear; - vcpu->arch.cptr_el2 |= cptr_set; - vcpu->arch.cptr_el2 &= ~cptr_clear; + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; } -/* - * Set trap register values based on features in ID_AA64PFR1. - */ -static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) +static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu) { - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1); - u64 hcr_set = 0; - u64 hcr_clear = 0; + struct kvm *kvm = vcpu->kvm; + u64 val = vcpu->arch.hcr_el2; - /* Memory Tagging: Trap and Treat as Untagged if not supported. */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) { - hcr_set |= HCR_TID5; - hcr_clear |= HCR_DCT | HCR_ATA; + /* No support for AArch32. 
*/ + val |= HCR_RW; + + /* + * Always trap: + * - Feature id registers: to control features exposed to guests + * - Implementation-defined features + */ + val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1; + + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { + val |= HCR_TERR | HCR_TEA; + val &= ~(HCR_FIEN); } - vcpu->arch.hcr_el2 |= hcr_set; - vcpu->arch.hcr_el2 &= ~hcr_clear; -} + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) + val &= ~(HCR_AMVOFFEN); -/* - * Set trap register values based on features in ID_AA64DFR0. - */ -static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) -{ - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); - u64 mdcr_set = 0; - u64 mdcr_clear = 0; - u64 cptr_set = 0; - - /* Trap/constrain PMU */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) { - mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; - mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | - MDCR_EL2_HPMN_MASK; + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) { + val |= HCR_TID5; + val &= ~(HCR_DCT | HCR_ATA); } - /* Trap Debug */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids)) - mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP)) + val |= HCR_TLOR; + + vcpu->arch.hcr_el2 = val; +} - /* Trap OS Double Lock */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids)) - mdcr_set |= MDCR_EL2_TDOSA; +static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 val = vcpu->arch.mdcr_el2; - /* Trap SPE */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { - mdcr_set |= MDCR_EL2_TPMS; - mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) { + val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; + val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK); } - /* Trap Trace Filter */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids)) - mdcr_set |= MDCR_EL2_TTRF; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP)) + val |= MDCR_EL2_TDRA | MDCR_EL2_TDA; - /* Trap Trace */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) { - if (has_hvhe()) - cptr_set |= CPACR_EL1_TTA; - else - cptr_set |= CPTR_EL2_TTA; - } + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP)) + val |= MDCR_EL2_TDOSA; - /* Trap External Trace */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) - mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) { + val |= MDCR_EL2_TPMS; + val &= ~MDCR_EL2_E2PB_MASK; + } - vcpu->arch.mdcr_el2 |= mdcr_set; - vcpu->arch.mdcr_el2 &= ~mdcr_clear; - vcpu->arch.cptr_el2 |= cptr_set; -} + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) + val |= MDCR_EL2_TTRF; -/* - * Set trap register values based on features in ID_AA64MMFR0. 
- */ -static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) -{ - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1); - u64 mdcr_set = 0; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP)) + val |= MDCR_EL2_E2TB_MASK; /* Trap Debug Communications Channel registers */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids)) - mdcr_set |= MDCR_EL2_TDCC; + if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP)) + val |= MDCR_EL2_TDCC; - vcpu->arch.mdcr_el2 |= mdcr_set; + vcpu->arch.mdcr_el2 = val; } /* - * Set trap register values based on features in ID_AA64MMFR1. + * Check that cpu features that are neither trapped nor supported are not + * enabled for protected VMs. */ -static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) +static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu) { - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1); - u64 hcr_set = 0; - - /* Trap LOR */ - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids)) - hcr_set |= HCR_TLOR; + struct kvm *kvm = vcpu->kvm; - vcpu->arch.hcr_el2 |= hcr_set; -} - -/* - * Set baseline trap register values. - */ -static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) -{ - const u64 hcr_trap_feat_regs = HCR_TID3; - const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1; + /* Protected KVM does not support AArch32 guests. */ + if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || + kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) + return -EINVAL; /* - * Always trap: - * - Feature id registers: to control features exposed to guests - * - Implementation-defined features + * Linux guests assume support for floating-point and Advanced SIMD. Do + * not change the trapping behavior for these from the KVM default. */ - vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef; - - /* Clear res0 and set res1 bits to trap potential new features. */ - vcpu->arch.hcr_el2 &= ~(HCR_RES0); - vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0); - if (!has_hvhe()) { - vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1; - vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0); - } + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) || + !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP)) + return -EINVAL; + + /* No SME support in KVM right now. Check to catch if it changes. */ + if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP)) + return -EINVAL; + + return 0; } /* - * Initialize trap register values for protected VMs. + * Initialize trap register values in protected mode. */ -void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) +static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) { - pvm_init_trap_regs(vcpu); - pvm_init_traps_aa64pfr0(vcpu); - pvm_init_traps_aa64pfr1(vcpu); - pvm_init_traps_aa64dfr0(vcpu); - pvm_init_traps_aa64mmfr0(vcpu); - pvm_init_traps_aa64mmfr1(vcpu); + struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; + int ret; + + vcpu->arch.mdcr_el2 = 0; + + pkvm_vcpu_reset_hcr(vcpu); + + if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu))) + return 0; + + ret = pkvm_check_pvm_cpu_features(vcpu); + if (ret) + return ret; + + pvm_init_traps_hcr(vcpu); + pvm_init_traps_mdcr(vcpu); + + return 0; } /* @@ -230,10 +197,10 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx) /* * Spinlock for protecting state related to the VM table. Protects writes - * to 'vm_table' and 'nr_table_entries' as well as reads and writes to - * 'last_hyp_vcpu_lookup'. + * to 'vm_table', 'nr_table_entries', and other per-vm state on initialization. + * Also protects reads and writes to 'last_hyp_vcpu_lookup'. 
*/ -static DEFINE_HYP_SPINLOCK(vm_table_lock); +DEFINE_HYP_SPINLOCK(vm_table_lock); /* * The table of VM entries for protected VMs in hyp. @@ -266,15 +233,30 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, struct pkvm_hyp_vcpu *hyp_vcpu = NULL; struct pkvm_hyp_vm *hyp_vm; + /* Cannot load a new vcpu without putting the old one first. */ + if (__this_cpu_read(loaded_hyp_vcpu)) + return NULL; + hyp_spin_lock(&vm_table_lock); hyp_vm = get_vm_by_handle(handle); if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) goto unlock; hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + + /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */ + if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) { + hyp_vcpu = NULL; + goto unlock; + } + + hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu); hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); unlock: hyp_spin_unlock(&vm_table_lock); + + if (hyp_vcpu) + __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu); return hyp_vcpu; } @@ -283,10 +265,91 @@ void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); hyp_spin_lock(&vm_table_lock); + hyp_vcpu->loaded_hyp_vcpu = NULL; + __this_cpu_write(loaded_hyp_vcpu, NULL); + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); + hyp_spin_unlock(&vm_table_lock); +} + +struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void) +{ + return __this_cpu_read(loaded_hyp_vcpu); + +} + +struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle) +{ + struct pkvm_hyp_vm *hyp_vm; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (hyp_vm) + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); + hyp_spin_unlock(&vm_table_lock); + + return hyp_vm; +} + +void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm) +{ + hyp_spin_lock(&vm_table_lock); hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); hyp_spin_unlock(&vm_table_lock); } +struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle) +{ + struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle); + + if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) { + put_pkvm_hyp_vm(hyp_vm); + hyp_vm = NULL; + } + + return hyp_vm; +} + +static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm) +{ + struct kvm *kvm = &hyp_vm->kvm; + unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags); + DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES); + + if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) + set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); + + /* No restrictions for non-protected VMs. 
*/ + if (!kvm_vm_is_protected(kvm)) { + hyp_vm->kvm.arch.flags = host_arch_flags; + + bitmap_copy(kvm->arch.vcpu_features, + host_kvm->arch.vcpu_features, + KVM_VCPU_MAX_FEATURES); + return; + } + + bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); + + set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); + + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3)) + set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); + + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS)) + set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); + + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC)) + set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); + + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) { + set_bit(KVM_ARM_VCPU_SVE, allowed_features); + kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE); + } + + bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features, + allowed_features, KVM_VCPU_MAX_FEATURES); +} + static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) { if (host_vcpu) @@ -308,6 +371,17 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, hyp_vm->host_kvm = host_kvm; hyp_vm->kvm.created_vcpus = nr_vcpus; hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; + hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); + hyp_vm->kvm.arch.flags = 0; + pkvm_init_features_from_host(hyp_vm, host_kvm); +} + +static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) +{ + struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; + + if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) + vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED); } static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, @@ -333,6 +407,16 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); + hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) + kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); + + ret = pkvm_vcpu_init_traps(hyp_vcpu); + if (ret) + goto done; + + pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu); done: if (ret) unpin_host_vcpu(host_vcpu); @@ -430,6 +514,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size) static void __unmap_donated_memory(void *va, size_t size) { + kvm_flush_dcache_to_poc(va, size); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), PAGE_ALIGN(size) >> PAGE_SHIFT)); } @@ -571,10 +656,12 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, unlock: hyp_spin_unlock(&vm_table_lock); - if (ret) + if (ret) { unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + return ret; + } - return ret; + return 0; } static void @@ -625,6 +712,14 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) /* Push the metadata pages to the teardown memcache */ for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; + struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache; + + while (vcpu_mc->nr_pages) { + void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt); + + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + unmap_donated_memory_noclear(addr, PAGE_SIZE); + } teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d..9c2ce1e0e99a 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn 
__kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); @@ -265,6 +265,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_1_0_FN_PSCI_FEATURES: case PSCI_1_0_FN_SET_SUSPEND_MODE: case PSCI_1_1_FN64_SYSTEM_RESET2: + case PSCI_1_3_FN_SYSTEM_OFF2: + case PSCI_1_3_FN64_SYSTEM_OFF2: return psci_forward(host_ctxt); case PSCI_1_0_FN64_SYSTEM_SUSPEND: return psci_system_suspend(func_id, host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bc58d1b515af..d62bcb5634a2 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -12,7 +12,6 @@ #include <nvhe/early_alloc.h> #include <nvhe/ffa.h> -#include <nvhe/fixed_config.h> #include <nvhe/gfp.h> #include <nvhe/memory.h> #include <nvhe/mem_protect.h> @@ -67,13 +66,34 @@ static int divide_memory_pool(void *virt, unsigned long size) return 0; } +static int pkvm_create_host_sve_mappings(void) +{ + void *start, *end; + int ret, i; + + if (!system_supports_sve()) + return 0; + + for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); + struct cpu_sve_state *sve_state = host_data->sve_state; + + start = kern_hyp_va(sve_state); + end = start + PAGE_ALIGN(pkvm_host_sve_state_size()); + ret = pkvm_create_mappings(start, end, PAGE_HYP); + if (ret) + return ret; + } + + return 0; +} + static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits) { void *start, *end, *virt = hyp_phys_to_virt(phys); unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT; - enum kvm_pgtable_prot prot; int ret, i; /* Recreate the hyp page-table using the early page allocator */ @@ -125,22 +145,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; } - /* - * Map the host sections RO in the hypervisor, but transfer the - * ownership from the host to the hypervisor itself to make sure they - * can't be donated or shared with another entity. - * - * The ownership transition requires matching changes in the host - * stage-2. This will be done later (see finalize_host_mappings()) once - * the hyp_vmemmap is addressable. 
- */ - prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); - ret = pkvm_create_mappings(&kvm_vgic_global_state, - &kvm_vgic_global_state + 1, prot); - if (ret) - return ret; - - return 0; + return pkvm_create_host_sve_mappings(); } static void update_nvhe_init_params(void) @@ -174,7 +179,6 @@ static void hpool_put_page(void *addr) static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - enum kvm_pgtable_prot prot; enum pkvm_page_state state; phys_addr_t phys; @@ -197,16 +201,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, case PKVM_PAGE_OWNED: return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED; break; case PKVM_PAGE_SHARED_BORROWED: - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED; break; default: return -EINVAL; } - return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); + return 0; } static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx, @@ -257,8 +261,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; @@ -316,7 +319,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, { struct kvm_nvhe_init_params *params; void *virt = hyp_phys_to_virt(phys); - void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); + typeof(__pkvm_init_switch_pgd) *fn; int ret; BUG_ON(kvm_check_pvm_sysreg_table()); @@ -340,7 +343,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, /* Jump in the idmap page to switch to the new page-tables */ params = this_cpu_ptr(&kvm_init_params); fn = (typeof(fn))__hyp_pa(__pkvm_init_switch_pgd); - fn(__hyp_pa(params), __pkvm_init_finalise); + fn(params->pgd_pa, params->stack_hyp_va, __pkvm_init_finalise); unreachable(); } diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index ed6b58b19cfa..5b6eeab1a774 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -28,7 +28,7 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info); struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); - stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE); + stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - NVHE_STACK_SIZE); stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack); stacktrace_info->fp = fp; stacktrace_info->pc = pc; @@ -54,7 +54,7 @@ static struct stack_info stackinfo_get_hyp(void) { struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); unsigned long high = params->stack_hyp_va; - unsigned long low = high - PAGE_SIZE; + unsigned long low = high - NVHE_STACK_SIZE; return (struct stack_info) { .low = low, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4fc..6c846d033d24 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ 
b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -26,7 +26,6 @@ #include <asm/debug-monitors.h> #include <asm/processor.h> -#include <nvhe/fixed_config.h> #include <nvhe/mem_protect.h> /* Non-VHE specific context */ @@ -36,34 +35,46 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_traps(struct kvm_vcpu *vcpu) +static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { - u64 val; + u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - ___activate_traps(vcpu); - __activate_traps_common(vcpu); + if (has_hvhe()) { + val |= CPACR_EL1_TTA; - val = vcpu->arch.cptr_el2; - val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA; - if (cpus_have_final_cap(ARM64_SME)) { - if (has_hvhe()) - val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN); - else - val |= CPTR_EL2_TSM; - } + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + } else { + val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - if (!guest_owns_fp_regs(vcpu)) { - if (has_hvhe()) - val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | - CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); - else - val |= CPTR_EL2_TFP | CPTR_EL2_TZ; + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ + val |= CPTR_EL2_TSM; - __activate_traps_fpsimd32(vcpu); + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; } + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + kvm_write_cptr_el2(val); +} + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + ___activate_traps(vcpu, vcpu->arch.hcr_el2); + __activate_traps_common(vcpu); + __activate_cptr_traps(vcpu); + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { @@ -174,14 +185,41 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { /* - * Make sure we handle the exit for workarounds and ptrauth - * before the pKVM handling, as the latter could decide to - * UNDEF. + * Make sure we handle the exit for workarounds before the pKVM + * handling, as the latter could decide to UNDEF. */ return (kvm_hyp_handle_sysreg(vcpu, exit_code) || kvm_handle_pvm_sysreg(vcpu, exit_code)); } +static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { + __hyp_sve_save_host(); + + /* Re-enable SVE traps if not supported for the guest vcpu. */ + if (!vcpu_has_sve(vcpu)) + cpacr_clear_set(CPACR_EL1_ZEN, 0); + + } else { + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); + } + + if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) { + u64 val = read_sysreg_s(SYS_FPMR); + + if (unlikely(is_protected_kvm_enabled())) + *host_data_ptr(fpmr) = val; + else + **host_data_ptr(fpmr_ptr) = val; + } +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... 
ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -191,7 +229,6 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; @@ -203,13 +240,12 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) + if (unlikely(vcpu_is_protected(vcpu))) return pvm_exit_handlers; return hyp_exit_handlers; @@ -228,9 +264,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) */ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - - if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. The VMM @@ -264,7 +298,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -337,7 +371,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -367,7 +401,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index edd969a1f36b..1ddd9ed3cbb3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -11,7 +11,7 @@ #include <hyp/adjust_pc.h> -#include <nvhe/fixed_config.h> +#include <nvhe/pkvm.h> #include "../../sys_regs.h" @@ -28,222 +28,255 @@ u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; u64 id_aa64smfr0_el1_sys_val; -/* - * Inject an unknown/undefined exception to an AArch64 guest while most of its - * sysregs are live. - */ -static void inject_undef64(struct kvm_vcpu *vcpu) -{ - u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); - - kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); - - __kvm_adjust_pc(vcpu); - - write_sysreg_el1(esr, SYS_ESR); - write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); - write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); -} - -/* - * Returns the restricted features values of the feature register based on the - * limitations in restrict_fields. - * A feature id field value of 0b0000 does not impose any restrictions. - * Note: Use only for unsigned feature field values. 
- */ -static u64 get_restricted_features_unsigned(u64 sys_reg_val, - u64 restrict_fields) -{ - u64 value = 0UL; - u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); +struct pvm_ftr_bits { + bool sign; + u8 shift; + u8 width; + u8 max_val; + bool (*vm_supported)(const struct kvm *kvm); +}; - /* - * According to the Arm Architecture Reference Manual, feature fields - * use increasing values to indicate increases in functionality. - * Iterate over the restricted feature fields and calculate the minimum - * unsigned value between the one supported by the system, and what the - * value is being restricted to. - */ - while (sys_reg_val && restrict_fields) { - value |= min(sys_reg_val & mask, restrict_fields & mask); - sys_reg_val &= ~mask; - restrict_fields &= ~mask; - mask <<= ARM64_FEATURE_FIELD_BITS; +#define __MAX_FEAT_FUNC(id, fld, max, func, sgn) \ + { \ + .sign = sgn, \ + .shift = id##_##fld##_SHIFT, \ + .width = id##_##fld##_WIDTH, \ + .max_val = id##_##fld##_##max, \ + .vm_supported = func, \ } - return value; -} - -/* - * Functions that return the value of feature id registers for protected VMs - * based on allowed features, system features, and KVM support. - */ +#define MAX_FEAT_FUNC(id, fld, max, func) \ + __MAX_FEAT_FUNC(id, fld, max, func, id##_##fld##_SIGNED) -static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) -{ - u64 set_mask = 0; - u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; +#define MAX_FEAT(id, fld, max) \ + MAX_FEAT_FUNC(id, fld, max, NULL) - set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); +#define MAX_FEAT_ENUM(id, fld, max) \ + __MAX_FEAT_FUNC(id, fld, max, NULL, false) - return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; -} +#define FEAT_END { .width = 0, } -static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +static bool vm_has_ptrauth(const struct kvm *kvm) { - const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); - u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; - - if (!kvm_has_mte(kvm)) - allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); - - return id_aa64pfr1_el1_sys_val & allow_mask; -} - -static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) -{ - /* - * No support for Scalable Vectors, therefore, hyp has no sanitized - * copy of the feature id register. - */ - BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL); - return 0; -} - -static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) -{ - /* - * No support for debug, including breakpoints, and watchpoints, - * therefore, pKVM has no sanitized copy of the feature id register. - */ - BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL); - return 0; -} - -static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) -{ - /* - * No support for debug, therefore, hyp has no sanitized copy of the - * feature id register. - */ - BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL); - return 0; -} + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) + return false; -static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) -{ - /* - * No support for implementation defined features, therefore, hyp has no - * sanitized copy of the feature id register. 
- */ - BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL); - return 0; + return (cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || + cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && + kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC); } -static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +static bool vm_has_sve(const struct kvm *kvm) { - /* - * No support for implementation defined features, therefore, hyp has no - * sanitized copy of the feature id register. - */ - BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL); - return 0; + return system_supports_sve() && kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_SVE); } -static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) -{ - return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; -} +/* + * Definitions for features to be allowed or restricted for protected guests. + * + * Each field in the masks represents the highest supported value for the + * feature. If a feature field is not present, it is not supported. Moreover, + * these are used to generate the guest's view of the feature registers. + * + * The approach for protected VMs is to at least support features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ -static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) -{ - u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; +static const struct pvm_ftr_bits pvmid_aa64pfr0[] = { + MAX_FEAT(ID_AA64PFR0_EL1, EL0, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, EL1, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, EL2, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, EL3, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16), + MAX_FEAT(ID_AA64PFR0_EL1, AdvSIMD, FP16), + MAX_FEAT(ID_AA64PFR0_EL1, GIC, IMP), + MAX_FEAT_FUNC(ID_AA64PFR0_EL1, SVE, IMP, vm_has_sve), + MAX_FEAT(ID_AA64PFR0_EL1, RAS, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, DIT, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, CSV2, IMP), + MAX_FEAT(ID_AA64PFR0_EL1, CSV3, IMP), + FEAT_END +}; - if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); +static const struct pvm_ftr_bits pvmid_aa64pfr1[] = { + MAX_FEAT(ID_AA64PFR1_EL1, BT, IMP), + MAX_FEAT(ID_AA64PFR1_EL1, SSBS, SSBS2), + MAX_FEAT_ENUM(ID_AA64PFR1_EL1, MTE_frac, NI), + FEAT_END +}; - return id_aa64isar1_el1_sys_val & allow_mask; -} +static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = { + MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40), + MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16), + MAX_FEAT(ID_AA64MMFR0_EL1, BIGEND, IMP), + MAX_FEAT(ID_AA64MMFR0_EL1, SNSMEM, IMP), + MAX_FEAT(ID_AA64MMFR0_EL1, BIGENDEL0, IMP), + MAX_FEAT(ID_AA64MMFR0_EL1, EXS, IMP), + FEAT_END +}; -static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) -{ - u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; +static const struct pvm_ftr_bits pvmid_aa64mmfr1[] = { + MAX_FEAT(ID_AA64MMFR1_EL1, HAFDBS, DBM), + MAX_FEAT_ENUM(ID_AA64MMFR1_EL1, VMIDBits, 16), + MAX_FEAT(ID_AA64MMFR1_EL1, HPDS, HPDS2), + MAX_FEAT(ID_AA64MMFR1_EL1, PAN, PAN3), + MAX_FEAT(ID_AA64MMFR1_EL1, SpecSEI, IMP), + MAX_FEAT(ID_AA64MMFR1_EL1, ETS, IMP), + MAX_FEAT(ID_AA64MMFR1_EL1, CMOW, IMP), + FEAT_END +}; - if (!vcpu_has_ptrauth(vcpu)) - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); +static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = 
{ + MAX_FEAT(ID_AA64MMFR2_EL1, CnP, IMP), + MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP), + MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP), + MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP), + MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18), + MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP), + MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2), + MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP), + FEAT_END +}; - return id_aa64isar2_el1_sys_val & allow_mask; -} +static const struct pvm_ftr_bits pvmid_aa64isar1[] = { + MAX_FEAT(ID_AA64ISAR1_EL1, DPB, DPB2), + MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, APA, PAuth, vm_has_ptrauth), + MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, API, PAuth, vm_has_ptrauth), + MAX_FEAT(ID_AA64ISAR1_EL1, JSCVT, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, FCMA, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, LRCPC, LRCPC3), + MAX_FEAT(ID_AA64ISAR1_EL1, GPA, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, GPI, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, FRINTTS, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, SB, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX), + MAX_FEAT(ID_AA64ISAR1_EL1, BF16, EBF16), + MAX_FEAT(ID_AA64ISAR1_EL1, DGH, IMP), + MAX_FEAT(ID_AA64ISAR1_EL1, I8MM, IMP), + FEAT_END +}; -static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) -{ - u64 set_mask; +static const struct pvm_ftr_bits pvmid_aa64isar2[] = { + MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, GPA3, IMP, vm_has_ptrauth), + MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, APA3, PAuth, vm_has_ptrauth), + MAX_FEAT(ID_AA64ISAR2_EL1, ATS1A, IMP), + FEAT_END +}; - set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val, - PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED); +/* + * None of the features in ID_AA64DFR0_EL1 nor ID_AA64MMFR4_EL1 are supported. + * However, both have Not-Implemented values that are non-zero. Define them + * so they can be used when getting the value of these registers. + */ +#define ID_AA64DFR0_EL1_NONZERO_NI \ +( \ + SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI) | \ + SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, MTPMU, NI) \ +) - return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; -} +#define ID_AA64MMFR4_EL1_NONZERO_NI \ + SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI) -static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +/* + * Returns the value of the feature registers based on the system register + * value, the vcpu support for the revelant features, and the additional + * restrictions for protected VMs. + */ +static u64 get_restricted_features(const struct kvm_vcpu *vcpu, + u64 sys_reg_val, + const struct pvm_ftr_bits restrictions[]) { - return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; -} + u64 val = 0UL; + int i; + + for (i = 0; restrictions[i].width != 0; i++) { + bool (*vm_supported)(const struct kvm *) = restrictions[i].vm_supported; + bool sign = restrictions[i].sign; + int shift = restrictions[i].shift; + int width = restrictions[i].width; + u64 min_signed = (1UL << width) - 1UL; + u64 sign_bit = 1UL << (width - 1); + u64 mask = GENMASK_ULL(width + shift - 1, shift); + u64 sys_val = (sys_reg_val & mask) >> shift; + u64 pvm_max = restrictions[i].max_val; + + if (vm_supported && !vm_supported(vcpu->kvm)) + val |= (sign ? 
min_signed : 0) << shift; + else if (sign && (sys_val >= sign_bit || pvm_max >= sign_bit)) + val |= max(sys_val, pvm_max) << shift; + else + val |= min(sys_val, pvm_max) << shift; + } -static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) -{ - return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; + return val; } -/* Read a sanitized cpufeature ID register by its encoding */ -u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) +static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id) { switch (id) { case SYS_ID_AA64PFR0_EL1: - return get_pvm_id_aa64pfr0(vcpu); + return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0); case SYS_ID_AA64PFR1_EL1: - return get_pvm_id_aa64pfr1(vcpu); - case SYS_ID_AA64ZFR0_EL1: - return get_pvm_id_aa64zfr0(vcpu); - case SYS_ID_AA64DFR0_EL1: - return get_pvm_id_aa64dfr0(vcpu); - case SYS_ID_AA64DFR1_EL1: - return get_pvm_id_aa64dfr1(vcpu); - case SYS_ID_AA64AFR0_EL1: - return get_pvm_id_aa64afr0(vcpu); - case SYS_ID_AA64AFR1_EL1: - return get_pvm_id_aa64afr1(vcpu); + return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1); case SYS_ID_AA64ISAR0_EL1: - return get_pvm_id_aa64isar0(vcpu); + return id_aa64isar0_el1_sys_val; case SYS_ID_AA64ISAR1_EL1: - return get_pvm_id_aa64isar1(vcpu); + return get_restricted_features(vcpu, id_aa64isar1_el1_sys_val, pvmid_aa64isar1); case SYS_ID_AA64ISAR2_EL1: - return get_pvm_id_aa64isar2(vcpu); + return get_restricted_features(vcpu, id_aa64isar2_el1_sys_val, pvmid_aa64isar2); case SYS_ID_AA64MMFR0_EL1: - return get_pvm_id_aa64mmfr0(vcpu); + return get_restricted_features(vcpu, id_aa64mmfr0_el1_sys_val, pvmid_aa64mmfr0); case SYS_ID_AA64MMFR1_EL1: - return get_pvm_id_aa64mmfr1(vcpu); + return get_restricted_features(vcpu, id_aa64mmfr1_el1_sys_val, pvmid_aa64mmfr1); case SYS_ID_AA64MMFR2_EL1: - return get_pvm_id_aa64mmfr2(vcpu); + return get_restricted_features(vcpu, id_aa64mmfr2_el1_sys_val, pvmid_aa64mmfr2); + case SYS_ID_AA64DFR0_EL1: + return ID_AA64DFR0_EL1_NONZERO_NI; + case SYS_ID_AA64MMFR4_EL1: + return ID_AA64MMFR4_EL1_NONZERO_NI; default: /* Unhandled ID register, RAZ */ return 0; } } +/* + * Inject an unknown/undefined exception to an AArch64 guest while most of its + * sysregs are live. + */ +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + + kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC); + + __kvm_adjust_pc(vcpu); + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); +} + static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { - return pvm_read_id_reg(vcpu, reg_to_encoding(r)); + struct kvm *kvm = vcpu->kvm; + u32 reg = reg_to_encoding(r); + + if (WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))) + return 0; + + if (reg >= sys_reg(3, 0, 0, 1, 0) && reg <= sys_reg(3, 0, 0, 7, 7)) + return kvm->arch.id_regs[IDREG_IDX(reg)]; + + return 0; } /* Handler to RAZ/WI sysregs */ @@ -271,13 +304,6 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, return false; } - /* - * No support for AArch32 guests, therefore, pKVM has no sanitized copy - * of AArch32 feature id registers. 
- */ - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY); - return pvm_access_raz_wi(vcpu, p, r); } @@ -449,6 +475,30 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { }; /* + * Initializes feature registers for protected vms. + */ +void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_arch *ka = &kvm->arch; + u32 r; + + hyp_assert_lock_held(&vm_table_lock); + + if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) + return; + + /* + * Initialize only AArch64 id registers since AArch32 isn't supported + * for protected VMs. + */ + for (r = sys_reg(3, 0, 0, 4, 0); r <= sys_reg(3, 0, 0, 7, 7); r += sys_reg(0, 0, 0, 0, 1)) + ka->id_regs[IDREG_IDX(r)] = pvm_calc_id_reg(vcpu, r); + + set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); +} + +/* * Checks that the sysreg table is unique and in-order. * * Returns 0 if the table is consistent, or 1 otherwise. diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 29305022bc04..dba101565de3 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) { - __sysreg_restore_el1_state(ctxt); + __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); __sysreg_restore_common_state(ctxt); __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index 3aaab20ae5b4..ff176f4ce7de 100644 --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff) */ void __timer_disable_traps(struct kvm_vcpu *vcpu) { - u64 val, shift = 0; + u64 set, clr, shift = 0; if (has_hvhe()) shift = 10; /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift; - write_sysreg(val, cnthctl_el2); + set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift; + clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT; + + sysreg_clear_set(cnthctl_el2, clr, set); } /* @@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu) set <<= 10; } + /* + * Trap the virtual counter/timer if we have a broken cntvoff + * implementation. 
+ */ + if (has_broken_cntvoff()) + set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT; + sysreg_clear_set(cnthctl_el2, clr, set); } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 2fc68da4036d..48da9ca9763f 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -11,13 +11,23 @@ #include <nvhe/mem_protect.h> struct tlb_inv_context { - u64 tcr; + struct kvm_s2_mmu *mmu; + u64 tcr; + u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt, - bool nsh) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt, + bool nsh) { + struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + cxt->mmu = NULL; + /* * We have two requirements: * @@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, else dsb(ish); + /* + * If we're already in the desired context, then there's nothing to do. + */ + if (vcpu) { + /* + * We're in guest context. However, for this to work, this needs + * to be called from within __kvm_vcpu_run(), which ensures that + * __hyp_running_vcpu is set to the current guest vcpu. + */ + if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu)) + return; + + cxt->mmu = vcpu->arch.hw_mmu; + } else { + /* We're in host context. */ + if (mmu == host_s2_mmu) + return; + + cxt->mmu = host_s2_mmu; + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. + * avoid a Stage-1 walk with the old VMID while we have + * the new VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the host S1 MMU is enabled, so + * we can simply set the EPD bits to avoid any further + * TLB fill. For guests, we ensure that the S1 MMU is + * temporarily enabled in the next context. */ val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; write_sysreg_el1(val, SYS_TCR); isb(); + + if (vcpu) { + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + if (!(val & SCTLR_ELx_M)) { + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + isb(); + } + } else { + /* The host S1 MMU is always enabled. */ + cxt->sctlr = SCTLR_ELx_M; + } } /* @@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. 
*/ - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + if (vcpu) + __load_host_stage2(); + else + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { - __load_host_stage2(); + struct kvm_s2_mmu *mmu = cxt->mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + + if (!mmu) + return; + + if (vcpu) + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + else + __load_host_stage2(); + + /* Ensure write of the old VMID */ + isb(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ + if (!(cxt->sctlr & SCTLR_ELx_M)) { + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + isb(); + } + write_sysreg_el1(cxt->tcr, SYS_TCR); } } @@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, true); + enter_vmid_context(mmu, &cxt, true); /* * We could do so much better if we had the VA as well. @@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, start = round_down(start, stride); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, TLBI_TTL_UNKNOWN); @@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) { - /* Same remark as in __tlb_switch_to_guest() */ + /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); dsb(ish); |
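/*
 * Illustrative sketch, not part of the patch above: every TLB maintenance
 * helper in the reworked tlb.c follows the same shape - enter the VMID
 * context of the stage-2 MMU being operated on, issue the TLBI plus the
 * required barriers, then restore whatever context (host or loaded vCPU)
 * was live before. If another invalidation helper were added next to the
 * ones in the hunks above, it would look roughly like this; the name
 * example_flush_vmid is invented for illustration, while tlb_inv_context,
 * enter_vmid_context() and exit_vmid_context() are the helpers introduced
 * by this patch.
 */
static void example_flush_vmid(struct kvm_s2_mmu *mmu)
{
	struct tlb_inv_context cxt;

	/* Save the current VMID context and load the one for @mmu. */
	enter_vmid_context(mmu, &cxt, false);

	/* Invalidate all stage-1 and stage-2 entries tagged with that VMID. */
	__tlbi(vmalls12e1is);
	dsb(ish);
	isb();

	/* Put the previous VMID, TCR_EL1 and SCTLR_EL1 state back. */
	exit_vmid_context(&cxt);
}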