diff options
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r-- | arch/arm64/kvm/hyp/Makefile | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/entry.S | 95 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/hyp-entry.S | 107 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 50 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/.gitignore | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/Makefile | 62 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/host.S | 187 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-init.S | 67 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 117 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 19 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 52 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 9 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 892 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/switch.c | 35 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 |
16 files changed, 1394 insertions, 311 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f54f0e89a71c..4a81eddabcd8 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,5 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ -obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 76e7eaf4675e..b0afad7a99c6 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -7,7 +7,6 @@ #include <linux/linkage.h> #include <asm/alternative.h> -#include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/fpsimdmacros.h> #include <asm/kvm.h> @@ -16,66 +15,28 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_ptrauth.h> -#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) -#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) - .text /* - * We treat x18 as callee-saved as the host may use it as a platform - * register (e.g. for shadow call stack). - */ -.macro save_callee_saved_regs ctxt - str x18, [\ctxt, #CPU_XREG_OFFSET(18)] - stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -.macro restore_callee_saved_regs ctxt - // We require \ctxt is not x18-x28 - ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] - ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -.macro save_sp_el0 ctxt, tmp - mrs \tmp, sp_el0 - str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] -.endm - -.macro restore_sp_el0 ctxt, tmp - ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] - msr sp_el0, \tmp -.endm - -/* - * u64 __guest_enter(struct kvm_vcpu *vcpu, - * struct kvm_cpu_context *host_ctxt); + * u64 __guest_enter(struct kvm_vcpu *vcpu); */ SYM_FUNC_START(__guest_enter) // x0: vcpu - // x1: host context - // x2-x17: clobbered by macros + // x1-x17: clobbered by macros // x29: guest context - // Store the host regs + adr_this_cpu x1, kvm_hyp_ctxt, x2 + + // Store the hyp regs save_callee_saved_regs x1 - // Save the host's sp_el0 + // Save hyp's sp_el0 save_sp_el0 x1, x2 - // Now the host state is stored if we have a pending RAS SError it must - // affect the host. If any asynchronous exception is pending we defer - // the guest entry. The DSB isn't necessary before v8.2 as any SError - // would be fatal. + // Now the hyp state is stored if we have a pending RAS SError it must + // affect the host or hyp. If any asynchronous exception is pending we + // defer the guest entry. The DSB isn't necessary before v8.2 as any + // SError would be fatal. alternative_if ARM64_HAS_RAS_EXTN dsb nshst isb @@ -86,6 +47,8 @@ alternative_else_nop_endif ret 1: + set_loaded_vcpu x0, x1, x2 + add x29, x0, #VCPU_CONTEXT // Macro ptrauth_switch_to_guest format: @@ -116,6 +79,26 @@ alternative_else_nop_endif eret sb +SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) + // x2-x29,lr: vcpu regs + // vcpu x0-x1 on the stack + + // If the hyp context is loaded, go straight to hyp_panic + get_loaded_vcpu x0, x1 + cbz x0, hyp_panic + + // The hyp context is saved so make sure it is restored to allow + // hyp_panic to run at hyp and, subsequently, panic to run in the host. + // This makes use of __guest_exit to avoid duplication but sets the + // return address to tail call into hyp_panic. As a side effect, the + // current state is saved to the guest context but it will only be + // accurate if the guest had been completely restored. + adr_this_cpu x0, kvm_hyp_ctxt, x1 + adr x1, hyp_panic + str x1, [x0, #CPU_XREG_OFFSET(30)] + + get_vcpu_ptr x1, x0 + SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu @@ -148,21 +131,23 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // Store the guest's sp_el0 save_sp_el0 x1, x2 - get_host_ctxt x2, x3 + adr_this_cpu x2, kvm_hyp_ctxt, x3 - // Macro ptrauth_switch_to_guest format: - // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) + // Macro ptrauth_switch_to_hyp format: + // ptrauth_switch_to_hyp(guest cxt, host cxt, tmp1, tmp2, tmp3) // The below macro to save/restore keys is not implemented in C code // as it may cause Pointer Authentication key signing mismatch errors // when this feature is enabled for kernel code. - ptrauth_switch_to_host x1, x2, x3, x4, x5 + ptrauth_switch_to_hyp x1, x2, x3, x4, x5 - // Restore the hosts's sp_el0 + // Restore hyp's sp_el0 restore_sp_el0 x2, x3 - // Now restore the host regs + // Now restore the hyp regs restore_callee_saved_regs x2 + set_loaded_vcpu xzr, x1, x2 + alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error // without an unmask-SError and isb. The ESB-instruction consumed any diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 46b4dab933d0..0a5b36eb54b3 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -12,7 +12,6 @@ #include <asm/cpufeature.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> #include <asm/mmu.h> .macro save_caller_saved_regs_vect @@ -41,20 +40,6 @@ .text -.macro do_el2_call - /* - * Shuffle the parameters before calling the function - * pointed to in x0. Assumes parameters in x[1,2,3]. - */ - str lr, [sp, #-16]! - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - ldr lr, [sp], #16 -.endm - el1_sync: // Guest trapped into EL2 mrs x0, esr_el2 @@ -63,44 +48,6 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap -#ifdef __KVM_NVHE_HYPERVISOR__ - mrs x1, vttbr_el2 // If vttbr is valid, the guest - cbnz x1, el1_hvc_guest // called HVC - - /* Here, we're pretty sure the host called HVC. */ - ldp x0, x1, [sp], #16 - - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.hs 1f - - /* - * Compute the idmap address of __kvm_handle_stub_hvc and - * jump there. Since we use kimage_voffset, do not use the - * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead - * (by loading it from the constant pool). - * - * Preserve x0-x4, which may contain stub parameters. - */ - ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 - br x5 - -1: - /* - * Perform the EL2 call - */ - kern_hyp_va x0 - do_el2_call - - eret - sb -#endif /* __KVM_NVHE_HYPERVISOR__ */ - -el1_hvc_guest: /* * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. * The workaround has already been applied on the host, @@ -116,35 +63,6 @@ el1_hvc_guest: ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b wa2_end -alternative_cb_end - get_vcpu_ptr x2, x0 - ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] - - // Sanitize the argument and update the guest flags - ldr x1, [sp, #8] // Guest's x1 - clz w1, w1 // Murphy's device: - lsr w1, w1, #5 // w1 = !!w1 without using - eor w1, w1, #1 // the flags... - bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 - str x0, [x2, #VCPU_WORKAROUND_FLAGS] - - /* Check that we actually need to perform the call */ - hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 - cbz x0, wa2_end - - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - smc #0 - - /* Don't leak data from the SMC call */ - mov x3, xzr -wa2_end: - mov x2, xzr - mov x1, xzr -#endif - wa_epilogue: mov x0, xzr add sp, sp, #16 @@ -198,24 +116,7 @@ el2_error: eret sb -#ifdef __KVM_NVHE_HYPERVISOR__ -SYM_FUNC_START(__hyp_do_panic) - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - sb -SYM_FUNC_END(__hyp_do_panic) -#endif - -SYM_CODE_START(__hyp_panic) - get_host_ctxt x0, x1 - b hyp_panic -SYM_CODE_END(__hyp_panic) - -.macro invalid_vector label, target = __hyp_panic +.macro invalid_vector label, target = __guest_exit_panic .align 2 SYM_CODE_START(\label) b \target @@ -227,7 +128,6 @@ SYM_CODE_END(\label) invalid_vector el2t_irq_invalid invalid_vector el2t_fiq_invalid invalid_vector el2t_error_invalid - invalid_vector el2h_sync_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid invalid_vector el1_fiq_invalid @@ -257,10 +157,9 @@ check_preamble_length 661b, 662b .macro invalid_vect target .align 7 661: - b \target nop + stp x0, x1, [sp, #-16]! 662: - ldp x0, x1, [sp], #16 b \target check_preamble_length 661b, 662b @@ -288,7 +187,6 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -#ifdef CONFIG_KVM_INDIRECT_VECTORS .macro hyp_ventry .align 7 1: esb @@ -338,4 +236,3 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) -#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 5e28ea6aa097..4ebe9f558f3a 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,7 +135,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); @@ -154,7 +154,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 5b6b8fa00f0a..313a8fa3c721 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -126,11 +126,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline void __activate_vm(struct kvm_s2_mmu *mmu) -{ - __load_guest_stage2(mmu); -} - static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { u64 par, tmp; @@ -377,6 +372,8 @@ static inline bool esr_is_ptrauth_trap(u32 esr) ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ } while(0) +DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); + static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *ctxt; @@ -386,7 +383,7 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) return false; - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + ctxt = this_cpu_ptr(&kvm_hyp_ctxt); __ptrauth_save_key(ctxt, APIA); __ptrauth_save_key(ctxt, APIB); __ptrauth_save_key(ctxt, APDA); @@ -449,7 +446,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); + !kvm_vcpu_abt_iss1tw(vcpu); if (valid) { int ret = __vgic_v2_perform_cpuif_access(vcpu); @@ -479,49 +476,15 @@ exit: return false; } -static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - static inline void __kvm_unexpected_el2_exception(void) { + extern char __guest_exit_panic[]; unsigned long addr, fixup; - struct kvm_cpu_context *host_ctxt; struct exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); entry = hyp_symbol_addr(__start___kvm_ex_table); end = hyp_symbol_addr(__stop___kvm_ex_table); - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; while (entry < end) { addr = (unsigned long)&entry->insn + entry->insn; @@ -536,7 +499,8 @@ static inline void __kvm_unexpected_el2_exception(void) return; } - hyp_panic(host_ctxt); + /* Trigger a panic after restoring the hyp context. */ + write_sysreg(__guest_exit_panic, elr_el2); } #endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/.gitignore b/arch/arm64/kvm/hyp/nvhe/.gitignore new file mode 100644 index 000000000000..695d73d0249e --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +hyp.lds diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index aef76487edc2..ddde15fe85f2 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,44 +6,50 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o -obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) -extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) +## +## Build rules for compiling nVHE hyp code +## Output of this folder is `kvm_nvhe.o`, a partially linked object +## file containing all nVHE hyp code and data. +## -$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE +hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y)) +obj-y := kvm_nvhe.o +extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds + +# 1) Compile all source files to `.nvhe.o` object files. The file extension +# avoids file name clashes for files shared with VHE. +$(obj)/%.nvhe.o: $(src)/%.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE +$(obj)/%.nvhe.o: $(src)/%.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE - $(call if_changed,hypcopy) -# Disable reordering functions by GCC (enabled at -O2). -# This pass puts functions into '.text.*' sections to aid the linker -# in optimizing ELF layout. See HYPCOPY comment below for more info. -ccflags-y += $(call cc-option,-fno-reorder-functions) +# 2) Compile linker script. +$(obj)/hyp.lds: $(src)/hyp.lds.S FORCE + $(call if_changed_dep,cpp_lds_S) + +# 3) Partially link all '.nvhe.o' files and apply the linker script. +# Prefixes names of ELF sections with '.hyp', eg. '.hyp.text'. +# Note: The following rule assumes that the 'ld' rule puts LDFLAGS before +# the list of dependencies to form '-T $(obj)/hyp.lds'. This is to +# keep the dependency on the target while avoiding an error from +# GNU ld if the linker script is passed to it twice. +LDFLAGS_kvm_nvhe.tmp.o := -r -T +$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE + $(call if_changed,ld) + +# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'. +# Prefixes names of ELF symbols with '__kvm_nvhe_'. +$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE + $(call if_changed,hypcopy) # The HYPCOPY command uses `objcopy` to prefix all ELF symbol names -# and relevant ELF section names to avoid clashes with VHE code/data. -# -# Hyp code is assumed to be in the '.text' section of the input object -# files (with the exception of specialized sections such as -# '.hyp.idmap.text'). This assumption may be broken by a compiler that -# divides code into sections like '.text.unlikely' so as to optimize -# ELF layout. HYPCOPY checks that no such sections exist in the input -# using `objdump`, otherwise they would be linked together with other -# kernel code and not memory-mapped correctly at runtime. +# to avoid clashes with VHE code/data. quiet_cmd_hypcopy = HYPCOPY $@ - cmd_hypcopy = \ - if $(OBJDUMP) -h $< | grep -F '.text.'; then \ - echo "$@: function reordering not supported in nVHE hyp code" >&2; \ - /bin/false; \ - fi; \ - $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ - --rename-section=.text=.hyp.text \ - $< $@ + cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@ # Remove ftrace and Shadow Call Stack CFLAGS. # This is equivalent to the 'notrace' and '__noscs' annotations. diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S new file mode 100644 index 000000000000..ff9a0f547b9f --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 - Google Inc + * Author: Andrew Scull <ascull@google.com> + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + .text + +SYM_FUNC_START(__host_exit) + stp x0, x1, [sp, #-16]! + + get_host_ctxt x0, x1 + + ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) + + /* Store the host regs x2 and x3 */ + stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] + + /* Retrieve the host regs x0-x1 from the stack */ + ldp x2, x3, [sp], #16 // x0, x1 + + /* Store the host regs x0-x1 and x4-x17 */ + stp x2, x3, [x0, #CPU_XREG_OFFSET(0)] + stp x4, x5, [x0, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x0, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x0, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x0, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x0, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x0, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x0, #CPU_XREG_OFFSET(16)] + + /* Store the host regs x18-x29, lr */ + save_callee_saved_regs x0 + + /* Save the host context pointer in x29 across the function call */ + mov x29, x0 + bl handle_trap + + /* Restore host regs x0-x17 */ + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] + + /* x0-7 are use for panic arguments */ +__host_enter_for_panic: + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] + + /* Restore host regs x18-x29, lr */ + restore_callee_saved_regs x29 + + /* Do not touch any register after this! */ +__host_enter_without_restoring: + eret + sb +SYM_FUNC_END(__host_exit) + +/* + * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); + */ +SYM_FUNC_START(__hyp_do_panic) + /* Load the format arguments into x1-7 */ + mov x6, x3 + get_vcpu_ptr x7, x3 + + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + + /* Prepare and exit to the host's panic funciton. */ + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + + /* + * Set the panic format string and enter the host, conditionally + * restoring the host context. + */ + cmp x0, xzr + ldr x0, =__hyp_panic_string + b.eq __host_enter_without_restoring + b __host_enter_for_panic +SYM_FUNC_END(__hyp_do_panic) + +.macro host_el1_sync_vect + .align 7 +.L__vect_start\@: + stp x0, x1, [sp, #-16]! + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT + cmp x0, #ESR_ELx_EC_HVC64 + ldp x0, x1, [sp], #16 + b.ne __host_exit + + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.hs __host_exit + + /* + * Compute the idmap address of __kvm_handle_stub_hvc and + * jump there. Since we use kimage_voffset, do not use the + * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead + * (by loading it from the constant pool). + * + * Preserve x0-x4, which may contain stub parameters. + */ + ldr x5, =__kvm_handle_stub_hvc + ldr_l x6, kimage_voffset + + /* x5 = __pa(x5) */ + sub x5, x5, x6 + br x5 +.L__vect_end\@: +.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) + .error "host_el1_sync_vect larger than vector entry" +.endif +.endm + +.macro invalid_host_el2_vect + .align 7 + /* If a guest is loaded, panic out of it. */ + stp x0, x1, [sp, #-16]! + get_loaded_vcpu x0, x1 + cbnz x0, __guest_exit_panic + add sp, sp, #16 + + /* + * The panic may not be clean if the exception is taken before the host + * context has been saved by __host_exit or after the hyp context has + * been partially clobbered by __host_enter. + */ + b hyp_panic +.endm + +.macro invalid_host_el1_vect + .align 7 + mov x0, xzr /* restore_host = false */ + mrs x1, spsr_el2 + mrs x2, elr_el2 + mrs x3, par_el1 + b __hyp_do_panic +.endm + +/* + * The host vector does not use an ESB instruction in order to avoid consuming + * SErrors that should only be consumed by the host. Guest entry is deferred by + * __guest_enter if there are any pending asynchronous exceptions so hyp will + * always return to the host without having consumerd host SErrors. + * + * CONFIG_KVM_INDIRECT_VECTORS is not applied to the host vectors because the + * host knows about the EL2 vectors already, and there is no point in hiding + * them. + */ + .align 11 +SYM_CODE_START(__kvm_hyp_host_vector) + invalid_host_el2_vect // Synchronous EL2t + invalid_host_el2_vect // IRQ EL2t + invalid_host_el2_vect // FIQ EL2t + invalid_host_el2_vect // Error EL2t + + invalid_host_el2_vect // Synchronous EL2h + invalid_host_el2_vect // IRQ EL2h + invalid_host_el2_vect // FIQ EL2h + invalid_host_el2_vect // Error EL2h + + host_el1_sync_vect // Synchronous 64-bit EL1 + invalid_host_el1_vect // IRQ 64-bit EL1 + invalid_host_el1_vect // FIQ 64-bit EL1 + invalid_host_el1_vect // Error 64-bit EL1 + + invalid_host_el1_vect // Synchronous 32-bit EL1 + invalid_host_el1_vect // IRQ 32-bit EL1 + invalid_host_el1_vect // FIQ 32-bit EL1 + invalid_host_el1_vect // Error 32-bit EL1 +SYM_CODE_END(__kvm_hyp_host_vector) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index d9434e90c06d..47224dc62c51 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -4,11 +4,13 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <linux/arm-smccc.h> #include <linux/linkage.h> #include <asm/alternative.h> #include <asm/assembler.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> #include <asm/sysreg.h> @@ -44,27 +46,37 @@ __invalid: b . /* - * x0: HYP pgd - * x1: HYP stack - * x2: HYP vectors - * x3: per-CPU offset + * x0: SMCCC function ID + * x1: HYP pgd + * x2: per-CPU offset + * x3: HYP stack + * x4: HYP vectors */ __do_hyp_init: /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - phys_to_ttbr x4, x0 + /* Set tpidr_el2 for use by HYP to free a register */ + msr tpidr_el2, x2 + + mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) + cmp x0, x2 + b.eq 1f + mov x0, #SMCCC_RET_NOT_SUPPORTED + eret + +1: phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT + orr x0, x0, #TTBR_CNP_BIT alternative_else_nop_endif - msr ttbr0_el2, x4 + msr ttbr0_el2, x0 - mrs x4, tcr_el1 - mov_q x5, TCR_EL2_MASK - and x4, x4, x5 - mov x5, #TCR_EL2_RES1 - orr x4, x4, x5 + mrs x0, tcr_el1 + mov_q x1, TCR_EL2_MASK + and x0, x0, x1 + mov x1, #TCR_EL2_RES1 + orr x0, x0, x1 /* * The ID map may be configured to use an extended virtual address @@ -80,18 +92,18 @@ alternative_else_nop_endif * * So use the same T0SZ value we use for the ID map. */ - ldr_l x5, idmap_t0sz - bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + ldr_l x1, idmap_t0sz + bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH /* * Set the PS bits in TCR_EL2. */ - tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 + tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 - msr tcr_el2, x4 + msr tcr_el2, x0 - mrs x4, mair_el1 - msr mair_el2, x4 + mrs x0, mair_el1 + msr mair_el2, x0 isb /* Invalidate the stale TLBs from Bootloader */ @@ -103,25 +115,22 @@ alternative_else_nop_endif * as well as the EE bit on BE. Drop the A flag since the compiler * is allowed to generate unaligned accesses. */ - mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) -CPU_BE( orr x4, x4, #SCTLR_ELx_EE) + mov_q x0, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) +CPU_BE( orr x0, x0, #SCTLR_ELx_EE) alternative_if ARM64_HAS_ADDRESS_AUTH - mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + mov_q x1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) - orr x4, x4, x5 + orr x0, x0, x1 alternative_else_nop_endif - msr sctlr_el2, x4 + msr sctlr_el2, x0 isb /* Set the stack and new vectors */ - kern_hyp_va x1 - mov sp, x1 - msr vbar_el2, x2 - - /* Set tpidr_el2 for use by HYP */ - msr tpidr_el2, x3 + mov sp, x3 + msr vbar_el2, x4 /* Hello, World! */ + mov x0, #SMCCC_RET_SUCCESS eret SYM_CODE_END(__kvm_hyp_init) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c new file mode 100644 index 000000000000..e2eafe2c93af --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google Inc + * Author: Andrew Scull <ascull@google.com> + */ + +#include <hyp/switch.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_host.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +#include <kvm/arm_hypercalls.h> + +static void handle_host_hcall(unsigned long func_id, + struct kvm_cpu_context *host_ctxt) +{ + unsigned long ret = 0; + + switch (func_id) { + case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1; + + ret = __kvm_vcpu_run(kern_hyp_va(vcpu)); + break; + } + case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context): + __kvm_flush_vm_context(); + break; + case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; + phys_addr_t ipa = host_ctxt->regs.regs[2]; + int level = host_ctxt->regs.regs[3]; + + __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); + break; + } + case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; + + __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); + break; + } + case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; + + __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); + break; + } + case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): { + u64 cntvoff = host_ctxt->regs.regs[1]; + + __kvm_timer_set_cntvoff(cntvoff); + break; + } + case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs): + __kvm_enable_ssbs(); + break; + case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2): + ret = __vgic_v3_get_ich_vtr_el2(); + break; + case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr): + ret = __vgic_v3_read_vmcr(); + break; + case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): { + u32 vmcr = host_ctxt->regs.regs[1]; + + __vgic_v3_write_vmcr(vmcr); + break; + } + case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs): + __vgic_v3_init_lrs(); + break; + case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2): + ret = __kvm_get_mdcr_el2(); + break; + case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; + + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + break; + } + case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): { + unsigned long r1 = host_ctxt->regs.regs[1]; + struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; + + __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + break; + } + default: + /* Invalid host HVC. */ + host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED; + return; + } + + host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS; + host_ctxt->regs.regs[1] = ret; +} + +void handle_trap(struct kvm_cpu_context *host_ctxt) +{ + u64 esr = read_sysreg_el2(SYS_ESR); + unsigned long func_id; + + if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64) + hyp_panic(); + + func_id = host_ctxt->regs.regs[0]; + handle_host_hcall(func_id, host_ctxt); +} diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S new file mode 100644 index 000000000000..bb2d986ff696 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Google LLC. + * Written by David Brazdil <dbrazdil@google.com> + * + * Linker script used for partial linking of nVHE EL2 object files. + */ + +#include <asm/hyp_image.h> +#include <asm-generic/vmlinux.lds.h> +#include <asm/cache.h> +#include <asm/memory.h> + +SECTIONS { + HYP_SECTION(.text) + HYP_SECTION_NAME(.data..percpu) : { + PERCPU_INPUT(L1_CACHE_BYTES) + } +} diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 0970442d2dbc..a457a0306e03 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -27,6 +27,11 @@ #include <asm/processor.h> #include <asm/thread_info.h> +/* Non-VHE specific context */ +DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); +DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); +DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -42,6 +47,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) } write_sysreg(val, cptr_el2); + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -60,6 +66,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { + extern char __kvm_hyp_host_vector[]; u64 mdcr_el2; ___deactivate_traps(vcpu); @@ -91,9 +98,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(mdcr_el2, mdcr_el2); write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); + write_sysreg(__kvm_hyp_host_vector, vbar_el2); } -static void __deactivate_vm(struct kvm_vcpu *vcpu) +static void __load_host_stage2(void) { write_sysreg(0, vttbr_el2); } @@ -173,9 +181,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - vcpu = kern_hyp_va(vcpu); - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -194,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); - __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); + __load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); @@ -202,24 +208,20 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __debug_switch_to_guest(vcpu); - __set_guest_arch_workaround_state(vcpu); - do { /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); + exit_code = __guest_enter(vcpu); /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - __set_host_arch_workaround_state(vcpu); - __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); __hyp_vgic_save_state(vcpu); __deactivate_traps(vcpu); - __deactivate_vm(vcpu); + __load_host_stage2(); __sysreg_restore_state_nvhe(host_ctxt); @@ -239,35 +241,31 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQOFF); + host_ctxt->__hyp_running_vcpu = NULL; + return exit_code; } -void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg(par_el1); - struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; - unsigned long str_va; + bool restore_host = true; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; - if (read_sysreg(vttbr_el2)) { + if (vcpu) { __timer_disable_traps(vcpu); __deactivate_traps(vcpu); - __deactivate_vm(vcpu); + __load_host_stage2(); __sysreg_restore_state_nvhe(host_ctxt); } - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. - */ - asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); + __hyp_do_panic(restore_host, spsr, elr, par); unreachable(); } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 69eae608d670..39ca71ab8866 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -31,7 +31,14 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, isb(); } + /* + * __load_guest_stage2() includes an ISB only when the AT + * workaround is applied. Take care of the opposite condition, + * ensuring that we always have an ISB, but not two ISBs back + * to back. + */ __load_guest_stage2(mmu); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } static void __tlb_switch_to_host(struct tlb_inv_context *cxt) @@ -54,7 +61,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - mmu = kern_hyp_va(mmu); __tlb_switch_to_guest(mmu, &cxt); /* @@ -108,7 +114,6 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) dsb(ishst); /* Switch to requested VMID */ - mmu = kern_hyp_va(mmu); __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalls12e1is); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c new file mode 100644 index 000000000000..0cdf6e461cbd --- /dev/null +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Stand-alone page-table allocator for hyp stage-1 and guest stage-2. + * No bombay mix was harmed in the writing of this file. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#include <linux/bitfield.h> +#include <asm/kvm_pgtable.h> + +#define KVM_PGTABLE_MAX_LEVELS 4U + +#define KVM_PTE_VALID BIT(0) + +#define KVM_PTE_TYPE BIT(1) +#define KVM_PTE_TYPE_BLOCK 0 +#define KVM_PTE_TYPE_PAGE 1 +#define KVM_PTE_TYPE_TABLE 1 + +#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) + +#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) + +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1 +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) + +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) + +struct kvm_pgtable_walk_data { + struct kvm_pgtable *pgt; + struct kvm_pgtable_walker *walker; + + u64 addr; + u64 end; +}; + +static u64 kvm_granule_shift(u32 level) +{ + /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); +} + +static u64 kvm_granule_size(u32 level) +{ + return BIT(kvm_granule_shift(level)); +} + +static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +{ + u64 granule = kvm_granule_size(level); + + /* + * Reject invalid block mappings and don't bother with 4TB mappings for + * 52-bit PAs. + */ + if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1)) + return false; + + if (granule > (end - addr)) + return false; + + return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule); +} + +static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) +{ + u64 shift = kvm_granule_shift(level); + u64 mask = BIT(PAGE_SHIFT - 3) - 1; + + return (data->addr >> shift) & mask; +} + +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +{ + u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ + u64 mask = BIT(pgt->ia_bits) - 1; + + return (addr & mask) >> shift; +} + +static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data) +{ + return __kvm_pgd_page_idx(data->pgt, data->addr); +} + +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) +{ + struct kvm_pgtable pgt = { + .ia_bits = ia_bits, + .start_level = start_level, + }; + + return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; +} + +static bool kvm_pte_valid(kvm_pte_t pte) +{ + return pte & KVM_PTE_VALID; +} + +static bool kvm_pte_table(kvm_pte_t pte, u32 level) +{ + if (level == KVM_PGTABLE_MAX_LEVELS - 1) + return false; + + if (!kvm_pte_valid(pte)) + return false; + + return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; +} + +static u64 kvm_pte_to_phys(kvm_pte_t pte) +{ + u64 pa = pte & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + + return pa; +} + +static kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + + return pte; +} + +static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte) +{ + return __va(kvm_pte_to_phys(pte)); +} + +static void kvm_set_invalid_pte(kvm_pte_t *ptep) +{ + kvm_pte_t pte = *ptep; + WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID); +} + +static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp) +{ + kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp)); + + pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); + pte |= KVM_PTE_VALID; + + WARN_ON(kvm_pte_valid(old)); + smp_store_release(ptep, pte); +} + +static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr, + u32 level) +{ + kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa); + u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE : + KVM_PTE_TYPE_BLOCK; + + pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI); + pte |= FIELD_PREP(KVM_PTE_TYPE, type); + pte |= KVM_PTE_VALID; + + /* Tolerate KVM recreating the exact same mapping. */ + if (kvm_pte_valid(old)) + return old == pte; + + smp_store_release(ptep, pte); + return true; +} + +static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, + u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag) +{ + struct kvm_pgtable_walker *walker = data->walker; + return walker->cb(addr, data->end, level, ptep, flag, walker->arg); +} + +static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, + kvm_pte_t *pgtable, u32 level); + +static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, + kvm_pte_t *ptep, u32 level) +{ + int ret = 0; + u64 addr = data->addr; + kvm_pte_t *childp, pte = *ptep; + bool table = kvm_pte_table(pte, level); + enum kvm_pgtable_walk_flags flags = data->walker->flags; + + if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) { + ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, + KVM_PGTABLE_WALK_TABLE_PRE); + } + + if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) { + ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, + KVM_PGTABLE_WALK_LEAF); + pte = *ptep; + table = kvm_pte_table(pte, level); + } + + if (ret) + goto out; + + if (!table) { + data->addr += kvm_granule_size(level); + goto out; + } + + childp = kvm_pte_follow(pte); + ret = __kvm_pgtable_walk(data, childp, level + 1); + if (ret) + goto out; + + if (flags & KVM_PGTABLE_WALK_TABLE_POST) { + ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, + KVM_PGTABLE_WALK_TABLE_POST); + } + +out: + return ret; +} + +static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, + kvm_pte_t *pgtable, u32 level) +{ + u32 idx; + int ret = 0; + + if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS)) + return -EINVAL; + + for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { + kvm_pte_t *ptep = &pgtable[idx]; + + if (data->addr >= data->end) + break; + + ret = __kvm_pgtable_visit(data, ptep, level); + if (ret) + break; + } + + return ret; +} + +static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) +{ + u32 idx; + int ret = 0; + struct kvm_pgtable *pgt = data->pgt; + u64 limit = BIT(pgt->ia_bits); + + if (data->addr > limit || data->end > limit) + return -ERANGE; + + if (!pgt->pgd) + return -EINVAL; + + for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { + kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; + + ret = __kvm_pgtable_walk(data, ptep, pgt->start_level); + if (ret) + break; + } + + return ret; +} + +int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_pgtable_walker *walker) +{ + struct kvm_pgtable_walk_data walk_data = { + .pgt = pgt, + .addr = ALIGN_DOWN(addr, PAGE_SIZE), + .end = PAGE_ALIGN(walk_data.addr + size), + .walker = walker, + }; + + return _kvm_pgtable_walk(&walk_data); +} + +struct hyp_map_data { + u64 phys; + kvm_pte_t attr; +}; + +static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, + struct hyp_map_data *data) +{ + bool device = prot & KVM_PGTABLE_PROT_DEVICE; + u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; + kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype); + u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS; + u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW : + KVM_PTE_LEAF_ATTR_LO_S1_AP_RO; + + if (!(prot & KVM_PGTABLE_PROT_R)) + return -EINVAL; + + if (prot & KVM_PGTABLE_PROT_X) { + if (prot & KVM_PGTABLE_PROT_W) + return -EINVAL; + + if (device) + return -EINVAL; + } else { + attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; + } + + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); + attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; + data->attr = attr; + return 0; +} + +static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, struct hyp_map_data *data) +{ + u64 granule = kvm_granule_size(level), phys = data->phys; + + if (!kvm_block_mapping_supported(addr, end, phys, level)) + return false; + + WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)); + data->phys += granule; + return true; +} + +static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + kvm_pte_t *childp; + + if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) + return 0; + + if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + if (!childp) + return -ENOMEM; + + kvm_set_table_pte(ptep, childp); + return 0; +} + +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot) +{ + int ret; + struct hyp_map_data map_data = { + .phys = ALIGN_DOWN(phys, PAGE_SIZE), + }; + struct kvm_pgtable_walker walker = { + .cb = hyp_map_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &map_data, + }; + + ret = hyp_map_set_prot_attr(prot, &map_data); + if (ret) + return ret; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + dsb(ishst); + isb(); + return ret; +} + +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits) +{ + u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); + + pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pgt->pgd) + return -ENOMEM; + + pgt->ia_bits = va_bits; + pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; + pgt->mmu = NULL; + return 0; +} + +static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + free_page((unsigned long)kvm_pte_follow(*ptep)); + return 0; +} + +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) +{ + struct kvm_pgtable_walker walker = { + .cb = hyp_free_walker, + .flags = KVM_PGTABLE_WALK_TABLE_POST, + }; + + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + free_page((unsigned long)pgt->pgd); + pgt->pgd = NULL; +} + +struct stage2_map_data { + u64 phys; + kvm_pte_t attr; + + kvm_pte_t *anchor; + + struct kvm_s2_mmu *mmu; + struct kvm_mmu_memory_cache *memcache; +}; + +static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, + struct stage2_map_data *data) +{ + bool device = prot & KVM_PGTABLE_PROT_DEVICE; + kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) : + PAGE_S2_MEMATTR(NORMAL); + u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; + + if (!(prot & KVM_PGTABLE_PROT_X)) + attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + else if (device) + return -EINVAL; + + if (prot & KVM_PGTABLE_PROT_R) + attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; + + if (prot & KVM_PGTABLE_PROT_W) + attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; + + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); + attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; + data->attr = attr; + return 0; +} + +static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + struct stage2_map_data *data) +{ + u64 granule = kvm_granule_size(level), phys = data->phys; + + if (!kvm_block_mapping_supported(addr, end, phys, level)) + return false; + + if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)) + goto out; + + /* There's an existing valid leaf entry, so perform break-before-make */ + kvm_set_invalid_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); + kvm_set_valid_leaf_pte(ptep, phys, data->attr, level); +out: + data->phys += granule; + return true; +} + +static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + struct stage2_map_data *data) +{ + if (data->anchor) + return 0; + + if (!kvm_block_mapping_supported(addr, end, data->phys, level)) + return 0; + + kvm_set_invalid_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0); + data->anchor = ptep; + return 0; +} + +static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + struct stage2_map_data *data) +{ + kvm_pte_t *childp, pte = *ptep; + struct page *page = virt_to_page(ptep); + + if (data->anchor) { + if (kvm_pte_valid(pte)) + put_page(page); + + return 0; + } + + if (stage2_map_walker_try_leaf(addr, end, level, ptep, data)) + goto out_get_page; + + if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + if (!data->memcache) + return -ENOMEM; + + childp = kvm_mmu_memory_cache_alloc(data->memcache); + if (!childp) + return -ENOMEM; + + /* + * If we've run into an existing block mapping then replace it with + * a table. Accesses beyond 'end' that fall within the new table + * will be mapped lazily. + */ + if (kvm_pte_valid(pte)) { + kvm_set_invalid_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); + put_page(page); + } + + kvm_set_table_pte(ptep, childp); + +out_get_page: + get_page(page); + return 0; +} + +static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + struct stage2_map_data *data) +{ + int ret = 0; + + if (!data->anchor) + return 0; + + free_page((unsigned long)kvm_pte_follow(*ptep)); + put_page(virt_to_page(ptep)); + + if (data->anchor == ptep) { + data->anchor = NULL; + ret = stage2_map_walk_leaf(addr, end, level, ptep, data); + } + + return ret; +} + +/* + * This is a little fiddly, as we use all three of the walk flags. The idea + * is that the TABLE_PRE callback runs for table entries on the way down, + * looking for table entries which we could conceivably replace with a + * block entry for this mapping. If it finds one, then it sets the 'anchor' + * field in 'struct stage2_map_data' to point at the table entry, before + * clearing the entry to zero and descending into the now detached table. + * + * The behaviour of the LEAF callback then depends on whether or not the + * anchor has been set. If not, then we're not using a block mapping higher + * up the table and we perform the mapping at the existing leaves instead. + * If, on the other hand, the anchor _is_ set, then we drop references to + * all valid leaves so that the pages beneath the anchor can be freed. + * + * Finally, the TABLE_POST callback does nothing if the anchor has not + * been set, but otherwise frees the page-table pages while walking back up + * the page-table, installing the block entry when it revisits the anchor + * pointer and clearing the anchor to NULL. + */ +static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + struct stage2_map_data *data = arg; + + switch (flag) { + case KVM_PGTABLE_WALK_TABLE_PRE: + return stage2_map_walk_table_pre(addr, end, level, ptep, data); + case KVM_PGTABLE_WALK_LEAF: + return stage2_map_walk_leaf(addr, end, level, ptep, data); + case KVM_PGTABLE_WALK_TABLE_POST: + return stage2_map_walk_table_post(addr, end, level, ptep, data); + } + + return -EINVAL; +} + +int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, + u64 phys, enum kvm_pgtable_prot prot, + struct kvm_mmu_memory_cache *mc) +{ + int ret; + struct stage2_map_data map_data = { + .phys = ALIGN_DOWN(phys, PAGE_SIZE), + .mmu = pgt->mmu, + .memcache = mc, + }; + struct kvm_pgtable_walker walker = { + .cb = stage2_map_walker, + .flags = KVM_PGTABLE_WALK_TABLE_PRE | + KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + .arg = &map_data, + }; + + ret = stage2_map_set_prot_attr(prot, &map_data); + if (ret) + return ret; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + dsb(ishst); + return ret; +} + +static void stage2_flush_dcache(void *addr, u64 size) +{ + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + return; + + __flush_dcache_area(addr, size); +} + +static bool stage2_pte_cacheable(kvm_pte_t pte) +{ + u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); + return memattr == PAGE_S2_MEMATTR(NORMAL); +} + +static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct kvm_s2_mmu *mmu = arg; + kvm_pte_t pte = *ptep, *childp = NULL; + bool need_flush = false; + + if (!kvm_pte_valid(pte)) + return 0; + + if (kvm_pte_table(pte, level)) { + childp = kvm_pte_follow(pte); + + if (page_count(virt_to_page(childp)) != 1) + return 0; + } else if (stage2_pte_cacheable(pte)) { + need_flush = true; + } + + /* + * This is similar to the map() path in that we unmap the entire + * block entry and rely on the remaining portions being faulted + * back lazily. + */ + kvm_set_invalid_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + put_page(virt_to_page(ptep)); + + if (need_flush) { + stage2_flush_dcache(kvm_pte_follow(pte), + kvm_granule_size(level)); + } + + if (childp) + free_page((unsigned long)childp); + + return 0; +} + +int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct kvm_pgtable_walker walker = { + .cb = stage2_unmap_walker, + .arg = pgt->mmu, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + }; + + return kvm_pgtable_walk(pgt, addr, size, &walker); +} + +struct stage2_attr_data { + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; +}; + +static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t pte = *ptep; + struct stage2_attr_data *data = arg; + + if (!kvm_pte_valid(pte)) + return 0; + + data->level = level; + data->pte = pte; + pte &= ~data->attr_clr; + pte |= data->attr_set; + + /* + * We may race with the CPU trying to set the access flag here, + * but worst-case the access flag update gets lost and will be + * set on the next access instead. + */ + if (data->pte != pte) + WRITE_ONCE(*ptep, pte); + + return 0; +} + +static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, + u64 size, kvm_pte_t attr_set, + kvm_pte_t attr_clr, kvm_pte_t *orig_pte, + u32 *level) +{ + int ret; + kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; + struct stage2_attr_data data = { + .attr_set = attr_set & attr_mask, + .attr_clr = attr_clr & attr_mask, + }; + struct kvm_pgtable_walker walker = { + .cb = stage2_attr_walker, + .arg = &data, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + if (ret) + return ret; + + if (orig_pte) + *orig_pte = data.pte; + + if (level) + *level = data.level; + return 0; +} + +int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + return stage2_update_leaf_attrs(pgt, addr, size, 0, + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, + NULL, NULL); +} + +kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) +{ + kvm_pte_t pte = 0; + stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, + &pte, NULL); + dsb(ishst); + return pte; +} + +kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) +{ + kvm_pte_t pte = 0; + stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, + &pte, NULL); + /* + * "But where's the TLBI?!", you scream. + * "Over in the core code", I sigh. + * + * See the '->clear_flush_young()' callback on the KVM mmu notifier. + */ + return pte; +} + +bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) +{ + kvm_pte_t pte = 0; + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); + return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; +} + +int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot) +{ + int ret; + u32 level; + kvm_pte_t set = 0, clr = 0; + + if (prot & KVM_PGTABLE_PROT_R) + set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; + + if (prot & KVM_PGTABLE_PROT_W) + set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; + + if (prot & KVM_PGTABLE_PROT_X) + clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); + if (!ret) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); + return ret; +} + +static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte)) + return 0; + + stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level)); + return 0; +} + +int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct kvm_pgtable_walker walker = { + .cb = stage2_flush_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + return 0; + + return kvm_pgtable_walk(pgt, addr, size, &walker); +} + +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) +{ + size_t pgd_sz; + u64 vtcr = kvm->arch.vtcr; + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); + if (!pgt->pgd) + return -ENOMEM; + + pgt->ia_bits = ia_bits; + pgt->start_level = start_level; + pgt->mmu = &kvm->arch.mmu; + + /* Ensure zeroed PGD pages are visible to the hardware walker */ + dsb(ishst); + return 0; +} + +static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte)) + return 0; + + put_page(virt_to_page(ptep)); + + if (kvm_pte_table(pte, level)) + free_page((unsigned long)kvm_pte_follow(pte)); + + return 0; +} + +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + struct kvm_pgtable_walker walker = { + .cb = stage2_free_walker, + .flags = KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + }; + + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; + free_pages_exact(pgt->pgd, pgd_sz); + pgt->pgd = NULL; +} diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c1da4f86ccac..fe69de16dadc 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -28,6 +28,11 @@ const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; +/* VHE specific context */ +DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); +DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); +DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -59,7 +64,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, cpacr_el1); - write_sysreg(kvm_get_hyp_vector(), vbar_el1); + write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); } NOKPROBE_SYMBOL(__activate_traps); @@ -108,7 +113,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -120,28 +125,24 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * HCR_EL2.TGE. * * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). + * kvm_vcpu_load_sysregs_vhe above. We must now call + * __load_guest_stage2 before __activate_traps, because + * __load_guest_stage2 configures stage 2 translation, and + * __activate_traps clear HCR_EL2.TGE (among other things). */ - __activate_vm(vcpu->arch.hw_mmu); + __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - __set_guest_arch_workaround_state(vcpu); - do { /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); + exit_code = __guest_enter(vcpu); /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - __set_host_arch_workaround_state(vcpu); - sysreg_save_guest_state_vhe(guest_ctxt); __deactivate_traps(vcpu); @@ -192,10 +193,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) return ret; } -static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) +static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) { + struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); @@ -208,13 +211,13 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, } NOKPROBE_SYMBOL(__hyp_call_panic); -void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg(par_el1); - __hyp_call_panic(spsr, elr, par, host_ctxt); + __hyp_call_panic(spsr, elr, par); unreachable(); } diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 996471e4c138..2a0b8c88d74f 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -66,7 +66,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; __sysreg_save_user_state(host_ctxt); /* @@ -100,7 +100,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; deactivate_traps_vhe_put(); __sysreg_save_el1_state(guest_ctxt); |