diff options
25 files changed, 539 insertions, 202 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0bed0e33e1ae..76f795b628f1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -50,6 +50,8 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +#define KVM_HAVE_MMU_RWLOCK + /* * Mode of operation configurable with kvm-arm.mode early param. * See Documentation/admin-guide/kernel-parameters.txt for more information. @@ -71,9 +73,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); struct kvm_vmid { - /* The VMID generation used for the virt. memory system */ - u64 vmid_gen; - u32 vmid; + atomic64_t id; }; struct kvm_s2_mmu { @@ -174,6 +174,7 @@ enum vcpu_sysreg { PAR_EL1, /* Physical Address Register */ MDSCR_EL1, /* Monitor Debug System Control Register */ MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + OSLSR_EL1, /* OS Lock Status Register */ DISR_EL1, /* Deferred Interrupt Status Register */ /* Performance Monitors Registers */ @@ -705,6 +706,12 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +extern unsigned int kvm_arm_vmid_bits; +int kvm_arm_vmid_alloc_init(void); +void kvm_arm_vmid_alloc_free(void); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_clear_active(void); + static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) { vcpu_arch->steal.base = GPA_INVALID; @@ -738,6 +745,10 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); + +#define kvm_vcpu_os_lock_enabled(vcpu) \ + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 81839e9a8a24..74735a864eee 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -115,6 +115,7 @@ alternative_cb_end #include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> +#include <asm/kvm_host.h> void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -266,7 +267,8 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; baddr = mmu->pgd_phys; - vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; + vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT; + vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits); return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..906a3550fc50 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -128,8 +128,16 @@ #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) + #define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLAR_OSLK BIT(0) + #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) +#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) +#define SYS_OSLSR_OSLM_NI 0 +#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) +#define SYS_OSLSR_OSLK BIT(1) + #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) #define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5280e098cfb5..47af76e53221 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -348,7 +348,13 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to - * date with respect to the CPU registers. + * date with respect to the CPU registers. Note carefully that the + * current context is the context last bound to the CPU stored in + * last, if KVM is involved this may be the guest VM context rather + * than the host thread for the VM pointed to by current. This means + * that we must always reference the state storage via last rather + * than via current, other than the TIF_ flags which KVM will + * carefully maintain for us. */ static void fpsimd_save(void) { diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7eaf1f7c4168..884844849c70 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -79,6 +79,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors); /* Kernel symbol used by icache_is_vpipt(). */ KVM_NVHE_ALIAS(__icache_flags); +/* VMID bits set by the KVM VMID allocator */ +KVM_NVHE_ALIAS(kvm_arm_vmid_bits); + /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 91861fd8b897..261644b1a6bb 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \ - arch_timer.o trng.o\ + arch_timer.o trng.o vmid.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 13c1318d8b9a..fefd5774ab55 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -53,11 +53,6 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); -/* The VMID used in the VTTBR */ -static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u32 kvm_next_vmid; -static DEFINE_SPINLOCK(kvm_vmid_lock); - static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); @@ -430,6 +425,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); + kvm_arm_vmid_clear_active(); vcpu_clear_on_unsupported_cpu(vcpu); vcpu->cpu = -1; @@ -498,87 +494,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) } #endif -/* Just ensure a guest exit from a particular CPU */ -static void exit_vm_noop(void *info) -{ -} - -void force_vm_exit(const cpumask_t *mask) -{ - preempt_disable(); - smp_call_function_many(mask, exit_vm_noop, NULL, true); - preempt_enable(); -} - -/** - * need_new_vmid_gen - check that the VMID is still valid - * @vmid: The VMID to check - * - * return true if there is a new generation of VMIDs being used - * - * The hardware supports a limited set of values with the value zero reserved - * for the host, so we check if an assigned value belongs to a previous - * generation, which requires us to assign a new value. If we're the first to - * use a VMID for the new generation, we must flush necessary caches and TLBs - * on all CPUs. - */ -static bool need_new_vmid_gen(struct kvm_vmid *vmid) -{ - u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); - smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ - return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); -} - -/** - * update_vmid - Update the vmid with a valid VMID for the current generation - * @vmid: The stage-2 VMID information struct - */ -static void update_vmid(struct kvm_vmid *vmid) -{ - if (!need_new_vmid_gen(vmid)) - return; - - spin_lock(&kvm_vmid_lock); - - /* - * We need to re-check the vmid_gen here to ensure that if another vcpu - * already allocated a valid vmid for this vm, then this vcpu should - * use the same vmid. - */ - if (!need_new_vmid_gen(vmid)) { - spin_unlock(&kvm_vmid_lock); - return; - } - - /* First user of a new VMID generation? */ - if (unlikely(kvm_next_vmid == 0)) { - atomic64_inc(&kvm_vmid_gen); - kvm_next_vmid = 1; - - /* - * On SMP we know no other CPUs can use this CPU's or each - * other's VMID after force_vm_exit returns since the - * kvm_vmid_lock blocks them from reentry to the guest. - */ - force_vm_exit(cpu_all_mask); - /* - * Now broadcast TLB + ICACHE invalidation over the inner - * shareable domain to make sure all data structures are - * clean. - */ - kvm_call_hyp(__kvm_flush_vm_context); - } - - WRITE_ONCE(vmid->vmid, kvm_next_vmid); - kvm_next_vmid++; - kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; - - smp_wmb(); - WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); - - spin_unlock(&kvm_vmid_lock); -} - static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) { return vcpu->arch.target >= 0; @@ -814,7 +729,6 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) } return kvm_request_pending(vcpu) || - need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || xfer_to_guest_mode_work_pending(); } @@ -876,8 +790,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (!ret) ret = 1; - update_vmid(&vcpu->arch.hw_mmu->vmid); - check_vcpu_requests(vcpu); /* @@ -887,6 +799,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); + /* + * The VMID allocator only tracks active VMIDs per + * physical CPU, and therefore the VMID allocated may not be + * preserved on VMID roll-over if the task was preempted, + * making a thread's VMID inactive. So we need to call + * kvm_arm_vmid_update() in non-premptible context. + */ + kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid); + kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -2182,6 +2103,12 @@ int kvm_arch_init(void *opaque) if (err) return err; + err = kvm_arm_vmid_alloc_init(); + if (err) { + kvm_err("Failed to initialize VMID allocator.\n"); + return err; + } + if (!in_hyp_mode) { err = init_hyp_mode(); if (err) @@ -2221,6 +2148,7 @@ out_hyp: if (!in_hyp_mode) teardown_hyp_mode(); out_err: + kvm_arm_vmid_alloc_free(); return err; } diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index db9361338b2a..4fd5c216c4bb 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -105,9 +105,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) * - Userspace is using the hardware to debug the guest * (KVM_GUESTDBG_USE_HW is set). * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear). + * - The guest has enabled the OS Lock (debug exceptions are blocked). */ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) || - !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) || + kvm_vcpu_os_lock_enabled(vcpu)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); @@ -160,8 +162,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) kvm_arm_setup_mdcr_el2(vcpu); - /* Is Guest debugging in effect? */ - if (vcpu->guest_debug) { + /* Check if we need to use the debug registers. */ + if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { /* Save guest debug state */ save_guest_debug_regs(vcpu); @@ -223,6 +225,19 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), &vcpu->arch.debug_ptr->dbg_wcr[0], &vcpu->arch.debug_ptr->dbg_wvr[0]); + + /* + * The OS Lock blocks debug exceptions in all ELs when it is + * enabled. If the guest has enabled the OS Lock, constrain its + * effects to the guest. Emulate the behavior by clearing + * MDSCR_EL1.MDE. In so doing, we ensure that host debug + * exceptions are unaffected by guest configuration of the OS + * Lock. + */ + } else if (kvm_vcpu_os_lock_enabled(vcpu)) { + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); + mdscr &= ~DBG_MDSCR_MDE; + vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); } } @@ -244,7 +259,10 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) { trace_kvm_arm_clear_debug(vcpu->guest_debug); - if (vcpu->guest_debug) { + /* + * Restore the guest's debug registers if we were using them. + */ + if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { restore_guest_debug_regs(vcpu); /* diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 2f48fd362a8c..397fdac75cb1 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -84,6 +84,11 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; } +/* + * Called just before entering the guest once we are no longer + * preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM + * mirror of the flag used by the hypervisor. + */ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) @@ -93,10 +98,11 @@ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) } /* - * If the guest FPSIMD state was loaded, update the host's context - * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu - * so that they will be written back if the kernel clobbers them due to - * kernel-mode NEON before re-entry into the guest. + * Called just after exiting the guest. If the guest FPSIMD state + * was loaded, update the host's context tracking data mark the CPU + * FPSIMD regs as dirty and belonging to vcpu so that they will be + * written back if the kernel clobbers them due to kernel-mode NEON + * before re-entry into the guest. */ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 701cfb964905..667654bd3734 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -173,6 +173,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) return false; /* Valid trap. Switch the context: */ + + /* First disable enough traps to allow us to update the registers */ if (has_vhe()) { reg = CPACR_EL1_FPEN; if (sve_guest) @@ -188,11 +190,13 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) } isb(); + /* Write out the host state if it's in the registers */ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { __fpsimd_save_state(vcpu->arch.host_fpsimd_state); vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; } + /* Restore the guest state */ if (sve_guest) __hyp_sve_restore_guest(vcpu); else diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 24b2c2425b38..f9fe4dc21b1f 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,10 +13,11 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ - hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ + hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o +obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-y += $(lib-objs) ## diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c new file mode 100644 index 000000000000..d68abd7ea124 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 - Google LLC + * Author: Keir Fraser <keirf@google.com> + */ + +#include <linux/list.h> +#include <linux/bug.h> + +static inline __must_check bool nvhe_check_data_corruption(bool v) +{ + return v; +} + +#define NVHE_CHECK_DATA_CORRUPTION(condition) \ + nvhe_check_data_corruption(({ \ + bool corruption = unlikely(condition); \ + if (corruption) { \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ + BUG_ON(1); \ + } else \ + WARN_ON(1); \ + } \ + corruption; \ + })) + +/* The predicates checked here are taken from lib/list_debug.c. */ + +bool __list_add_valid(struct list_head *new, struct list_head *prev, + struct list_head *next) +{ + if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) || + NVHE_CHECK_DATA_CORRUPTION(prev->next != next) || + NVHE_CHECK_DATA_CORRUPTION(new == prev || new == next)) + return false; + + return true; +} + +bool __list_del_entry_valid(struct list_head *entry) +{ + struct list_head *prev, *next; + + prev = entry->prev; + next = entry->next; + + if (NVHE_CHECK_DATA_CORRUPTION(next == LIST_POISON1) || + NVHE_CHECK_DATA_CORRUPTION(prev == LIST_POISON2) || + NVHE_CHECK_DATA_CORRUPTION(prev->next != entry) || + NVHE_CHECK_DATA_CORRUPTION(next->prev != entry)) + return false; + + return true; +} diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 674f10564373..78edf077fa3b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -138,8 +138,7 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->pgt = &host_kvm.pgt; - WRITE_ONCE(mmu->vmid.vmid_gen, 0); - WRITE_ONCE(mmu->vmid.vmid, 0); + atomic64_set(&mmu->vmid.id, 0); return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c deleted file mode 100644 index c0aa6bbfd79d..000000000000 --- a/arch/arm64/kvm/hyp/nvhe/stub.c +++ /dev/null @@ -1,22 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Stubs for out-of-line function calls caused by re-using kernel - * infrastructure at EL2. - * - * Copyright (C) 2020 - Google LLC - */ - -#include <linux/list.h> - -#ifdef CONFIG_DEBUG_LIST -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) -{ - return true; -} - -bool __list_del_entry_valid(struct list_head *entry) -{ - return true; -} -#endif diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index bc2aba953299..1623abc56af2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -58,7 +58,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr, break; if (resched && next != end) - cond_resched_lock(&kvm->mmu_lock); + cond_resched_rwlock_write(&kvm->mmu_lock); } while (addr = next, addr != end); return ret; @@ -179,7 +179,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); phys_addr_t end = start + size; - assert_spin_locked(&kvm->mmu_lock); + lockdep_assert_held_write(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap, may_block)); @@ -213,13 +213,13 @@ static void stage2_flush_vm(struct kvm *kvm) int idx, bkt; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, bkt, slots) stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); } @@ -653,7 +653,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); - WRITE_ONCE(mmu->vmid.vmid_gen, 0); return 0; out_destroy_pgtable: @@ -720,13 +719,13 @@ void stage2_unmap_vm(struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); mmap_read_lock(current->mm); - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, bkt, slots) stage2_unmap_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); mmap_read_unlock(current->mm); srcu_read_unlock(&kvm->srcu, idx); } @@ -736,14 +735,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); struct kvm_pgtable *pgt = NULL; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); pgt = mmu->pgt; if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; free_percpu(mmu->last_vcpu_ran); } - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); if (pgt) { kvm_pgtable_stage2_destroy(pgt); @@ -783,10 +782,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) break; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, &cache); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); if (ret) break; @@ -834,9 +833,9 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) start = memslot->base_gfn << PAGE_SHIFT; end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); stage2_wp_range(&kvm->arch.mmu, start, end); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1080,6 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); + bool logging_perm_fault = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1114,6 +1114,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + logging_perm_fault = (fault_status == FSC_PERM && write_fault); } else { vma_shift = get_vma_page_shift(vma, hva); } @@ -1212,7 +1213,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; - spin_lock(&kvm->mmu_lock); + /* + * To reduce MMU contentions and enhance concurrency during dirty + * logging dirty logging, only acquire read lock for permission + * relaxation. + */ + if (logging_perm_fault) + read_lock(&kvm->mmu_lock); + else + write_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; @@ -1271,7 +1280,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - spin_unlock(&kvm->mmu_lock); + if (logging_perm_fault) + read_unlock(&kvm->mmu_lock); + else + write_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; @@ -1286,10 +1298,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) trace_kvm_access_fault(fault_ipa); - spin_lock(&vcpu->kvm->mmu_lock); + write_lock(&vcpu->kvm->mmu_lock); mmu = vcpu->arch.hw_mmu; kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); - spin_unlock(&vcpu->kvm->mmu_lock); + write_unlock(&vcpu->kvm->mmu_lock); pte = __pte(kpte); if (pte_valid(pte)) @@ -1692,9 +1704,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, gpa_t gpa = slot->base_gfn << PAGE_SHIFT; phys_addr_t size = slot->npages << PAGE_SHIFT; - spin_lock(&kvm->mmu_lock); + write_lock(&kvm->mmu_lock); unmap_stage2_range(&kvm->arch.mmu, gpa, size); - spin_unlock(&kvm->mmu_lock); + write_unlock(&kvm->mmu_lock); } /* diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 3eae32876897..a0c10c11f40e 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -85,7 +85,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) if (!vcpu) return PSCI_RET_INVALID_PARAMS; if (!vcpu->arch.power_off) { - if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; else return PSCI_RET_INVALID_PARAMS; @@ -392,7 +392,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) */ int kvm_psci_call(struct kvm_vcpu *vcpu) { - switch (kvm_psci_version(vcpu, vcpu->kvm)) { + switch (kvm_psci_version(vcpu)) { case KVM_ARM_PSCI_1_0: return kvm_psci_1_0_call(vcpu); case KVM_ARM_PSCI_0_2: @@ -471,7 +471,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_ARM_PSCI_VERSION: - val = kvm_psci_version(vcpu, vcpu->kvm); + val = kvm_psci_version(vcpu); break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4dc2fba316ff..dd34b5ab51d4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -44,6 +44,10 @@ * 64bit interface. */ +static int reg_from_user(u64 *val, const void __user *uaddr, u64 id); +static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); +static u64 sys_reg_to_index(const struct sys_reg_desc *reg); + static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -287,16 +291,55 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, return trap_raz_wi(vcpu, p, r); } +static bool trap_oslar_el1(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 oslsr; + + if (!p->is_write) + return read_from_write_only(vcpu, p, r); + + /* Forward the OSLK bit to OSLSR */ + oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK; + if (p->regval & SYS_OSLAR_OSLK) + oslsr |= SYS_OSLSR_OSLK; + + __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; + return true; +} + static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (p->is_write) { - return ignore_write(vcpu, p); - } else { - p->regval = (1 << 3); - return true; - } + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = __vcpu_sys_reg(vcpu, r->reg); + return true; +} + +static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + u64 id = sys_reg_to_index(rd); + u64 val; + int err; + + err = reg_from_user(&val, uaddr, id); + if (err) + return err; + + /* + * The only modifiable bit is the OSLK bit. Refuse the write if + * userspace attempts to change any other bit in the register. + */ + if ((val ^ rd->val) & ~SYS_OSLSR_OSLK) + return -EINVAL; + + __vcpu_sys_reg(vcpu, rd->reg) = val; + return 0; } static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, @@ -1164,10 +1207,6 @@ static bool access_raz_id_reg(struct kvm_vcpu *vcpu, return __access_id_reg(vcpu, p, r, true); } -static int reg_from_user(u64 *val, const void __user *uaddr, u64 id); -static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); -static u64 sys_reg_to_index(const struct sys_reg_desc *reg); - /* Visibility overrides for SVE-specific control registers */ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) @@ -1418,9 +1457,9 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, * Debug handling: We do trap most, if not all debug related system * registers. The implementation is good enough to ensure that a guest * can use these with minimal performance degradation. The drawback is - * that we don't implement any of the external debug, none of the - * OSlock protocol. This should be revisited if we ever encounter a - * more demanding guest... + * that we don't implement any of the external debug architecture. + * This should be revisited if we ever encounter a more demanding + * guest... */ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, @@ -1447,8 +1486,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { DBG_BCR_BVR_WCR_WVR_EL1(15), { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi }, - { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 }, + { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 }, + { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1, + SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, @@ -1920,10 +1960,10 @@ static const struct sys_reg_desc cp14_regs[] = { DBGBXVR(0), /* DBGOSLAR */ - { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_oslar_el1 }, DBGBXVR(1), /* DBGOSLSR */ - { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 }, + { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1, NULL, OSLSR_EL1 }, DBGBXVR(2), DBGBXVR(3), /* DBGOSDLR */ diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c new file mode 100644 index 000000000000..8d5f0506fd87 --- /dev/null +++ b/arch/arm64/kvm/vmid.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VMID allocator. + * + * Based on Arm64 ASID allocator algorithm. + * Please refer arch/arm64/mm/context.c for detailed + * comments on algorithm. + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Ltd. + */ + +#include <linux/bitfield.h> +#include <linux/bitops.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +unsigned int kvm_arm_vmid_bits; +static DEFINE_RAW_SPINLOCK(cpu_vmid_lock); + +static atomic64_t vmid_generation; +static unsigned long *vmid_map; + +static DEFINE_PER_CPU(atomic64_t, active_vmids); +static DEFINE_PER_CPU(u64, reserved_vmids); + +#define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0)) +#define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits) + +#define NUM_USER_VMIDS VMID_FIRST_VERSION +#define vmid2idx(vmid) ((vmid) & ~VMID_MASK) +#define idx2vmid(idx) vmid2idx(idx) + +/* + * As vmid #0 is always reserved, we will never allocate one + * as below and can be treated as invalid. This is used to + * set the active_vmids on vCPU schedule out. + */ +#define VMID_ACTIVE_INVALID VMID_FIRST_VERSION + +#define vmid_gen_match(vmid) \ + (!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits)) + +static void flush_context(void) +{ + int cpu; + u64 vmid; + + bitmap_clear(vmid_map, 0, NUM_USER_VMIDS); + + for_each_possible_cpu(cpu) { + vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0); + + /* Preserve reserved VMID */ + if (vmid == 0) + vmid = per_cpu(reserved_vmids, cpu); + __set_bit(vmid2idx(vmid), vmid_map); + per_cpu(reserved_vmids, cpu) = vmid; + } + + /* + * Unlike ASID allocator, we expect less frequent rollover in + * case of VMIDs. Hence, instead of marking the CPU as + * flush_pending and issuing a local context invalidation on + * the next context-switch, we broadcast TLB flush + I-cache + * invalidation over the inner shareable domain on rollover. + */ + kvm_call_hyp(__kvm_flush_vm_context); +} + +static bool check_update_reserved_vmid(u64 vmid, u64 newvmid) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved VMIDs looking for a match + * and update to use newvmid (i.e. the same VMID in the current + * generation). + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_vmids, cpu) == vmid) { + hit = true; + per_cpu(reserved_vmids, cpu) = newvmid; + } + } + + return hit; +} + +static u64 new_vmid(struct kvm_vmid *kvm_vmid) +{ + static u32 cur_idx = 1; + u64 vmid = atomic64_read(&kvm_vmid->id); + u64 generation = atomic64_read(&vmid_generation); + + if (vmid != 0) { + u64 newvmid = generation | (vmid & ~VMID_MASK); + + if (check_update_reserved_vmid(vmid, newvmid)) { + atomic64_set(&kvm_vmid->id, newvmid); + return newvmid; + } + + if (!__test_and_set_bit(vmid2idx(vmid), vmid_map)) { + atomic64_set(&kvm_vmid->id, newvmid); + return newvmid; + } + } + + vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, cur_idx); + if (vmid != NUM_USER_VMIDS) + goto set_vmid; + + /* We're out of VMIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(VMID_FIRST_VERSION, + &vmid_generation); + flush_context(); + + /* We have more VMIDs than CPUs, so this will always succeed */ + vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, 1); + +set_vmid: + __set_bit(vmid, vmid_map); + cur_idx = vmid; + vmid = idx2vmid(vmid) | generation; + atomic64_set(&kvm_vmid->id, vmid); + return vmid; +} + +/* Called from vCPU sched out with preemption disabled */ +void kvm_arm_vmid_clear_active(void) +{ + atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); +} + +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +{ + unsigned long flags; + u64 vmid, old_active_vmid; + + vmid = atomic64_read(&kvm_vmid->id); + + /* + * Please refer comments in check_and_switch_context() in + * arch/arm64/mm/context.c. + * + * Unlike ASID allocator, we set the active_vmids to + * VMID_ACTIVE_INVALID on vCPU schedule out to avoid + * reserving the VMID space needlessly on rollover. + * Hence explicitly check here for a "!= 0" to + * handle the sync with a concurrent rollover. + */ + old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids)); + if (old_active_vmid != 0 && vmid_gen_match(vmid) && + 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), + old_active_vmid, vmid)) + return; + + raw_spin_lock_irqsave(&cpu_vmid_lock, flags); + + /* Check that our VMID belongs to the current generation. */ + vmid = atomic64_read(&kvm_vmid->id); + if (!vmid_gen_match(vmid)) + vmid = new_vmid(kvm_vmid); + + atomic64_set(this_cpu_ptr(&active_vmids), vmid); + raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); +} + +/* + * Initialize the VMID allocator + */ +int kvm_arm_vmid_alloc_init(void) +{ + kvm_arm_vmid_bits = kvm_get_vmid_bits(); + + /* + * Expect allocation after rollover to fail if we don't have + * at least one more VMID than CPUs. VMID #0 is always reserved. + */ + WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus()); + atomic64_set(&vmid_generation, VMID_FIRST_VERSION); + vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS), + sizeof(*vmid_map), GFP_KERNEL); + if (!vmid_map) + return -ENOMEM; + + return 0; +} + +void kvm_arm_vmid_alloc_free(void) +{ + kfree(vmid_map); +} diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 5b58bd2fe088..297645edcaff 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -16,11 +16,7 @@ #define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 -/* - * We need the KVM pointer independently from the vcpu as we can call - * this from HYP, and need to apply kern_hyp_va on it... - */ -static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm) +static inline int kvm_psci_version(struct kvm_vcpu *vcpu) { /* * Our PSCI implementation stays the same across versions from diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index ea189d83abf7..63b2178210c4 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -23,7 +23,7 @@ #define SPSR_D (1 << 9) #define SPSR_SS (1 << 21) -extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start; +extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; static volatile uint64_t sw_bp_addr, hw_bp_addr; static volatile uint64_t wp_addr, wp_data_addr; static volatile uint64_t svc_addr; @@ -47,6 +47,14 @@ static void reset_debug_state(void) isb(); } +static void enable_os_lock(void) +{ + write_sysreg(1, oslar_el1); + isb(); + + GUEST_ASSERT(read_sysreg(oslsr_el1) & 2); +} + static void install_wp(uint64_t addr) { uint32_t wcr; @@ -99,6 +107,7 @@ static void guest_code(void) GUEST_SYNC(0); /* Software-breakpoint */ + reset_debug_state(); asm volatile("sw_bp: brk #0"); GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp)); @@ -152,6 +161,51 @@ static void guest_code(void) GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4); GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8); + GUEST_SYNC(6); + + /* OS Lock does not block software-breakpoint */ + reset_debug_state(); + enable_os_lock(); + sw_bp_addr = 0; + asm volatile("sw_bp2: brk #0"); + GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2)); + + GUEST_SYNC(7); + + /* OS Lock blocking hardware-breakpoint */ + reset_debug_state(); + enable_os_lock(); + install_hw_bp(PC(hw_bp2)); + hw_bp_addr = 0; + asm volatile("hw_bp2: nop"); + GUEST_ASSERT_EQ(hw_bp_addr, 0); + + GUEST_SYNC(8); + + /* OS Lock blocking watchpoint */ + reset_debug_state(); + enable_os_lock(); + write_data = '\0'; + wp_data_addr = 0; + install_wp(PC(write_data)); + write_data = 'x'; + GUEST_ASSERT_EQ(write_data, 'x'); + GUEST_ASSERT_EQ(wp_data_addr, 0); + + GUEST_SYNC(9); + + /* OS Lock blocking single-step */ + reset_debug_state(); + enable_os_lock(); + ss_addr[0] = 0; + install_ss(); + ss_idx = 0; + asm volatile("mrs x0, esr_el1\n\t" + "add x0, x0, #1\n\t" + "msr daifset, #8\n\t" + : : : "x0"); + GUEST_ASSERT_EQ(ss_addr[0], 0); + GUEST_DONE(); } @@ -223,7 +277,7 @@ int main(int argc, char *argv[]) vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_SVC64, guest_svc_handler); - for (stage = 0; stage < 7; stage++) { + for (stage = 0; stage < 11; stage++) { vcpu_run(vm, VCPU_ID); switch (get_ucall(vm, VCPU_ID, &uc)) { diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index f769fc6cd927..f12147c43464 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -760,6 +760,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(2, 0, 0, 15, 5), ARM64_SYS_REG(2, 0, 0, 15, 6), ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */ ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index e6c7d7f8fbd1..f0230711fbe9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args, uint32_t prio, intid, ap1r; int i; - /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs + /* + * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs * in descending order, so intid+1 can preempt intid. */ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) { @@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args, gic_set_priority(intid, prio); } - /* In a real migration, KVM would restore all GIC state before running + /* + * In a real migration, KVM would restore all GIC state before running * guest code. */ for (i = 0; i < num; i++) { @@ -472,10 +474,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc * guest_restore_active(args, MIN_SPI, 4, f->cmd); } -static void guest_code(struct test_args args) +static void guest_code(struct test_args *args) { - uint32_t i, nr_irqs = args.nr_irqs; - bool level_sensitive = args.level_sensitive; + uint32_t i, nr_irqs = args->nr_irqs; + bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; gic_init(GIC_V3, 1, dist, redist); @@ -484,11 +486,11 @@ static void guest_code(struct test_args args) gic_irq_enable(i); for (i = MIN_SPI; i < nr_irqs; i++) - gic_irq_set_config(i, !args.level_sensitive); + gic_irq_set_config(i, !level_sensitive); - gic_set_eoi_split(args.eoi_split); + gic_set_eoi_split(args->eoi_split); - reset_priorities(&args); + reset_priorities(args); gic_set_priority_mask(CPU_PRIO_MASK); inject_fns = level_sensitive ? inject_level_fns @@ -497,17 +499,18 @@ static void guest_code(struct test_args args) local_irq_enable(); /* Start the tests. */ - for_each_supported_inject_fn(&args, inject_fns, f) { - test_injection(&args, f); - test_preemption(&args, f); - test_injection_failure(&args, f); + for_each_supported_inject_fn(args, inject_fns, f) { + test_injection(args, f); + test_preemption(args, f); + test_injection_failure(args, f); } - /* Restore the active state of IRQs. This would happen when live + /* + * Restore the active state of IRQs. This would happen when live * migrating IRQs in the middle of being handled. */ - for_each_supported_activate_fn(&args, set_active_fns, f) - test_restore_active(&args, f); + for_each_supported_activate_fn(args, set_active_fns, f) + test_restore_active(args, f); GUEST_DONE(); } @@ -573,8 +576,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, kvm_gsi_routing_write(vm, routing); } else { ret = _kvm_gsi_routing_write(vm, routing); - /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */ - if (intid >= KVM_IRQCHIP_NUM_PINS) + /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ + if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) TEST_ASSERT(ret != 0 && errno == EINVAL, "Bad intid %u did not cause KVM_SET_GSI_ROUTING " "error: rc: %i errno: %i", intid, ret, errno); @@ -739,6 +742,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) int gic_fd; struct kvm_vm *vm; struct kvm_inject_args inject_args; + vm_vaddr_t args_gva; struct test_args args = { .nr_irqs = nr_irqs, @@ -757,7 +761,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_init_descriptor_tables(vm, VCPU_ID); /* Setup the guest args page (so it gets the args). */ - vcpu_args_set(vm, 0, 1, args); + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vm, 0, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs, GICD_BASE_GPA, GICR_BASE_GPA); @@ -837,7 +843,8 @@ int main(int argc, char **argv) } } - /* If the user just specified nr_irqs and/or gic_version, then run all + /* + * If the user just specified nr_irqs and/or gic_version, then run all * combinations. */ if (default_args) { diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 1954b964d1cf..b501338d9430 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,6 +18,12 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" +#ifdef __aarch64__ +#include "aarch64/vgic.h" + +#define GICD_BASE_GPA 0x8000000ULL +#define GICR_BASE_GPA 0x80A0000ULL +#endif /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -200,6 +206,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } +#ifdef __aarch64__ + vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); +#endif + /* Start the iterations */ iteration = 0; host_quit = false; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 00f613c0583c..263bf3ed8fd5 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -19,7 +19,7 @@ struct gicv3_data { unsigned int nr_spis; }; -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) #define DIST_BIT (1U << 31) enum gicv3_intid_range { @@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split) { uint32_t val; - /* All other fields are read-only, so no need to read CTLR first. In + /* + * All other fields are read-only, so no need to read CTLR first. In * fact, the kernel does the same. */ val = split ? (1U << 1) : 0; @@ -159,9 +160,10 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, uint32_t cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); - GUEST_ASSERT(*val < (1U << bits_per_field)); - /* Some registers like IROUTER are 64 bit long. Those are currently not - * supported by readl nor writel, so just asserting here until then. + GUEST_ASSERT(!write || *val < (1U << bits_per_field)); + /* + * This function does not support 64 bit accesses. Just asserting here + * until we implement readq/writeq. */ GUEST_ASSERT(reg_bits == 32); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b3a0fca0d780..f365c32a7296 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -138,9 +138,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, uint64_t val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - /* Check that the addr part of the attr is within 32 bits. */ - assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; @@ -150,7 +147,11 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, attr += SZ_64K; } - /* All calls will succeed, even with invalid intid's, as long as the + /* Check that the addr part of the attr is within 32 bits. */ + assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0); + + /* + * All calls will succeed, even with invalid intid's, as long as the * addr part of the attr is within 32 bits (checked above). An invalid * intid will just make the read/writes point to above the intended * register space (i.e., ICPENDR after ISPENDR). |