Diffstat (limited to 'arch/arm64/kvm/guest.c')
| -rw-r--r-- | arch/arm64/kvm/guest.c | 274 |
1 file changed, 126 insertions, 148 deletions
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 1dfb83578277..1c87699fd886 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -18,12 +18,13 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 #include <asm/cputype.h>
 #include <linux/uaccess.h>
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
 #include <asm/sigcontext.h>
 
 #include "trace.h"
@@ -31,8 +32,6 @@
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	KVM_GENERIC_VM_STATS()
 };
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-		sizeof(struct kvm_vm_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vm_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -50,10 +49,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
 	STATS_DESC_COUNTER(VCPU, exits)
 };
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
 	.name_size = KVM_STATS_NAME_SIZE,
@@ -245,7 +243,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
 		switch (mode) {
 		case PSR_AA32_MODE_USR:
-			if (!system_supports_32bit_el0())
+			if (!kvm_supports_32bit_el0())
 				return -EINVAL;
 			break;
 		case PSR_AA32_MODE_FIQ:
@@ -253,9 +251,15 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		case PSR_AA32_MODE_SVC:
 		case PSR_AA32_MODE_ABT:
 		case PSR_AA32_MODE_UND:
+		case PSR_AA32_MODE_SYS:
 			if (!vcpu_el1_is_32bit(vcpu))
 				return -EINVAL;
 			break;
+		case PSR_MODE_EL2h:
+		case PSR_MODE_EL2t:
+			if (!vcpu_has_nv(vcpu))
+				return -EINVAL;
+			fallthrough;
 		case PSR_MODE_EL0t:
 		case PSR_MODE_EL1t:
 		case PSR_MODE_EL1h:
@@ -273,7 +277,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
 		int i, nr_reg;
 
-		switch (*vcpu_cpsr(vcpu)) {
+		switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
 		/*
 		 * Either we are dealing with user mode, and only the
 		 * first 15 registers (+ PC) must be narrowed to 32bit.
@@ -285,7 +289,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 			break;
 
 		/*
-		 * Otherwide, this is a priviledged mode, and *all* the
+		 * Otherwise, this is a privileged mode, and *all* the
 		 * registers must be narrowed to 32bit.
 		 */
 		default:
@@ -587,59 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
 	return copy_core_reg_indices(vcpu, NULL);
 }
 
-/**
- * ARM64 versions of the TIMER registers, always available on arm64
- */
-
-#define NUM_TIMER_REGS 3
-
-static bool is_timer_reg(u64 index)
-{
-	switch (index) {
-	case KVM_REG_ARM_TIMER_CTL:
-	case KVM_REG_ARM_TIMER_CNT:
-	case KVM_REG_ARM_TIMER_CVAL:
-		return true;
-	}
-	return false;
-}
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
-	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
-		return -EFAULT;
-	uindices++;
-	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
-		return -EFAULT;
-	uindices++;
-	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
-	void __user *uaddr = (void __user *)(long)reg->addr;
-	u64 val;
-	int ret;
-
-	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
-	if (ret != 0)
-		return -EFAULT;
-
-	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
-}
-
-static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
-{
-	void __user *uaddr = (void __user *)(long)reg->addr;
-	u64 val;
-
-	val = kvm_arm_timer_get_reg(vcpu, reg->id);
-	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
-}
-
 static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
 {
 	const unsigned int slices = vcpu_sve_slices(vcpu);
@@ -703,6 +654,7 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
 
 /**
  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ * @vcpu: the vCPU pointer
  *
  * This is for all registers.
  */
@@ -714,13 +666,14 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 	res += num_sve_regs(vcpu);
 	res += kvm_arm_num_sys_reg_descs(vcpu);
 	res += kvm_arm_get_fw_num_regs(vcpu);
-	res += NUM_TIMER_REGS;
 
 	return res;
 }
 
 /**
  * kvm_arm_copy_reg_indices - get indices of all registers.
+ * @vcpu: the vCPU pointer
+ * @uindices: register list to copy
  *
  * We do core registers right here, then we append system regs.
  */
@@ -743,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		return ret;
 	uindices += kvm_arm_get_fw_num_regs(vcpu);
 
-	ret = copy_timer_indices(vcpu, uindices);
-	if (ret < 0)
-		return ret;
-	uindices += NUM_TIMER_REGS;
-
 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
 }
 
@@ -759,13 +707,12 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
 	case KVM_REG_ARM_CORE:	return get_core_reg(vcpu, reg);
-	case KVM_REG_ARM_FW:	return kvm_arm_get_fw_reg(vcpu, reg);
+	case KVM_REG_ARM_FW:
+	case KVM_REG_ARM_FW_FEAT_BMAP:
+		return kvm_arm_get_fw_reg(vcpu, reg);
 	case KVM_REG_ARM64_SVE:	return get_sve_reg(vcpu, reg);
 	}
 
-	if (is_timer_reg(reg->id))
-		return get_timer_reg(vcpu, reg);
-
 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
 }
 
@@ -777,13 +724,12 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
 	case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);
-	case KVM_REG_ARM_FW:	return kvm_arm_set_fw_reg(vcpu, reg);
+	case KVM_REG_ARM_FW:
+	case KVM_REG_ARM_FW_FEAT_BMAP:
+		return kvm_arm_set_fw_reg(vcpu, reg);
 	case KVM_REG_ARM64_SVE:	return set_sve_reg(vcpu, reg);
 	}
 
-	if (is_timer_reg(reg->id))
-		return set_timer_reg(vcpu, reg);
-
 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
 }
 
@@ -802,8 +748,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events)
 {
-	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
-	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+	events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
+	events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) ||
+					   vcpu_get_flag(vcpu, NESTED_SERROR_PENDING);
 
 	if (events->exception.serror_pending && events->exception.serror_has_esr)
 		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
@@ -817,32 +764,65 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static void commit_pending_events(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION))
+		return;
+
+	/*
+	 * Reset the MMIO emulation state to avoid stepping PC after emulating
+	 * the exception entry.
+	 */
+	vcpu->mmio_needed = false;
+	kvm_call_hyp(__kvm_adjust_pc, vcpu);
+}
+
 int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events)
 {
 	bool serror_pending = events->exception.serror_pending;
 	bool has_esr = events->exception.serror_has_esr;
 	bool ext_dabt_pending = events->exception.ext_dabt_pending;
+	u64 esr = events->exception.serror_esr;
+	int ret = 0;
 
-	if (serror_pending && has_esr) {
-		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
-			return -EINVAL;
-
-		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
-			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
-		else
-			return -EINVAL;
-	} else if (serror_pending) {
-		kvm_inject_vabt(vcpu);
+	/*
+	 * Immediately commit the pending SEA to the vCPU's architectural
+	 * state which is necessary since we do not return a pending SEA
+	 * to userspace via KVM_GET_VCPU_EVENTS.
+	 */
+	if (ext_dabt_pending) {
+		ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+		commit_pending_events(vcpu);
 	}
 
-	if (ext_dabt_pending)
-		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+	if (ret < 0)
+		return ret;
+
+	if (!serror_pending)
+		return 0;
 
-	return 0;
+	if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr)
+		return -EINVAL;
+
+	if (has_esr && (esr & ~ESR_ELx_ISS_MASK))
+		return -EINVAL;
+
+	if (has_esr)
+		ret = kvm_inject_serror_esr(vcpu, esr);
+	else
+		ret = kvm_inject_serror(vcpu);
+
+	/*
+	 * We could've decided that the SError is due for immediate software
+	 * injection; commit the exception in case userspace decides it wants
+	 * to inject more exceptions for some strange reason.
+	 */
+	commit_pending_events(vcpu);
+	return (ret < 0) ? ret : 0;
 }
 
-int __attribute_const__ kvm_target_cpu(void)
+u32 __attribute_const__ kvm_target_cpu(void)
 {
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
@@ -862,7 +842,7 @@ int __attribute_const__ kvm_target_cpu(void)
 		break;
 	case ARM_CPU_IMP_APM:
 		switch (part_number) {
-		case APM_CPU_PART_POTENZA:
+		case APM_CPU_PART_XGENE:
 			return KVM_ARM_TARGET_XGENE_POTENZA;
 		}
 		break;
@@ -872,26 +852,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
-	int target = kvm_target_cpu();
-
-	if (target < 0)
-		return -ENODEV;
-
-	memset(init, 0, sizeof(*init));
-
-	/*
-	 * For now, we don't return any features.
-	 * In future, we might use features to return target
-	 * specific features available for the preferred
-	 * target type.
-	 */
-	init->target = (__u32)target;
-
-	return 0;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;
 }
@@ -910,8 +870,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 
 /**
  * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
- * @kvm:	pointer to the KVM struct
- * @kvm_guest_debug: the ioctl data buffer
+ * @vcpu: the vCPU pointer
+ * @dbg: the ioctl data buffer
  *
  * This sets up and enables the VM for guest debugging. Userspace
  * passes in a control flag to enable different debug types and
@@ -921,30 +881,24 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
-	int ret = 0;
-
 	trace_kvm_set_guest_debug(vcpu, dbg->control);
 
-	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (dbg->control & KVM_GUESTDBG_ENABLE) {
-		vcpu->guest_debug = dbg->control;
-
-		/* Hardware assisted Break and Watch points */
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
-			vcpu->arch.external_debug_state = dbg->arch;
-		}
+	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+		return -EINVAL;
 
-	} else {
-		/* If not enabled clear all flags */
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
 		vcpu->guest_debug = 0;
+		vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING);
+		return 0;
 	}
 
-out:
-	return ret;
+	vcpu->guest_debug = dbg->control;
+
+	/* Hardware assisted Break and Watch points */
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW)
+		vcpu->arch.external_debug_state = dbg->arch;
+
+	return 0;
 }
 
 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
@@ -954,7 +908,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 
 	switch (attr->group) {
 	case KVM_ARM_VCPU_PMU_V3_CTRL:
+		mutex_lock(&vcpu->kvm->arch.config_lock);
 		ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+		mutex_unlock(&vcpu->kvm->arch.config_lock);
 		break;
 	case KVM_ARM_VCPU_TIMER_CTRL:
 		ret = kvm_arm_timer_set_attr(vcpu, attr);
@@ -1016,8 +972,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 	return ret;
 }
 
-long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
-				struct kvm_arm_copy_mte_tags *copy_tags)
+int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+			       struct kvm_arm_copy_mte_tags *copy_tags)
 {
 	gpa_t guest_ipa = copy_tags->guest_ipa;
 	size_t length = copy_tags->length;
@@ -1038,50 +994,72 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
 		return -EINVAL;
 
+	/* Lengths above INT_MAX cannot be represented in the return value */
+	if (length > INT_MAX)
+		return -EINVAL;
+
 	gfn = gpa_to_gfn(guest_ipa);
 
 	mutex_lock(&kvm->slots_lock);
 
+	if (write && atomic_read(&kvm->nr_memslots_dirty_logging)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	while (length > 0) {
-		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+		struct page *page = __gfn_to_page(kvm, gfn, write);
 		void *maddr;
 		unsigned long num_tags;
-		struct page *page;
+		struct folio *folio;
 
-		if (is_error_noslot_pfn(pfn)) {
+		if (!page) {
			ret = -EFAULT;
			goto out;
 		}
 
-		page = pfn_to_online_page(pfn);
-		if (!page) {
+		if (!pfn_to_online_page(page_to_pfn(page))) {
 			/* Reject ZONE_DEVICE memory */
+			kvm_release_page_unused(page);
 			ret = -EFAULT;
 			goto out;
 		}
 
+		folio = page_folio(page);
 		maddr = page_address(page);
 
 		if (!write) {
-			if (test_bit(PG_mte_tagged, &page->flags))
+			if ((folio_test_hugetlb(folio) &&
+			     folio_test_hugetlb_mte_tagged(folio)) ||
+			     page_mte_tagged(page))
 				num_tags = mte_copy_tags_to_user(tags, maddr,
 							MTE_GRANULES_PER_PAGE);
 			else
 				/* No tags in memory, so write zeros */
 				num_tags = MTE_GRANULES_PER_PAGE -
 					clear_user(tags, MTE_GRANULES_PER_PAGE);
-			kvm_release_pfn_clean(pfn);
+			kvm_release_page_clean(page);
 		} else {
+			/*
+			 * Only locking to serialise with a concurrent
+			 * __set_ptes() in the VMM but still overriding the
+			 * tags, hence ignoring the return value.
+			 */
+			if (folio_test_hugetlb(folio))
+				folio_try_hugetlb_mte_tagging(folio);
+			else
+				try_page_mte_tagging(page);
 			num_tags = mte_copy_tags_from_user(maddr, tags,
 							MTE_GRANULES_PER_PAGE);
 
-			/*
-			 * Set the flag after checking the write
-			 * completed fully
-			 */
-			if (num_tags == MTE_GRANULES_PER_PAGE)
-				set_bit(PG_mte_tagged, &page->flags);
+			/* uaccess failed, don't leave stale tags */
+			if (num_tags != MTE_GRANULES_PER_PAGE)
+				mte_clear_page_tags(maddr);
+			if (folio_test_hugetlb(folio))
+				folio_set_hugetlb_mte_tagged(folio);
+			else
+				set_page_mte_tagged(page);
 
-			kvm_release_pfn_dirty(pfn);
+			kvm_release_page_dirty(page);
 		}
 
 		if (num_tags != MTE_GRANULES_PER_PAGE) {