diff options
Diffstat (limited to 'arch/arm64/kvm/guest.c')
| -rw-r--r-- | arch/arm64/kvm/guest.c | 877 |
1 files changed, 706 insertions, 171 deletions
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dd436a50fce7..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012,2013 - ARM Ltd * Author: Marc Zyngier <marc.zyngier@arm.com> @@ -5,51 +6,66 @@ * Derived from arch/arm/kvm/guest.c: * Copyright (C) 2012 - Virtual Open Systems and Columbia University * Author: Christoffer Dall <c.dall@virtualopensystems.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/bits.h> #include <linux/errno.h> #include <linux/err.h> +#include <linux/nospec.h> #include <linux/kvm_host.h> #include <linux/module.h> +#include <linux/stddef.h> +#include <linux/string.h> #include <linux/vmalloc.h> #include <linux/fs.h> -#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> #include <asm/cputype.h> #include <linux/uaccess.h> +#include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> +#include <asm/kvm_nested.h> +#include <asm/sigcontext.h> #include "trace.h" -#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } -#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } - -struct kvm_stats_debugfs_item debugfs_entries[] = { - VCPU_STAT(hvc_exit_stat), - VCPU_STAT(wfe_exit_stat), - VCPU_STAT(wfi_exit_stat), - VCPU_STAT(mmio_exit_user), - VCPU_STAT(mmio_exit_kernel), - VCPU_STAT(exits), - { NULL } +const struct _kvm_stats_desc kvm_vm_stats_desc[] = { + KVM_GENERIC_VM_STATS() +}; + +const struct kvm_stats_header kvm_vm_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vm_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vm_stats_desc), }; -int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { + KVM_GENERIC_VCPU_STATS(), + STATS_DESC_COUNTER(VCPU, hvc_exit_stat), + STATS_DESC_COUNTER(VCPU, wfe_exit_stat), + STATS_DESC_COUNTER(VCPU, wfi_exit_stat), + STATS_DESC_COUNTER(VCPU, mmio_exit_user), + STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), + STATS_DESC_COUNTER(VCPU, exits) +}; + +const struct kvm_stats_header kvm_vcpu_stats_header = { + .name_size = KVM_STATS_NAME_SIZE, + .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), + .id_offset = sizeof(struct kvm_stats_header), + .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, + .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + + sizeof(kvm_vcpu_stats_desc), +}; + +static bool core_reg_offset_is_vreg(u64 off) { - return 0; + return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) && + off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr); } static u64 core_reg_offset_from_id(u64 id) @@ -57,9 +73,8 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } -static int validate_core_offset(const struct kvm_one_reg *reg) +static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) { - u64 off = core_reg_offset_from_id(reg->id); int size; switch (off) { @@ -89,11 +104,83 @@ static int validate_core_offset(const struct kvm_one_reg *reg) return -EINVAL; } - if (KVM_REG_SIZE(reg->id) == size && - IS_ALIGNED(off, size / sizeof(__u32))) - return 0; + if (!IS_ALIGNED(off, size / sizeof(__u32))) + return -EINVAL; - return -EINVAL; + /* + * The KVM_REG_ARM64_SVE regs must be used instead of + * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on + * SVE-enabled vcpus: + */ + if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off)) + return -EINVAL; + + return size; +} + +static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size = core_reg_size_from_offset(vcpu, off); + + if (size < 0) + return NULL; + + if (KVM_REG_SIZE(reg->id) != size) + return NULL; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + off -= KVM_REG_ARM_CORE_REG(regs.regs[0]); + off /= 2; + return &vcpu->arch.ctxt.regs.regs[off]; + + case KVM_REG_ARM_CORE_REG(regs.sp): + return &vcpu->arch.ctxt.regs.sp; + + case KVM_REG_ARM_CORE_REG(regs.pc): + return &vcpu->arch.ctxt.regs.pc; + + case KVM_REG_ARM_CORE_REG(regs.pstate): + return &vcpu->arch.ctxt.regs.pstate; + + case KVM_REG_ARM_CORE_REG(sp_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1); + + case KVM_REG_ARM_CORE_REG(elr_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): + return &vcpu->arch.ctxt.spsr_abt; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]): + return &vcpu->arch.ctxt.spsr_und; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]): + return &vcpu->arch.ctxt.spsr_irq; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]): + return &vcpu->arch.ctxt.spsr_fiq; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]); + off /= 4; + return &vcpu->arch.ctxt.fp_regs.vregs[off]; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return &vcpu->arch.ctxt.fp_regs.fpsr; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return &vcpu->arch.ctxt.fp_regs.fpcr; + + default: + return NULL; + } } static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -105,8 +192,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * off the index in the "array". */ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); + void *addr; u32 off; /* Our ID is an index into the kvm_regs struct. */ @@ -115,10 +202,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; - if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; @@ -127,10 +215,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); __uint128_t tmp; - void *valp = &tmp; + void *valp = &tmp, *addr; u64 off; int err = 0; @@ -140,7 +227,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) @@ -155,7 +243,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: - if (!system_supports_32bit_el0()) + if (!kvm_supports_32bit_el0()) return -EINVAL; break; case PSR_AA32_MODE_FIQ: @@ -163,9 +251,15 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + case PSR_AA32_MODE_SYS: if (!vcpu_el1_is_32bit(vcpu)) return -EINVAL; break; + case PSR_MODE_EL2h: + case PSR_MODE_EL2t: + if (!vcpu_has_nv(vcpu)) + return -EINVAL; + fallthrough; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: @@ -178,116 +272,429 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } } - memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + memcpy(addr, valp, KVM_REG_SIZE(reg->id)); + + if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwise, this is a privileged mode, and *all* the + * registers must be narrowed to 32bit. + */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); + + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); + } out: return err; } -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - return -EINVAL; -} +#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64) +#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq))) -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - return -EINVAL; + unsigned int max_vq, vq; + u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; + + if (!vcpu_has_sve(vcpu)) + return -ENOENT; + + if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl))) + return -EINVAL; + + memset(vqs, 0, sizeof(vqs)); + + max_vq = vcpu_sve_max_vq(vcpu); + for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq) + if (sve_vq_available(vq)) + vqs[vq_word(vq)] |= vq_mask(vq); + + if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs))) + return -EFAULT; + + return 0; } -static unsigned long num_core_regs(void) +static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - return sizeof(struct kvm_regs) / sizeof(__u32); + unsigned int max_vq, vq; + u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; + + if (!vcpu_has_sve(vcpu)) + return -ENOENT; + + if (kvm_arm_vcpu_sve_finalized(vcpu)) + return -EPERM; /* too late! */ + + if (WARN_ON(vcpu->arch.sve_state)) + return -EINVAL; + + if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs))) + return -EFAULT; + + max_vq = 0; + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq) + if (vq_present(vqs, vq)) + max_vq = vq; + + if (max_vq > sve_vq_from_vl(kvm_sve_max_vl)) + return -EINVAL; + + /* + * Vector lengths supported by the host can't currently be + * hidden from the guest individually: instead we can only set a + * maximum via ZCR_EL2.LEN. So, make sure the available vector + * lengths match the set requested exactly up to the requested + * maximum: + */ + for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq) + if (vq_present(vqs, vq) != sve_vq_available(vq)) + return -EINVAL; + + /* Can't run with no vector lengths at all: */ + if (max_vq < SVE_VQ_MIN) + return -EINVAL; + + /* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */ + vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq); + + return 0; } -/** - * ARM64 versions of the TIMER registers, always available on arm64 +#define SVE_REG_SLICE_SHIFT 0 +#define SVE_REG_SLICE_BITS 5 +#define SVE_REG_ID_SHIFT (SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS) +#define SVE_REG_ID_BITS 5 + +#define SVE_REG_SLICE_MASK \ + GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1, \ + SVE_REG_SLICE_SHIFT) +#define SVE_REG_ID_MASK \ + GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT) + +#define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS) + +#define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0)) +#define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0)) + +/* + * Number of register slices required to cover each whole SVE register. + * NOTE: Only the first slice every exists, for now. + * If you are tempted to modify this, you must also rework sve_reg_to_region() + * to match: */ +#define vcpu_sve_slices(vcpu) 1 -#define NUM_TIMER_REGS 3 +/* Bounds of a single SVE register slice within vcpu->arch.sve_state */ +struct sve_state_reg_region { + unsigned int koffset; /* offset into sve_state in kernel memory */ + unsigned int klen; /* length in kernel memory */ + unsigned int upad; /* extra trailing padding in user memory */ +}; -static bool is_timer_reg(u64 index) +/* + * Validate SVE register ID and get sanitised bounds for user/kernel SVE + * register copy + */ +static int sve_reg_to_region(struct sve_state_reg_region *region, + struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) { - switch (index) { - case KVM_REG_ARM_TIMER_CTL: - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: - return true; + /* reg ID ranges for Z- registers */ + const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0); + const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1, + SVE_NUM_SLICES - 1); + + /* reg ID ranges for P- registers and FFR (which are contiguous) */ + const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0); + const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1); + + unsigned int vq; + unsigned int reg_num; + + unsigned int reqoffset, reqlen; /* User-requested offset and length */ + unsigned int maxlen; /* Maximum permitted length */ + + size_t sve_state_size; + + const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1, + SVE_NUM_SLICES - 1); + + /* Verify that the P-regs and FFR really do have contiguous IDs: */ + BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1); + + /* Verify that we match the UAPI header: */ + BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES); + + reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT; + + if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) { + if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) + return -ENOENT; + + vq = vcpu_sve_max_vq(vcpu); + + reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) - + SVE_SIG_REGS_OFFSET; + reqlen = KVM_SVE_ZREG_SIZE; + maxlen = SVE_SIG_ZREG_SIZE(vq); + } else if (reg->id >= preg_id_min && reg->id <= preg_id_max) { + if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) + return -ENOENT; + + vq = vcpu_sve_max_vq(vcpu); + + reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) - + SVE_SIG_REGS_OFFSET; + reqlen = KVM_SVE_PREG_SIZE; + maxlen = SVE_SIG_PREG_SIZE(vq); + } else { + return -EINVAL; } - return false; + + sve_state_size = vcpu_sve_state_size(vcpu); + if (WARN_ON(!sve_state_size)) + return -EINVAL; + + region->koffset = array_index_nospec(reqoffset, sve_state_size); + region->klen = min(maxlen, reqlen); + region->upad = reqlen - region->klen; + + return 0; } -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + int ret; + struct sve_state_reg_region region; + char __user *uptr = (char __user *)reg->addr; + + /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */ + if (reg->id == KVM_REG_ARM64_SVE_VLS) + return get_sve_vls(vcpu, reg); + + /* Try to interpret reg ID as an architectural SVE register... */ + ret = sve_reg_to_region(®ion, vcpu, reg); + if (ret) + return ret; + + if (!kvm_arm_vcpu_sve_finalized(vcpu)) + return -EPERM; + + if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset, + region.klen) || + clear_user(uptr + region.klen, region.upad)) return -EFAULT; return 0; } -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; int ret; + struct sve_state_reg_region region; + const char __user *uptr = (const char __user *)reg->addr; + + /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */ + if (reg->id == KVM_REG_ARM64_SVE_VLS) + return set_sve_vls(vcpu, reg); + + /* Try to interpret reg ID as an architectural SVE register... */ + ret = sve_reg_to_region(®ion, vcpu, reg); + if (ret) + return ret; + + if (!kvm_arm_vcpu_sve_finalized(vcpu)) + return -EPERM; - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) + if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr, + region.klen)) return -EFAULT; - return kvm_arm_timer_set_reg(vcpu, reg->id, val); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static int copy_core_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + unsigned int i; + int n = 0; + + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { + u64 reg = KVM_REG_ARM64 | KVM_REG_ARM_CORE | i; + int size = core_reg_size_from_offset(vcpu, i); + + if (size < 0) + continue; + + switch (size) { + case sizeof(__u32): + reg |= KVM_REG_SIZE_U32; + break; + + case sizeof(__u64): + reg |= KVM_REG_SIZE_U64; + break; + + case sizeof(__uint128_t): + reg |= KVM_REG_SIZE_U128; + break; + + default: + WARN_ON(1); + continue; + } + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + + n++; + } + + return n; } -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) { - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; + return copy_core_reg_indices(vcpu, NULL); +} + +static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) +{ + const unsigned int slices = vcpu_sve_slices(vcpu); + + if (!vcpu_has_sve(vcpu)) + return 0; + + /* Policed by KVM_GET_REG_LIST: */ + WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; + return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */) + + 1; /* KVM_REG_ARM64_SVE_VLS */ +} + +static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + const unsigned int slices = vcpu_sve_slices(vcpu); + u64 reg; + unsigned int i, n; + int num_regs = 0; + + if (!vcpu_has_sve(vcpu)) + return 0; + + /* Policed by KVM_GET_REG_LIST: */ + WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); + + /* + * Enumerate this first, so that userspace can save/restore in + * the order reported by KVM_GET_REG_LIST: + */ + reg = KVM_REG_ARM64_SVE_VLS; + if (put_user(reg, uindices++)) + return -EFAULT; + ++num_regs; + + for (i = 0; i < slices; i++) { + for (n = 0; n < SVE_NUM_ZREGS; n++) { + reg = KVM_REG_ARM64_SVE_ZREG(n, i); + if (put_user(reg, uindices++)) + return -EFAULT; + num_regs++; + } + + for (n = 0; n < SVE_NUM_PREGS; n++) { + reg = KVM_REG_ARM64_SVE_PREG(n, i); + if (put_user(reg, uindices++)) + return -EFAULT; + num_regs++; + } + + reg = KVM_REG_ARM64_SVE_FFR(i); + if (put_user(reg, uindices++)) + return -EFAULT; + num_regs++; + } + + return num_regs; } /** * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * @vcpu: the vCPU pointer * * This is for all registers. */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) - + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS; + unsigned long res = 0; + + res += num_core_regs(vcpu); + res += num_sve_regs(vcpu); + res += kvm_arm_num_sys_reg_descs(vcpu); + res += kvm_arm_get_fw_num_regs(vcpu); + + return res; } /** * kvm_arm_copy_reg_indices - get indices of all registers. + * @vcpu: the vCPU pointer + * @uindices: register list to copy * * We do core registers right here, then we append system regs. */ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - unsigned int i; - const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; int ret; - for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { - if (put_user(core_reg | i, uindices)) - return -EFAULT; - uindices++; - } + ret = copy_core_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; - ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); - if (ret) + ret = copy_sve_reg_indices(vcpu, uindices); + if (ret < 0) return ret; - uindices += kvm_arm_get_fw_num_regs(vcpu); + uindices += ret; - ret = copy_timer_indices(vcpu, uindices); - if (ret) + ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); + if (ret < 0) return ret; - uindices += NUM_TIMER_REGS; + uindices += kvm_arm_get_fw_num_regs(vcpu); return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -298,15 +705,13 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) return -EINVAL; - /* Register group 16 means we want a core register. */ - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) - return get_core_reg(vcpu, reg); - - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + switch (reg->id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: return kvm_arm_get_fw_reg(vcpu, reg); - - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); + case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); + } return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -317,15 +722,13 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) return -EINVAL; - /* Register group 16 means we set a core register. */ - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) - return set_core_reg(vcpu, reg); - - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) + switch (reg->id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); + case KVM_REG_ARM_FW: + case KVM_REG_ARM_FW_FEAT_BMAP: return kvm_arm_set_fw_reg(vcpu, reg); - - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); + case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); + } return kvm_arm_sys_reg_set_reg(vcpu, reg); } @@ -345,37 +748,81 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); - events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + events->exception.serror_has_esr = cpus_have_final_cap(ARM64_HAS_RAS_EXTN); + events->exception.serror_pending = (vcpu->arch.hcr_el2 & HCR_VSE) || + vcpu_get_flag(vcpu, NESTED_SERROR_PENDING); if (events->exception.serror_pending && events->exception.serror_has_esr) events->exception.serror_esr = vcpu_get_vsesr(vcpu); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. + */ + return 0; } +static void commit_pending_events(struct kvm_vcpu *vcpu) +{ + if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) + return; + + /* + * Reset the MMIO emulation state to avoid stepping PC after emulating + * the exception entry. + */ + vcpu->mmio_needed = false; + kvm_call_hyp(__kvm_adjust_pc, vcpu); +} + int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; + u64 esr = events->exception.serror_esr; + int ret = 0; - if (serror_pending && has_esr) { - if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) - return -EINVAL; - - if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) - kvm_set_sei_esr(vcpu, events->exception.serror_esr); - else - return -EINVAL; - } else if (serror_pending) { - kvm_inject_vabt(vcpu); + /* + * Immediately commit the pending SEA to the vCPU's architectural + * state which is necessary since we do not return a pending SEA + * to userspace via KVM_GET_VCPU_EVENTS. + */ + if (ext_dabt_pending) { + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + commit_pending_events(vcpu); } - return 0; + if (ret < 0) + return ret; + + if (!serror_pending) + return 0; + + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && has_esr) + return -EINVAL; + + if (has_esr && (esr & ~ESR_ELx_ISS_MASK)) + return -EINVAL; + + if (has_esr) + ret = kvm_inject_serror_esr(vcpu, esr); + else + ret = kvm_inject_serror(vcpu); + + /* + * We could've decided that the SError is due for immediate software + * injection; commit the exception in case userspace decides it wants + * to inject more exceptions for some strange reason. + */ + commit_pending_events(vcpu); + return (ret < 0) ? ret : 0; } -int __attribute_const__ kvm_target_cpu(void) +u32 __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); @@ -395,7 +842,7 @@ int __attribute_const__ kvm_target_cpu(void) break; case ARM_CPU_IMP_APM: switch (part_number) { - case APM_CPU_PART_POTENZA: + case APM_CPU_PART_XGENE: return KVM_ARM_TARGET_XGENE_POTENZA; } break; @@ -405,26 +852,6 @@ int __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_GENERIC_V8; } -int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) -{ - int target = kvm_target_cpu(); - - if (target < 0) - return -ENODEV; - - memset(init, 0, sizeof(*init)); - - /* - * For now, we don't return any features. - * In future, we might use features to return target - * specific features available for the preferred - * target type. - */ - init->target = (__u32)target; - - return 0; -} - int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; @@ -441,15 +868,10 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; } -#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ - KVM_GUESTDBG_USE_SW_BP | \ - KVM_GUESTDBG_USE_HW | \ - KVM_GUESTDBG_SINGLESTEP) - /** * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging - * @kvm: pointer to the KVM struct - * @kvm_guest_debug: the ioctl data buffer + * @vcpu: the vCPU pointer + * @dbg: the ioctl data buffer * * This sets up and enables the VM for guest debugging. Userspace * passes in a control flag to enable different debug types and @@ -459,30 +881,24 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int ret = 0; - trace_kvm_set_guest_debug(vcpu, dbg->control); - if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { - ret = -EINVAL; - goto out; - } - - if (dbg->control & KVM_GUESTDBG_ENABLE) { - vcpu->guest_debug = dbg->control; - - /* Hardware assisted Break and Watch points */ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { - vcpu->arch.external_debug_state = dbg->arch; - } + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) + return -EINVAL; - } else { - /* If not enabled clear all flags */ + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { vcpu->guest_debug = 0; + vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING); + return 0; } -out: - return ret; + vcpu->guest_debug = dbg->control; + + /* Hardware assisted Break and Watch points */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) + vcpu->arch.external_debug_state = dbg->arch; + + return 0; } int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, @@ -492,11 +908,16 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_ARM_VCPU_PMU_V3_CTRL: + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -517,6 +938,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -537,6 +961,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -544,3 +971,111 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, return ret; } + +int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, + struct kvm_arm_copy_mte_tags *copy_tags) +{ + gpa_t guest_ipa = copy_tags->guest_ipa; + size_t length = copy_tags->length; + void __user *tags = copy_tags->addr; + gpa_t gfn; + bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST); + int ret = 0; + + if (!kvm_has_mte(kvm)) + return -EINVAL; + + if (copy_tags->reserved[0] || copy_tags->reserved[1]) + return -EINVAL; + + if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST) + return -EINVAL; + + if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK) + return -EINVAL; + + /* Lengths above INT_MAX cannot be represented in the return value */ + if (length > INT_MAX) + return -EINVAL; + + gfn = gpa_to_gfn(guest_ipa); + + mutex_lock(&kvm->slots_lock); + + if (write && atomic_read(&kvm->nr_memslots_dirty_logging)) { + ret = -EBUSY; + goto out; + } + + while (length > 0) { + struct page *page = __gfn_to_page(kvm, gfn, write); + void *maddr; + unsigned long num_tags; + struct folio *folio; + + if (!page) { + ret = -EFAULT; + goto out; + } + + if (!pfn_to_online_page(page_to_pfn(page))) { + /* Reject ZONE_DEVICE memory */ + kvm_release_page_unused(page); + ret = -EFAULT; + goto out; + } + folio = page_folio(page); + maddr = page_address(page); + + if (!write) { + if ((folio_test_hugetlb(folio) && + folio_test_hugetlb_mte_tagged(folio)) || + page_mte_tagged(page)) + num_tags = mte_copy_tags_to_user(tags, maddr, + MTE_GRANULES_PER_PAGE); + else + /* No tags in memory, so write zeros */ + num_tags = MTE_GRANULES_PER_PAGE - + clear_user(tags, MTE_GRANULES_PER_PAGE); + kvm_release_page_clean(page); + } else { + /* + * Only locking to serialise with a concurrent + * __set_ptes() in the VMM but still overriding the + * tags, hence ignoring the return value. + */ + if (folio_test_hugetlb(folio)) + folio_try_hugetlb_mte_tagging(folio); + else + try_page_mte_tagging(page); + num_tags = mte_copy_tags_from_user(maddr, tags, + MTE_GRANULES_PER_PAGE); + + /* uaccess failed, don't leave stale tags */ + if (num_tags != MTE_GRANULES_PER_PAGE) + mte_clear_page_tags(maddr); + if (folio_test_hugetlb(folio)) + folio_set_hugetlb_mte_tagged(folio); + else + set_page_mte_tagged(page); + + kvm_release_page_dirty(page); + } + + if (num_tags != MTE_GRANULES_PER_PAGE) { + ret = -EFAULT; + goto out; + } + + gfn++; + tags += num_tags; + length -= PAGE_SIZE; + } + +out: + mutex_unlock(&kvm->slots_lock); + /* If some data has been copied report the number of bytes copied */ + if (length != copy_tags->length) + return copy_tags->length - length; + return ret; +} |
