Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r--  arch/s390/kvm/kvm-s390.c  919
1 file changed, 597 insertions(+), 322 deletions(-)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 17b81659cdb2..56a50524b3ee 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -10,10 +10,11 @@ * Jason J. Herne <jjherne@us.ibm.com> */ -#define KMSG_COMPONENT "kvm-s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "kvm-s390: " fmt #include <linux/compiler.h> +#include <linux/entry-virt.h> +#include <linux/export.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/hrtimer.h> @@ -23,6 +24,7 @@ #include <linux/mman.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/cpufeature.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> @@ -33,19 +35,22 @@ #include <linux/pgtable.h> #include <linux/mmu_notifier.h> +#include <asm/access-regs.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/stp.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/nmi.h> -#include <asm/switch_to.h> #include <asm/isc.h> #include <asm/sclp.h> #include <asm/cpacf.h> #include <asm/timex.h> +#include <asm/asm.h> +#include <asm/fpu.h> #include <asm/ap.h> #include <asm/uv.h> -#include <asm/fpu/api.h> #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" @@ -66,7 +71,14 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_pfault_done), STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio), - STATS_DESC_COUNTER(VM, aen_forward) + STATS_DESC_COUNTER(VM, aen_forward), + STATS_DESC_COUNTER(VM, gmap_shadow_reuse), + STATS_DESC_COUNTER(VM, gmap_shadow_create), + STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry), + STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry), }; const struct kvm_stats_header kvm_vm_stats_header = { @@ -125,6 +137,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_io_other), STATS_DESC_COUNTER(VCPU, instruction_lpsw), STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_lpswey), STATS_DESC_COUNTER(VCPU, instruction_pfmf), STATS_DESC_COUNTER(VCPU, instruction_ptff), STATS_DESC_COUNTER(VCPU, instruction_sck), @@ -172,7 +185,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), - STATS_DESC_COUNTER(VCPU, pfault_sync) + STATS_DESC_COUNTER(VCPU, pfault_sync), + STATS_DESC_COUNTER(VCPU, signal_exits) }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -259,7 +273,6 @@ debug_info_t *kvm_s390_dbf_uv; /* forward declarations */ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); -static int sca_switch_to_extended(struct kvm *kvm); static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) { @@ -332,28 +345,46 @@ static inline int plo_test_bit(unsigned char nr) " lgr 0,%[function]\n" /* Parameter registers are ignored for "test bit" */ " plo 0,0,0,0(0)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) + CC_IPM(cc) + : CC_OUT(cc, cc) : [function] "d" (function) + : CC_CLOBBER_LIST("0")); + return CC_TRANSFORM(cc) == 0; +} + +static __always_inline void pfcr_query(u8 (*query)[16]) +{ + asm volatile( + " lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]" + : [query] "=QS" (*query) + : : 
"cc", "0"); - return cc == 0; } -static __always_inline void __insn32_query(unsigned int opcode, u8 *query) +static __always_inline void __sortl_query(u8 (*query)[32]) { asm volatile( " lghi 0,0\n" - " lgr 1,%[query]\n" + " la 1,%[query]\n" /* Parameter registers are ignored */ - " .insn rrf,%[opc] << 16,2,4,6,0\n" + " .insn rre,0xb9380000,2,4" + : [query] "=R" (*query) : - : [query] "d" ((unsigned long)query), [opc] "i" (opcode) - : "cc", "memory", "0", "1"); + : "cc", "0", "1"); } -#define INSN_SORTL 0xb938 -#define INSN_DFLTCC 0xb939 +static __always_inline void __dfltcc_query(u8 (*query)[32]) +{ + asm volatile( + " lghi 0,0\n" + " la 1,%[query]\n" + /* Parameter registers are ignored */ + " .insn rrf,0xb9390000,2,4,6,0" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} static void __init kvm_s390_cpu_feat_init(void) { @@ -407,18 +438,21 @@ static void __init kvm_s390_cpu_feat_init(void) kvm_s390_available_subfunc.kdsa); if (test_facility(150)) /* SORTL */ - __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); + __sortl_query(&kvm_s390_available_subfunc.sortl); if (test_facility(151)) /* DFLTCC */ - __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); + __dfltcc_query(&kvm_s390_available_subfunc.dfltcc); + + if (test_facility(201)) /* PFCR */ + pfcr_query(&kvm_s390_available_subfunc.pfcr); - if (MACHINE_HAS_ESOP) + if (machine_has_esop()) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). */ - if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao || !test_facility(3) || !nested) return; allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); @@ -556,7 +590,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: - case KVM_CAP_DEVICE_CTRL: case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: @@ -574,6 +607,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: case KVM_CAP_IRQFD_RESAMPLE: + case KVM_CAP_S390_USER_OPEREXEC: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -581,7 +615,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_S390_HPAGE_1M: r = 0; - if (hpage && !kvm_is_ucontrol(kvm)) + if (hpage && !(kvm && kvm_is_ucontrol(kvm))) r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -599,19 +633,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: - r = KVM_S390_BSCA_CPU_SLOTS; + /* + * Return the same value for KVM_CAP_MAX_VCPUS and + * KVM_CAP_MAX_VCPU_ID to conform with the KVM API. 
+ */ + r = KVM_S390_ESCA_CPU_SLOTS; if (!kvm_s390_use_sca_entries()) r = KVM_MAX_VCPUS; - else if (sclp.has_esca && sclp.has_64bscao) - r = KVM_S390_ESCA_CPU_SLOTS; if (ext == KVM_CAP_NR_VCPUS) r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: - r = MACHINE_HAS_ESOP; + r = machine_has_esop(); break; case KVM_CAP_S390_VECTOR_REGISTERS: - r = MACHINE_HAS_VX; + r = test_facility(129); break; case KVM_CAP_S390_RI: r = test_facility(64); @@ -760,7 +796,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) mutex_lock(&kvm->lock); if (kvm->created_vcpus) { r = -EBUSY; - } else if (MACHINE_HAS_VX) { + } else if (cpu_has_vx()) { set_kvm_facility(kvm->arch.model.fac_mask, 129); set_kvm_facility(kvm->arch.model.fac_list, 129); if (test_facility(134)) { @@ -783,6 +819,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) set_kvm_facility(kvm->arch.model.fac_mask, 192); set_kvm_facility(kvm->arch.model.fac_list, 192); } + if (test_facility(198)) { + set_kvm_facility(kvm->arch.model.fac_mask, 198); + set_kvm_facility(kvm->arch.model.fac_list, 198); + } + if (test_facility(199)) { + set_kvm_facility(kvm->arch.model.fac_mask, 199); + set_kvm_facility(kvm->arch.model.fac_list, 199); + } r = 0; } else r = -EINVAL; @@ -879,6 +923,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_USER_OPEREXEC: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_OPEREXEC"); + kvm->arch.user_operexec = 1; + icpt_operexc_on_all_vcpus(kvm); + r = 0; + break; default: r = -EINVAL; break; @@ -982,7 +1032,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -1527,6 +1577,42 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm, ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); + VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); + + return 0; +} + +#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \ +( \ + ((struct kvm_s390_vm_cpu_uv_feat){ \ + .ap = 1, \ + .ap_intr = 1, \ + }) \ + .feat \ +) + +static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr; + unsigned long data, filter; + + filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; + if (get_user(data, &ptr->feat)) + return -EFAULT; + if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS)) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } + kvm->arch.model.uv_feat_guest.feat = data; + mutex_unlock(&kvm->lock); + + VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data); return 0; } @@ -1545,6 +1631,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: ret = kvm_s390_set_processor_subfunc(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: + ret = kvm_s390_set_uv_feat(kvm, 
attr); + break; } return ret; } @@ -1705,6 +1794,9 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm, ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); + VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); return 0; } @@ -1773,6 +1865,36 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); + VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0], + ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]); + + return 0; +} + +static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; + unsigned long feat = kvm->arch.model.uv_feat_guest.feat; + + if (put_user(feat, &dst->feat)) + return -EFAULT; + VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); + + return 0; +} + +static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr; + unsigned long feat; + + BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications)); + + feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK; + if (put_user(feat, &dst->feat)) + return -EFAULT; + VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat); return 0; } @@ -1800,6 +1922,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_SUBFUNC: ret = kvm_s390_get_machine_subfunc(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: + ret = kvm_s390_get_processor_uv_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: + ret = kvm_s390_get_machine_uv_feat(kvm, attr); + break; } return ret; } @@ -1812,22 +1940,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) * Updates the Multiprocessor Topology-Change-Report bit to signal * the guest with a topology change. * This is only relevant if the topology facility is present. - * - * The SCA version, bsca or esca, doesn't matter as offset is the same. 
*/ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val) { union sca_utility new, old; - struct bsca_block *sca; + struct esca_block *sca; - read_lock(&kvm->arch.sca_lock); sca = kvm->arch.sca; + old = READ_ONCE(sca->utility); do { - old = READ_ONCE(sca->utility); new = old; new.mtcr = val; - } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val); - read_unlock(&kvm->arch.sca_lock); + } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val)); } static int kvm_s390_set_topo_change_indication(struct kvm *kvm, @@ -1848,9 +1972,7 @@ static int kvm_s390_get_topo_change_indication(struct kvm *kvm, if (!test_kvm_facility(kvm, 11)) return -ENXIO; - read_lock(&kvm->arch.sca_lock); - topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr; - read_unlock(&kvm->arch.sca_lock); + topo = kvm->arch.sca->utility.mtcr; return put_user(topo, (u8 __user *)attr->addr); } @@ -1952,6 +2074,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_FEAT: case KVM_S390_VM_CPU_MACHINE_SUBFUNC: case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: + case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST: ret = 0; break; default: @@ -2156,6 +2280,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); ofs = 0; } + + if (cur_gfn < ms->base_gfn) + ofs = 0; + ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); while (ofs >= ms->npages && (mnode = rb_next(mnode))) { ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); @@ -2402,7 +2530,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) struct kvm_vcpu *vcpu; /* Disable the GISA if the ultravisor does not support AIV. */ - if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications)) + if (!uv_has_feature(BIT_UV_FEAT_AIV)) kvm_s390_gisa_disable(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -2542,17 +2670,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (kvm_s390_pv_is_protected(kvm)) break; - /* - * FMT 4 SIE needs esca. As we never switch back to bsca from - * esca, we need no cleanup in the error cases below - */ - r = sca_switch_to_extended(kvm); - if (r) - break; - - mmap_write_lock(current->mm); - r = gmap_mark_unmergeable(); - mmap_write_unlock(current->mm); + mmap_write_lock(kvm->mm); + r = gmap_helper_disable_cow_sharing(); + mmap_write_unlock(kvm->mm); if (r) break; @@ -2797,7 +2917,7 @@ static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_is_error_gpa(kvm, mop->gaddr)) { + if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) { r = PGM_ADDRESSING; goto out_unlock; } @@ -2859,7 +2979,7 @@ static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *m srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_is_error_gpa(kvm, mop->gaddr)) { + if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) { r = PGM_ADDRESSING; goto out_unlock; } @@ -2917,14 +3037,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) break; } case KVM_CREATE_IRQCHIP: { - struct kvm_irq_routing_entry routing; - r = -EINVAL; - if (kvm->arch.use_irqchip) { - /* Set up dummy routing. 
*/ - memset(&routing, 0, sizeof(routing)); - r = kvm_set_irq_routing(kvm, &routing, 0, 0); - } + if (kvm->arch.use_irqchip) + r = 0; break; } case KVM_SET_DEVICE_ATTR: { @@ -3072,7 +3187,7 @@ static int kvm_s390_apxa_installed(void) */ static void kvm_s390_set_crycb_format(struct kvm *kvm) { - kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; + kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb); /* Clear the CRYCB format bits - i.e., set format 0 by default */ kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK); @@ -3197,10 +3312,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm) static void sca_dispose(struct kvm *kvm) { - if (kvm->arch.use_esca) - free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); - else - free_page((unsigned long)(kvm->arch.sca)); + free_pages_exact(kvm->arch.sca, sizeof(*kvm->arch.sca)); kvm->arch.sca = NULL; } @@ -3214,10 +3326,9 @@ void kvm_arch_free_vm(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; - int i, rc; + gfp_t alloc_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO; char debug_name[16]; - static unsigned long sca_offset; + int i, rc; rc = -EINVAL; #ifdef CONFIG_KVM_S390_UCONTROL @@ -3238,20 +3349,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!sclp.has_64bscao) alloc_flags |= GFP_DMA; - rwlock_init(&kvm->arch.sca_lock); - /* start with basic SCA */ - kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); - if (!kvm->arch.sca) - goto out_err; mutex_lock(&kvm_lock); - sca_offset += 16; - if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) - sca_offset = 0; - kvm->arch.sca = (struct bsca_block *) - ((char *) kvm->arch.sca + sca_offset); + + kvm->arch.sca = alloc_pages_exact(sizeof(*kvm->arch.sca), alloc_flags); mutex_unlock(&kvm_lock); + if (!kvm->arch.sca) + goto out_err; - sprintf(debug_name, "kvm-%u", current->pid); + snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid); kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); if (!kvm->arch.dbf) @@ -3281,7 +3386,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { set_kvm_facility(kvm->arch.model.fac_mask, 147); set_kvm_facility(kvm->arch.model.fac_list, 147); } @@ -3292,6 +3397,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; + kvm->arch.model.uv_feat_guest.feat = 0; + kvm_s390_crypto_init(kvm); if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { @@ -3312,8 +3419,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -3335,7 +3454,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 
kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -3398,7 +3517,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3414,133 +3533,38 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { + struct esca_block *sca = vcpu->kvm->arch.sca; + if (!kvm_s390_use_sca_entries()) return; - read_lock(&vcpu->kvm->arch.sca_lock); - if (vcpu->kvm->arch.use_esca) { - struct esca_block *sca = vcpu->kvm->arch.sca; - - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); - sca->cpu[vcpu->vcpu_id].sda = 0; - } else { - struct bsca_block *sca = vcpu->kvm->arch.sca; - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - sca->cpu[vcpu->vcpu_id].sda = 0; - } - read_unlock(&vcpu->kvm->arch.sca_lock); + clear_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; } static void sca_add_vcpu(struct kvm_vcpu *vcpu) { - if (!kvm_s390_use_sca_entries()) { - phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); - - /* we still need the basic sca for the ipte control */ - vcpu->arch.sie_block->scaoh = sca_phys >> 32; - vcpu->arch.sie_block->scaol = sca_phys; - return; - } - read_lock(&vcpu->kvm->arch.sca_lock); - if (vcpu->kvm->arch.use_esca) { - struct esca_block *sca = vcpu->kvm->arch.sca; - phys_addr_t sca_phys = virt_to_phys(sca); - - sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); - vcpu->arch.sie_block->scaoh = sca_phys >> 32; - vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; - vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; - set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); - } else { - struct bsca_block *sca = vcpu->kvm->arch.sca; - phys_addr_t sca_phys = virt_to_phys(sca); - - sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); - vcpu->arch.sie_block->scaoh = sca_phys >> 32; - vcpu->arch.sie_block->scaol = sca_phys; - set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - } - read_unlock(&vcpu->kvm->arch.sca_lock); -} - -/* Basic SCA to Extended SCA data copy routines */ -static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) -{ - d->sda = s->sda; - d->sigp_ctrl.c = s->sigp_ctrl.c; - d->sigp_ctrl.scn = s->sigp_ctrl.scn; -} - -static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) -{ - int i; - - d->ipte_control = s->ipte_control; - d->mcn[0] = s->mcn; - for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) - sca_copy_entry(&d->cpu[i], &s->cpu[i]); -} - -static int sca_switch_to_extended(struct kvm *kvm) -{ - struct bsca_block *old_sca = kvm->arch.sca; - struct esca_block *new_sca; - struct kvm_vcpu *vcpu; - unsigned long vcpu_idx; - u32 scaol, scaoh; - phys_addr_t new_sca_phys; - - if (kvm->arch.use_esca) - return 0; - - new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!new_sca) - return -ENOMEM; - - new_sca_phys = virt_to_phys(new_sca); - scaoh = new_sca_phys >> 32; - scaol = new_sca_phys & ESCA_SCAOL_MASK; - - kvm_s390_vcpu_block_all(kvm); - write_lock(&kvm->arch.sca_lock); + struct esca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca_copy_b_to_e(new_sca, 
old_sca); + /* we still need the sca header for the ipte control */ + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; - kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { - vcpu->arch.sie_block->scaoh = scaoh; - vcpu->arch.sie_block->scaol = scaol; - vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; - } - kvm->arch.sca = new_sca; - kvm->arch.use_esca = 1; - - write_unlock(&kvm->arch.sca_lock); - kvm_s390_vcpu_unblock_all(kvm); - - free_page((unsigned long)old_sca); + if (!kvm_s390_use_sca_entries()) + return; - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", - old_sca, kvm->arch.sca); - return 0; + set_bit_inv(vcpu->vcpu_id, (unsigned long *)sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); } static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) { - int rc; - - if (!kvm_s390_use_sca_entries()) { - if (id < KVM_MAX_VCPUS) - return true; - return false; - } - if (id < KVM_S390_BSCA_CPU_SLOTS) - return true; - if (!sclp.has_esca || !sclp.has_64bscao) - return false; - - rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); + if (!kvm_s390_use_sca_entries()) + return id < KVM_MAX_VCPUS; - return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; + return id < KVM_S390_ESCA_CPU_SLOTS; } /* needs disabled preemption to protect from TOD sync and vcpu_load/put */ @@ -3633,7 +3657,6 @@ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - gmap_enable(vcpu->arch.enabled_gmap); kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING); if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __start_cpu_timer_accounting(vcpu); @@ -3646,8 +3669,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) __stop_cpu_timer_accounting(vcpu); kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING); - vcpu->arch.enabled_gmap = gmap_get_enabled(); - gmap_disable(vcpu->arch.enabled_gmap); } @@ -3665,8 +3686,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) } if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; - /* make vcpu_load load the right gmap on the first trigger */ - vcpu->arch.enabled_gmap = vcpu->arch.gmap; } static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) @@ -3688,6 +3707,13 @@ static bool kvm_has_pckmo_ecc(struct kvm *kvm) } +static bool kvm_has_pckmo_hmac(struct kvm *kvm) +{ + /* At least one HMAC subfunction must be present */ + return kvm_has_pckmo_subfunc(kvm, 118) || + kvm_has_pckmo_subfunc(kvm, 122); +} + static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) { /* @@ -3700,7 +3726,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); vcpu->arch.sie_block->eca &= ~ECA_APIE; - vcpu->arch.sie_block->ecd &= ~ECD_ECC; + vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC); if (vcpu->kvm->arch.crypto.apie) vcpu->arch.sie_block->eca |= ECA_APIE; @@ -3708,9 +3734,11 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) /* Set up protected key support */ if (vcpu->kvm->arch.crypto.aes_kw) { vcpu->arch.sie_block->ecb3 |= ECB3_AES; - /* ecc is also wrapped with AES key */ + /* ecc/hmac is also wrapped with AES key */ if (kvm_has_pckmo_ecc(vcpu->kvm)) vcpu->arch.sie_block->ecd |= ECD_ECC; + if (kvm_has_pckmo_hmac(vcpu->kvm)) + vcpu->arch.sie_block->ecd |= ECD_HMAC; } if 
(vcpu->kvm->arch.crypto.dea_kw) @@ -3759,8 +3787,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ - if (MACHINE_HAS_ESOP) + /* pgste_set_pte has special handling for !machine_has_esop() */ + if (machine_has_esop()) vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= ECB_SRSI; @@ -3782,7 +3810,7 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= ECA_IB; if (sclp.has_siif) vcpu->arch.sie_block->eca |= ECA_SII; - if (sclp.has_sigpif) + if (kvm_s390_use_sca_entries()) vcpu->arch.sie_block->eca |= ECA_SIGPI; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= ECA_VX; @@ -3810,8 +3838,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -3868,6 +3896,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_DIAG318; + vcpu->arch.acrs_loaded = false; kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; @@ -3878,9 +3907,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 156)) vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; /* fprs can be synchronized via vrs, even if the guest has no vx. With - * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. + * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format. */ - if (MACHINE_HAS_VX) + if (cpu_has_vx()) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; else vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; @@ -3891,7 +3920,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); @@ -3976,6 +4005,8 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long prefix; unsigned long i; + trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap)); + if (gmap_is_shadow(gmap)) return; if (start >= 1UL << 31) @@ -3995,7 +4026,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { /* do not poll with more than halt_poll_max_steal percent of steal time */ - if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= + if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >= READ_ONCE(halt_poll_max_steal)) { vcpu->stat.halt_no_poll_steal++; return true; @@ -4157,7 +4188,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) vcpu->run->s.regs.fpc = 0; /* * Do not reset these registers in the protected case, as some of - * them are overlayed and they are not accessible in this case + * them are overlaid and they are not accessible in this case * anyway. 
*/ if (!kvm_s390_pv_cpu_is_protected(vcpu)) { @@ -4226,33 +4257,24 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - int ret = 0; - vcpu_load(vcpu); - if (test_fp_ctl(fpu->fpc)) { - ret = -EINVAL; - goto out; - } vcpu->run->s.regs.fpc = fpu->fpc; - if (MACHINE_HAS_VX) + if (cpu_has_vx()) convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, (freg_t *) fpu->fprs); else memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); -out: vcpu_put(vcpu); - return ret; + return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { vcpu_load(vcpu); - /* make sure we have the latest values */ - save_fpu_regs(); - if (MACHINE_HAS_VX) + if (cpu_has_vx()) convert_vx_to_fp((freg_t *) fpu->fprs, (__vector128 *) vcpu->run->s.regs.vrs); else @@ -4382,6 +4404,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? 
FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4397,9 +4488,8 @@ retry: */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; @@ -4497,22 +4587,6 @@ int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clo return 1; } -/** - * kvm_arch_fault_in_page - fault-in guest page if necessary - * @vcpu: The corresponding virtual cpu - * @gpa: Guest physical address - * @writable: Whether the page should be writable or not - * - * Make sure that a guest page has been faulted-in on the host. - * - * Return: Zero on success, negative error code otherwise. - */ -long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) -{ - return gmap_fault(vcpu->arch.gmap, gpa, - writable ? FAULT_FLAG_WRITE : 0); -} - static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, unsigned long token) { @@ -4580,12 +4654,11 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) if (!vcpu->arch.gmap->pfault_enabled) return false; - hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); - hva += current->thread.gmap_addr & ~PAGE_MASK; + hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr); if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) return false; - return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); + return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch); } static int vcpu_pre_run(struct kvm_vcpu *vcpu) @@ -4602,12 +4675,9 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; - if (need_resched()) - schedule(); - if (!kvm_is_ucontrol(vcpu->kvm)) { rc = kvm_s390_deliver_pending_interrupts(vcpu); - if (rc) + if (rc || guestdbg_exit_pending(vcpu)) return rc; } @@ -4623,6 +4693,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; + current->thread.gmap_int_code = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); trace_kvm_s390_sie_enter(vcpu, cpuflags); @@ -4630,7 +4701,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) return 0; } -static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) +static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) { struct kvm_s390_pgm_info pgm_info = { .code = PGM_ADDRESSING, @@ -4666,10 +4737,182 @@ static int 
vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @foll: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + foll |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + foll &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. 
+ */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll); +} + +static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) +{ + unsigned int foll = 0; + unsigned long gaddr; + int rc; + + gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; + if (kvm_s390_cur_gmap_fault_is_write()) + foll = FOLL_WRITE; + + switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { + case 0: + vcpu->stat.exit_null++; + break; + case PGM_SECURE_STORAGE_ACCESS: + case PGM_SECURE_STORAGE_VIOLATION: + kvm_s390_assert_primary_as(vcpu); + /* + * This can happen after a reboot with asynchronous teardown; + * the new guest (normal or protected) will run on top of the + * previous protected guest. The old pages need to be destroyed + * so the new guest can use them. + */ + if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) { + /* + * Either KVM messed up the secure guest mapping or the + * same page is mapped into multiple secure guests. + * + * This exception is only triggered when a guest 2 is + * running and can therefore never occur in kernel + * context. + */ + pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n", + current->thread.gmap_int_code, current->comm, + current->pid); + send_sig(SIGSEGV, current, 0); + } + break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. 
+ */ + rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + foll = FOLL_WRITE; + fallthrough; + case PGM_PROTECTION: + case PGM_SEGMENT_TRANSLATION: + case PGM_PAGE_TRANSLATION: + case PGM_ASCE_TYPE: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + kvm_s390_assert_primary_as(vcpu); + return vcpu_dat_fault_handler(vcpu, gaddr, foll); + default: + KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); + send_sig(SIGSEGV, current, 0); + break; + } + return 0; +} + static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { struct mcck_volatile_info *mcck_info; struct sie_page *sie_page; + int rc; VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); @@ -4691,7 +4934,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) } if (vcpu->arch.sie_block->icptcode > 0) { - int rc = kvm_handle_sie_intercept(vcpu); + rc = kvm_handle_sie_intercept(vcpu); if (rc != -EOPNOTSUPP) return rc; @@ -4700,24 +4943,28 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; return -EREMOTE; - } else if (exit_reason != -EFAULT) { - vcpu->stat.exit_null++; - return 0; - } else if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = - current->thread.gmap_addr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } else if (current->thread.gmap_pfault) { - trace_kvm_s390_major_guest_pfault(vcpu); - current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - return vcpu_post_run_fault_in_sie(vcpu); + + return vcpu_post_run_handle_fault(vcpu); +} + +int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, + u64 *gprs, unsigned long gasce) +{ + int ret; + + guest_state_enter_irqoff(); + + /* + * The guest_state_{enter,exit}_irqoff() functions inform lockdep and + * tracing that entry to the guest will enable host IRQs, and exit from + * the guest will disable host IRQs. + */ + ret = sie64a(scb, gprs, gasce); + + guest_state_exit_irqoff(); + + return ret; } #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) @@ -4732,29 +4979,45 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) */ kvm_vcpu_srcu_read_lock(vcpu); - do { + while (true) { rc = vcpu_pre_run(vcpu); - if (rc) + kvm_vcpu_srcu_read_unlock(vcpu); + if (rc || guestdbg_exit_pending(vcpu)) break; - kvm_vcpu_srcu_read_unlock(vcpu); /* * As PF_VCPU will be used in fault handler, between - * guest_enter and guest_exit should be no uaccess. + * guest_timing_enter_irqoff and guest_timing_exit_irqoff + * should be no uaccess. 
*/ - local_irq_disable(); - guest_enter_irqoff(); - __disable_cpu_timer_accounting(vcpu); - local_irq_enable(); if (kvm_s390_pv_cpu_is_protected(vcpu)) { memcpy(sie_page->pv_grregs, vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } - if (test_cpu_flag(CIF_FPU)) - load_fpu_regs(); - exit_reason = sie64a(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs); + +xfer_to_guest_mode_check: + local_irq_disable(); + xfer_to_guest_mode_prepare(); + if (xfer_to_guest_mode_work_pending()) { + local_irq_enable(); + rc = kvm_xfer_to_guest_mode_handle_work(vcpu); + if (rc) + break; + goto xfer_to_guest_mode_check; + } + + guest_timing_enter_irqoff(); + __disable_cpu_timer_accounting(vcpu); + + exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs, + vcpu->arch.gmap->asce); + + __enable_cpu_timer_accounting(vcpu); + guest_timing_exit_irqoff(); + local_irq_enable(); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { memcpy(vcpu->run->s.regs.gprs, sie_page->pv_grregs, @@ -4770,16 +5033,15 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; } } - local_irq_disable(); - __enable_cpu_timer_accounting(vcpu); - guest_exit_irqoff(); - local_irq_enable(); kvm_vcpu_srcu_read_lock(vcpu); rc = vcpu_post_run(vcpu, exit_reason); - } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc); + if (rc || guestdbg_exit_pending(vcpu)) { + kvm_vcpu_srcu_read_unlock(vcpu); + break; + } + } - kvm_vcpu_srcu_read_unlock(vcpu); return rc; } @@ -4839,9 +5101,9 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; } - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (current->thread.gs_cb) { vcpu->arch.host_gscb = current->thread.gs_cb; save_gs_cb(vcpu->arch.host_gscb); @@ -4873,19 +5135,8 @@ static void sync_regs(struct kvm_vcpu *vcpu) } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); - /* save host (userspace) fprs/vrs */ - save_fpu_regs(); - vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; - vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; - if (MACHINE_HAS_VX) - current->thread.fpu.regs = vcpu->run->s.regs.vrs; - else - current->thread.fpu.regs = vcpu->run->s.regs.fprs; - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; - if (test_fp_ctl(current->thread.fpu.fpc)) - /* User space provided an invalid FPC, let's clear it */ - current->thread.fpu.fpc = 0; - + vcpu->arch.acrs_loaded = true; + kvm_s390_fpu_load(vcpu->run); /* Sync fmt2 only data */ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { sync_regs_fmt2(vcpu); @@ -4916,15 +5167,15 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); - __ctl_set_bit(2, 4); + local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); current->thread.gs_cb = vcpu->arch.host_gscb; restore_gs_cb(vcpu->arch.host_gscb); if (!vcpu->arch.host_gscb) - __ctl_clear_bit(2, 4); + local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT); vcpu->arch.host_gscb = NULL; preempt_enable(); } @@ -4946,12 +5197,8 @@ static void store_regs(struct kvm_vcpu *vcpu) kvm_run->s.regs.pfc = 
vcpu->arch.pfault_compare; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); - /* Save guest register state */ - save_fpu_regs(); - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - /* Restore will be done lazily at return */ - current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; - current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; + vcpu->arch.acrs_loaded = false; + kvm_s390_fpu_store(vcpu->run); if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) store_regs_fmt2(vcpu); } @@ -4959,6 +5206,7 @@ static void store_regs(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; + DECLARE_KERNEL_FPU_ONSTACK32(fpu); int rc; /* @@ -4970,7 +5218,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.pv.dumping) return -EINVAL; - if (kvm_run->immediate_exit) + if (!vcpu->wants_to_run) return -EINTR; if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || @@ -5000,6 +5248,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) goto out; } + kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR); sync_regs(vcpu); enable_cpu_timer_accounting(vcpu); @@ -5008,6 +5257,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (signal_pending(current) && !rc) { kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->stat.signal_exits++; rc = -EINTR; } @@ -5023,6 +5273,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) disable_cpu_timer_accounting(vcpu); store_regs(vcpu); + kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR); kvm_sigset_deactivate(vcpu); @@ -5059,7 +5310,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) gpa -= __LC_FPREGS_SAVE_AREA; /* manually convert vector registers if necessary */ - if (MACHINE_HAS_VX) { + if (cpu_has_vx()) { convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, fprs, 128); @@ -5097,8 +5348,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * switch in the run ioctl. Let's update our copies before we save * it into the save area */ - save_fpu_regs(); - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; + kvm_s390_fpu_store(vcpu->run); save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); @@ -5374,11 +5624,12 @@ static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, return r; } -long kvm_arch_vcpu_async_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; + int rc; switch (ioctl) { case KVM_S390_IRQ: { @@ -5386,7 +5637,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, if (copy_from_user(&s390irq, argp, sizeof(s390irq))) return -EFAULT; - return kvm_s390_inject_vcpu(vcpu, &s390irq); + rc = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; @@ -5396,10 +5648,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, return -EFAULT; if (s390int_to_s390irq(&s390int, &s390irq)) return -EINVAL; - return kvm_s390_inject_vcpu(vcpu, &s390irq); + rc = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; } + default: + rc = -ENOIOCTLCMD; + break; } - return -ENOIOCTLCMD; + + /* + * To simplify single stepping of userspace-emulated instructions, + * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see + * should_handle_per_ifetch()). 
However, if userspace emulation injects + * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens + * after (and not before) the interrupt delivery. + */ + if (!rc) + vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING; + + return rc; } static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu, @@ -5553,7 +5820,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: @@ -5669,6 +5938,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) + return -EINVAL; + /* When we are protected, we should not change the memory slots */ if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; @@ -5718,6 +5990,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, |
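
The KVM_CAP_NR_VCPUS/KVM_CAP_MAX_VCPUS hunk above now derives all three vCPU limits from KVM_S390_ESCA_CPU_SLOTS and documents that KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID return the same value. User space reads these limits with the standard KVM_CHECK_EXTENSION ioctl on the /dev/kvm system fd; a minimal sketch:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("/dev/kvm");
		return 1;
	}
	/* On the system fd, KVM_CHECK_EXTENSION returns the capability value. */
	printf("recommended vcpus: %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS));
	printf("max vcpus:         %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS));
	printf("max vcpu id:       %d\n", ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPU_ID));
	return 0;
}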
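
The new KVM_CAP_S390_USER_OPEREXEC is turned on per VM through the generic KVM_ENABLE_CAP ioctl, which lands in the kvm_vm_ioctl_enable_cap() case added above. A hedged user-space sketch (the constant comes from this patch's uapi update, so matching kernel headers are assumed; vm_fd is assumed to come from KVM_CREATE_VM):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask KVM to intercept operation-exception program interrupts to user space. */
static int enable_user_operexec(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_USER_OPEREXEC;
	/* Returns 0 on success; kernels without the capability fail with EINVAL. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}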
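
kvm_s390_set_uv_feat() rejects any request that is not a subset of the host's filtered UV feature indications, via bitmap_subset(). For a single 64-bit word that check reduces to one expression; a sketch with illustrative names:

#include <stdbool.h>

/* Single-word equivalent of bitmap_subset(&data, &filter, 64): true iff
 * every bit set in data is also set in filter. */
static bool uv_feat_is_subset(unsigned long data, unsigned long filter)
{
	return (data & ~filter) == 0;
}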
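
kvm_s390_update_topology_change_report() above swaps an open-coded cmpxchg() retry loop for try_cmpxchg(), which writes the current value back into 'old' on failure, so the loop no longer re-reads sca->utility itself. The same idiom in portable C11 atomics, with illustrative names standing in for the SCA utility word:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the SCA utility halfword: one flag packed into 16 bits. */
static _Atomic uint16_t utility;
#define MTCR_BIT 0x8000u

/* Set or clear one bit without losing concurrent updates to other bits. */
static void set_mtcr(int val)
{
	uint16_t old = atomic_load(&utility);
	uint16_t new;

	do {
		new = val ? (old | MTCR_BIT) : (old & ~MTCR_BIT);
		/* On failure, 'old' is refreshed with the current value. */
	} while (!atomic_compare_exchange_weak(&utility, &old, new));
}

int main(void)
{
	set_mtcr(1);
	printf("utility = 0x%04x\n", atomic_load(&utility));
	return 0;
}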
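
The rewritten fault path in __kvm_s390_handle_dat_fault() first attempts the page lookup with FOLL_NOWAIT, so a page that needs I/O can be turned into an async pfault, and only retries synchronously when no async notification can be set up. A toy, self-contained rendering of that control flow (fetch_page() and arm_async_notification() are invented stand-ins, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

enum fetch_rc { FETCH_OK, FETCH_NEEDS_IO, FETCH_ERR };

/* Toy stand-ins: the first, non-blocking attempt reports that I/O is needed. */
static enum fetch_rc fetch_page(unsigned long gfn, bool nowait)
{
	(void)gfn;
	return nowait ? FETCH_NEEDS_IO : FETCH_OK;
}

static bool arm_async_notification(unsigned long gfn)
{
	(void)gfn;
	return false;	/* pretend async pfaults are disabled */
}

/* Shape of the try_again loop: try a non-blocking fetch, prefer an async
 * notification if the page needs I/O, and block only as the last resort. */
static int handle_fault(unsigned long gfn)
{
	bool nowait = true;

	for (;;) {
		switch (fetch_page(gfn, nowait)) {
		case FETCH_OK:
			return 0;
		case FETCH_NEEDS_IO:
			if (nowait && arm_async_notification(gfn))
				return 0;	/* guest waits for completion */
			nowait = false;		/* retry synchronously */
			continue;
		case FETCH_ERR:
		default:
			return -1;
		}
	}
}

int main(void)
{
	printf("fault handled: %d\n", handle_fault(0x1234));
	return 0;
}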
