diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 106 |
1 files changed, 93 insertions, 13 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1192942aef91..d0788d0a72cc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. + * Kernel-based Virtual Machine (KVM) Hypervisor * * Copyright (C) 2006 Qumranet, Inc. * Copyright 2010 Red Hat, Inc. and/or its affiliates. @@ -74,6 +71,7 @@ #define ITOA_MAX_LEN 12 MODULE_AUTHOR("Qumranet"); +MODULE_DESCRIPTION("Kernel-based Virtual Machine (KVM) Hypervisor"); MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ @@ -91,8 +89,8 @@ unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ module_param(halt_poll_ns_grow_start, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start); -/* Default resets per-vcpu halt_poll_ns . */ -unsigned int halt_poll_ns_shrink; +/* Default halves per-vcpu halt_poll_ns. */ +unsigned int halt_poll_ns_shrink = 2; module_param(halt_poll_ns_shrink, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); @@ -110,8 +108,7 @@ static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); -struct dentry *kvm_debugfs_dir; -EXPORT_SYMBOL_GPL(kvm_debugfs_dir); +static struct dentry *kvm_debugfs_dir; static const struct file_operations stat_fops_per_vm; @@ -1145,8 +1142,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) { struct kvm *kvm = kvm_arch_alloc_vm(); struct kvm_memslots *slots; - int r = -ENOMEM; - int i, j; + int r, i, j; if (!kvm) return ERR_PTR(-ENOMEM); @@ -1183,12 +1179,18 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", task_pid_nr(current)); + r = -ENOMEM; if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; + r = kvm_init_irq_routing(kvm); + if (r) + goto out_err_no_irq_routing; + refcount_set(&kvm->users_count, 1); + for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { for (j = 0; j < 2; j++) { slots = &kvm->__memslots[i][j]; @@ -1206,6 +1208,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); } + r = -ENOMEM; for (i = 0; i < KVM_NR_BUSES; i++) { rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); @@ -1267,6 +1270,8 @@ out_err_no_arch_destroy_vm: WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); + kvm_free_irq_routing(kvm); +out_err_no_irq_routing: cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); @@ -4202,12 +4207,21 @@ static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) /* * Creates some virtual cpus. Good luck creating more than one. */ -static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) +static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, unsigned long id) { int r; struct kvm_vcpu *vcpu; struct page *page; + /* + * KVM tracks vCPU IDs as 'int', be kind to userspace and reject + * too-large values instead of silently truncating. + * + * Ensure KVM_MAX_VCPU_IDS isn't pushed above INT_MAX without first + * changing the storage type (at the very least, IDs should be tracked + * as unsigned ints). + */ + BUILD_BUG_ON(KVM_MAX_VCPU_IDS > INT_MAX); if (id >= KVM_MAX_VCPU_IDS) return -EINVAL; @@ -4375,6 +4389,52 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu) return fd; } +#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY +static int kvm_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, + struct kvm_pre_fault_memory *range) +{ + int idx; + long r; + u64 full_size; + + if (range->flags) + return -EINVAL; + + if (!PAGE_ALIGNED(range->gpa) || + !PAGE_ALIGNED(range->size) || + range->gpa + range->size <= range->gpa) + return -EINVAL; + + vcpu_load(vcpu); + idx = srcu_read_lock(&vcpu->kvm->srcu); + + full_size = range->size; + do { + if (signal_pending(current)) { + r = -EINTR; + break; + } + + r = kvm_arch_vcpu_pre_fault_memory(vcpu, range); + if (WARN_ON_ONCE(r == 0 || r == -EIO)) + break; + + if (r < 0) + break; + + range->size -= r; + range->gpa += r; + cond_resched(); + } while (range->size); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + vcpu_put(vcpu); + + /* Return success if at least one page was mapped successfully. */ + return full_size == range->size ? r : 0; +} +#endif + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4421,7 +4481,10 @@ static long kvm_vcpu_ioctl(struct file *filp, synchronize_rcu(); put_pid(oldpid); } + vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); r = kvm_arch_vcpu_ioctl_run(vcpu); + vcpu->wants_to_run = false; + trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; } @@ -4575,6 +4638,20 @@ out_free1: r = kvm_vcpu_ioctl_get_stats_fd(vcpu); break; } +#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY + case KVM_PRE_FAULT_MEMORY: { + struct kvm_pre_fault_memory range; + + r = -EFAULT; + if (copy_from_user(&range, argp, sizeof(range))) + break; + r = kvm_vcpu_pre_fault_memory(vcpu, &range); + /* Pass back leftover range. */ + if (copy_to_user(argp, &range, sizeof(range))) + r = -EFAULT; + break; + } +#endif default: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } @@ -6287,8 +6364,9 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) WRITE_ONCE(vcpu->ready, false); __this_cpu_write(kvm_running_vcpu, vcpu); - kvm_arch_sched_in(vcpu, cpu); kvm_arch_vcpu_load(vcpu, cpu); + + WRITE_ONCE(vcpu->scheduled_out, false); } static void kvm_sched_out(struct preempt_notifier *pn, @@ -6296,7 +6374,9 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - if (current->on_rq) { + WRITE_ONCE(vcpu->scheduled_out, true); + + if (current->on_rq && vcpu->wants_to_run) { WRITE_ONCE(vcpu->preempted, true); WRITE_ONCE(vcpu->ready, true); } |