summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/async_pf.c36
-rw-r--r--virt/kvm/eventfd.c2
-rw-r--r--virt/kvm/kvm_main.c162
3 files changed, 119 insertions, 81 deletions
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..a36828fbf40a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -51,6 +51,7 @@ static void async_pf_execute(struct work_struct *work)
unsigned long addr = apf->addr;
gpa_t cr2_or_gpa = apf->cr2_or_gpa;
int locked = 1;
+ bool first;
might_sleep();
@@ -69,10 +70,14 @@ static void async_pf_execute(struct work_struct *work)
kvm_arch_async_page_present(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
+ first = list_empty(&vcpu->async_pf.done);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+ kvm_arch_async_page_present_queued(vcpu);
+
/*
* apf may be freed by kvm_check_async_pf_completion() after
* this point
@@ -80,8 +85,7 @@ static void async_pf_execute(struct work_struct *work)
trace_kvm_async_pf_completed(addr, cr2_or_gpa);
- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);
mmput(mm);
kvm_put_kvm(vcpu->kvm);
@@ -135,7 +139,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
struct kvm_async_pf *work;
while (!list_empty_careful(&vcpu->async_pf.done) &&
- kvm_arch_can_inject_async_page_present(vcpu)) {
+ kvm_arch_can_dequeue_async_page_present(vcpu)) {
spin_lock(&vcpu->async_pf.lock);
work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
link);
@@ -160,7 +164,9 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
return 0;
- /* setup delayed work */
+ /* Arch specific code should not do async PF in this case */
+ if (unlikely(kvm_is_error_hva(hva)))
+ return 0;
/*
* do alloc nowait since if we are going to sleep anyway we
@@ -179,29 +185,21 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
mmget(work->mm);
kvm_get_kvm(work->vcpu->kvm);
- /* this can't really happen otherwise gfn_to_pfn_async
- would succeed */
- if (unlikely(kvm_is_error_hva(work->addr)))
- goto retry_sync;
-
INIT_WORK(&work->work, async_pf_execute);
- if (!schedule_work(&work->work))
- goto retry_sync;
list_add_tail(&work->queue, &vcpu->async_pf.queue);
vcpu->async_pf.queued++;
- kvm_arch_async_page_not_present(vcpu, work);
+ work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work);
+
+ schedule_work(&work->work);
+
return 1;
-retry_sync:
- kvm_put_kvm(work->vcpu->kvm);
- mmput(work->mm);
- kmem_cache_free(async_pf_cache, work);
- return 0;
}
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
struct kvm_async_pf *work;
+ bool first;
if (!list_empty_careful(&vcpu->async_pf.done))
return 0;
@@ -214,9 +212,13 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
+ first = list_empty(&vcpu->async_pf.done);
list_add_tail(&work->link, &vcpu->async_pf.done);
spin_unlock(&vcpu->async_pf.lock);
+ if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+ kvm_arch_async_page_present_queued(vcpu);
+
vcpu->async_pf.queued++;
return 0;
}
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e586d1395c28..ef7ed916ad4a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -721,7 +721,7 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
return false;
}
- return _val == p->datamatch ? true : false;
+ return _val == p->datamatch;
}
/* MMIO/PIO writes trigger an event if the addr/val match */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f57792b1541b..7b6013f2ba19 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -155,10 +155,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end, bool blockable)
+__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
{
- return 0;
}
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
@@ -259,6 +258,7 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
}
bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except,
unsigned long *vcpu_bitmap, cpumask_var_t tmp)
{
int i, cpu, me;
@@ -268,7 +268,8 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
me = get_cpu();
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+ if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) ||
+ vcpu == except)
continue;
kvm_make_request(req, vcpu);
@@ -288,19 +289,25 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
return called;
}
-bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
+ struct kvm_vcpu *except)
{
cpumask_var_t cpus;
bool called;
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
- called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus);
+ called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus);
free_cpumask_var(cpus);
return called;
}
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
+{
+ return kvm_make_all_cpus_request_except(kvm, req, NULL);
+}
+
#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
@@ -341,7 +348,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_swait_queue_head(&vcpu->wq);
+ rcuwait_init(&vcpu->wait);
kvm_async_pf_vcpu_init(vcpu);
vcpu->pre_pcpu = -1;
@@ -376,6 +383,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
return container_of(mn, struct kvm, mmu_notifier);
}
+static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
@@ -400,7 +419,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
- int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -417,14 +435,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
-
- ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start,
- range->end,
- mmu_notifier_range_blockable(range));
-
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return 0;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -530,6 +543,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+ .invalidate_range = kvm_mmu_notifier_invalidate_range,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
@@ -710,6 +724,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
goto out_err_no_arch_destroy_vm;
}
+ kvm->max_halt_poll_ns = halt_poll_ns;
+
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -1215,10 +1231,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
return -EINVAL;
/* We can read the guest memory with __xxx_user() later on. */
- if ((id < KVM_USER_MEM_SLOTS) &&
- ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
+ if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
!access_ok((void __user *)(unsigned long)mem->userspace_addr,
- mem->memory_size)))
+ mem->memory_size))
return -EINVAL;
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
return -EINVAL;
@@ -1602,16 +1617,13 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
{
return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
}
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
- memslot->flags & KVM_MEMSLOT_INVALID)
- return false;
-
- return true;
+ return kvm_is_visible_memslot(memslot);
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
@@ -2503,13 +2515,15 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
}
EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
-int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
+int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned int offset,
+ unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
int r;
+ gpa_t gpa = ghc->gpa + offset;
- BUG_ON(len > ghc->len);
+ BUG_ON(len + offset > ghc->len);
if (slots->generation != ghc->generation) {
if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -2520,14 +2534,21 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
return -EFAULT;
if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
+ return kvm_read_guest(kvm, gpa, data, len);
- r = __copy_from_user(data, (void __user *)ghc->hva, len);
+ r = __copy_from_user(data, (void __user *)ghc->hva + offset, len);
if (r)
return -EFAULT;
return 0;
}
+EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached);
+
+int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
+ void *data, unsigned long len)
+{
+ return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len);
+}
EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
@@ -2665,19 +2686,27 @@ out:
return ret;
}
+static inline void
+update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+{
+ if (waited)
+ vcpu->stat.halt_poll_fail_ns += poll_ns;
+ else
+ vcpu->stat.halt_poll_success_ns += poll_ns;
+}
+
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
- ktime_t start, cur;
- DECLARE_SWAITQUEUE(wait);
+ ktime_t start, cur, poll_end;
bool waited = false;
u64 block_ns;
kvm_arch_vcpu_blocking(vcpu);
- start = cur = ktime_get();
+ start = cur = poll_end = ktime_get();
if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
@@ -2693,12 +2722,13 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
++vcpu->stat.halt_poll_invalid;
goto out;
}
- cur = ktime_get();
+ poll_end = cur = ktime_get();
} while (single_task_running() && ktime_before(cur, stop));
}
+ prepare_to_rcuwait(&vcpu->wait);
for (;;) {
- prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -2706,25 +2736,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
-
- finish_swait(&vcpu->wq, &wait);
+ finish_rcuwait(&vcpu->wait);
cur = ktime_get();
out:
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+ update_halt_poll_stats(
+ vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+
if (!kvm_arch_no_poll(vcpu)) {
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- } else if (halt_poll_ns) {
+ } else if (vcpu->kvm->max_halt_poll_ns) {
if (block_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ else if (vcpu->halt_poll_ns &&
+ block_ns > vcpu->kvm->max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
+ else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
+ block_ns < vcpu->kvm->max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
vcpu->halt_poll_ns = 0;
@@ -2738,11 +2771,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ waitp = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(waitp)) {
WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.halt_wakeup;
return true;
@@ -2884,7 +2916,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+ if (rcuwait_active(&vcpu->wait) &&
+ !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_vcpu_in_kernel(vcpu))
@@ -2946,7 +2979,6 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
{
struct kvm_vcpu *vcpu = filp->private_data;
- debugfs_remove_recursive(vcpu->debugfs_dentry);
kvm_put_kvm(vcpu->kvm);
return 0;
}
@@ -2973,16 +3005,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
+ struct dentry *debugfs_dentry;
char dir_name[ITOA_MAX_LEN * 2];
if (!debugfs_initialized())
return;
snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
- vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
- vcpu->kvm->debugfs_dentry);
+ debugfs_dentry = debugfs_create_dir(dir_name,
+ vcpu->kvm->debugfs_dentry);
- kvm_arch_create_vcpu_debugfs(vcpu);
+ kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry);
#endif
}
@@ -3031,8 +3064,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (r)
goto vcpu_free_run_page;
- kvm_create_vcpu_debugfs(vcpu);
-
mutex_lock(&kvm->lock);
if (kvm_get_vcpu_by_id(kvm, id)) {
r = -EEXIST;
@@ -3061,11 +3092,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
mutex_unlock(&kvm->lock);
kvm_arch_vcpu_postcreate(vcpu);
+ kvm_create_vcpu_debugfs(vcpu);
return r;
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
- debugfs_remove_recursive(vcpu->debugfs_dentry);
kvm_arch_vcpu_destroy(vcpu);
vcpu_free_run_page:
free_page((unsigned long)vcpu->run);
@@ -3135,7 +3166,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
synchronize_rcu();
put_pid(oldpid);
}
- r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
+ r = kvm_arch_vcpu_ioctl_run(vcpu);
trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
}
@@ -3160,7 +3191,6 @@ out_free1:
case KVM_SET_REGS: {
struct kvm_regs *kvm_regs;
- r = -ENOMEM;
kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
if (IS_ERR(kvm_regs)) {
r = PTR_ERR(kvm_regs);
@@ -3516,6 +3546,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_IOEVENTFD_ANY_LENGTH:
case KVM_CAP_CHECK_EXTENSION_VM:
case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_HALT_POLL:
return 1;
#ifdef CONFIG_KVM_MMIO
case KVM_CAP_COALESCED_MMIO:
@@ -3566,6 +3597,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return 0;
}
#endif
+ case KVM_CAP_HALT_POLL: {
+ if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
+ return -EINVAL;
+
+ kvm->max_halt_poll_ns = cap->args[0];
+ return 0;
+ }
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}
@@ -3714,21 +3752,18 @@ static long kvm_vm_ioctl(struct file *filp,
if (routing.flags)
goto out;
if (routing.nr) {
- r = -ENOMEM;
- entries = vmalloc(array_size(sizeof(*entries),
- routing.nr));
- if (!entries)
- goto out;
- r = -EFAULT;
urouting = argp;
- if (copy_from_user(entries, urouting->entries,
- routing.nr * sizeof(*entries)))
- goto out_free_irq_routing;
+ entries = vmemdup_user(urouting->entries,
+ array_size(sizeof(*entries),
+ routing.nr));
+ if (IS_ERR(entries)) {
+ r = PTR_ERR(entries);
+ goto out;
+ }
}
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
-out_free_irq_routing:
- vfree(entries);
+ kvfree(entries);
break;
}
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
@@ -4639,6 +4674,7 @@ struct kvm_vcpu *kvm_get_running_vcpu(void)
return vcpu;
}
+EXPORT_SYMBOL_GPL(kvm_get_running_vcpu);
/**
* kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.