diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/async_pf.c | 36 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 2 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 162 |
3 files changed, 119 insertions, 81 deletions
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 15e5b037f92d..a36828fbf40a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -51,6 +51,7 @@ static void async_pf_execute(struct work_struct *work) unsigned long addr = apf->addr; gpa_t cr2_or_gpa = apf->cr2_or_gpa; int locked = 1; + bool first; might_sleep(); @@ -69,10 +70,14 @@ static void async_pf_execute(struct work_struct *work) kvm_arch_async_page_present(vcpu, apf); spin_lock(&vcpu->async_pf.lock); + first = list_empty(&vcpu->async_pf.done); list_add_tail(&apf->link, &vcpu->async_pf.done); apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); + if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first) + kvm_arch_async_page_present_queued(vcpu); + /* * apf may be freed by kvm_check_async_pf_completion() after * this point @@ -80,8 +85,7 @@ static void async_pf_execute(struct work_struct *work) trace_kvm_async_pf_completed(addr, cr2_or_gpa); - if (swq_has_sleeper(&vcpu->wq)) - swake_up_one(&vcpu->wq); + rcuwait_wake_up(&vcpu->wait); mmput(mm); kvm_put_kvm(vcpu->kvm); @@ -135,7 +139,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) struct kvm_async_pf *work; while (!list_empty_careful(&vcpu->async_pf.done) && - kvm_arch_can_inject_async_page_present(vcpu)) { + kvm_arch_can_dequeue_async_page_present(vcpu)) { spin_lock(&vcpu->async_pf.lock); work = list_first_entry(&vcpu->async_pf.done, typeof(*work), link); @@ -160,7 +164,9 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU) return 0; - /* setup delayed work */ + /* Arch specific code should not do async PF in this case */ + if (unlikely(kvm_is_error_hva(hva))) + return 0; /* * do alloc nowait since if we are going to sleep anyway we @@ -179,29 +185,21 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, mmget(work->mm); kvm_get_kvm(work->vcpu->kvm); - /* this can't really happen otherwise gfn_to_pfn_async - would succeed */ - if (unlikely(kvm_is_error_hva(work->addr))) - goto retry_sync; - INIT_WORK(&work->work, async_pf_execute); - if (!schedule_work(&work->work)) - goto retry_sync; list_add_tail(&work->queue, &vcpu->async_pf.queue); vcpu->async_pf.queued++; - kvm_arch_async_page_not_present(vcpu, work); + work->notpresent_injected = kvm_arch_async_page_not_present(vcpu, work); + + schedule_work(&work->work); + return 1; -retry_sync: - kvm_put_kvm(work->vcpu->kvm); - mmput(work->mm); - kmem_cache_free(async_pf_cache, work); - return 0; } int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) { struct kvm_async_pf *work; + bool first; if (!list_empty_careful(&vcpu->async_pf.done)) return 0; @@ -214,9 +212,13 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); + first = list_empty(&vcpu->async_pf.done); list_add_tail(&work->link, &vcpu->async_pf.done); spin_unlock(&vcpu->async_pf.lock); + if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first) + kvm_arch_async_page_present_queued(vcpu); + vcpu->async_pf.queued++; return 0; } diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e586d1395c28..ef7ed916ad4a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -721,7 +721,7 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) return false; } - return _val == p->datamatch ? true : false; + return _val == p->datamatch; } /* MMIO/PIO writes trigger an event if the addr/val match */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f57792b1541b..7b6013f2ba19 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -155,10 +155,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; -__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, bool blockable) +__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { - return 0; } bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) @@ -259,6 +258,7 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) } bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except, unsigned long *vcpu_bitmap, cpumask_var_t tmp) { int i, cpu, me; @@ -268,7 +268,8 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, me = get_cpu(); kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + if ((vcpu_bitmap && !test_bit(i, vcpu_bitmap)) || + vcpu == except) continue; kvm_make_request(req, vcpu); @@ -288,19 +289,25 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, return called; } -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, + struct kvm_vcpu *except) { cpumask_var_t cpus; bool called; zalloc_cpumask_var(&cpus, GFP_ATOMIC); - called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus); + called = kvm_make_vcpus_request_mask(kvm, req, except, NULL, cpus); free_cpumask_var(cpus); return called; } +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +{ + return kvm_make_all_cpus_request_except(kvm, req, NULL); +} + #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { @@ -341,7 +348,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->kvm = kvm; vcpu->vcpu_id = id; vcpu->pid = NULL; - init_swait_queue_head(&vcpu->wq); + rcuwait_init(&vcpu->wait); kvm_async_pf_vcpu_init(vcpu); vcpu->pre_pcpu = -1; @@ -376,6 +383,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } +static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + + idx = srcu_read_lock(&kvm->srcu); + kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + srcu_read_unlock(&kvm->srcu, idx); +} + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -400,7 +419,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, { struct kvm *kvm = mmu_notifier_to_kvm(mn); int need_tlb_flush = 0, idx; - int ret; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -417,14 +435,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); - - ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start, - range->end, - mmu_notifier_range_blockable(range)); - srcu_read_unlock(&kvm->srcu, idx); - return ret; + return 0; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -530,6 +543,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_range = kvm_mmu_notifier_invalidate_range, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, @@ -710,6 +724,8 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_arch_destroy_vm; } + kvm->max_halt_poll_ns = halt_poll_ns; + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -1215,10 +1231,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) return -EINVAL; /* We can read the guest memory with __xxx_user() later on. */ - if ((id < KVM_USER_MEM_SLOTS) && - ((mem->userspace_addr & (PAGE_SIZE - 1)) || + if ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, - mem->memory_size))) + mem->memory_size)) return -EINVAL; if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) return -EINVAL; @@ -1602,16 +1617,13 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn { return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); } +EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); - if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || - memslot->flags & KVM_MEMSLOT_INVALID) - return false; - - return true; + return kvm_is_visible_memslot(memslot); } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); @@ -2503,13 +2515,15 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_write_guest_cached); -int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned int offset, + unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; + gpa_t gpa = ghc->gpa + offset; - BUG_ON(len > ghc->len); + BUG_ON(len + offset > ghc->len); if (slots->generation != ghc->generation) { if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) @@ -2520,14 +2534,21 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return -EFAULT; if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); + return kvm_read_guest(kvm, gpa, data, len); - r = __copy_from_user(data, (void __user *)ghc->hva, len); + r = __copy_from_user(data, (void __user *)ghc->hva + offset, len); if (r) return -EFAULT; return 0; } +EXPORT_SYMBOL_GPL(kvm_read_guest_offset_cached); + +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) +{ + return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len); +} EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) @@ -2665,19 +2686,27 @@ out: return ret; } +static inline void +update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited) +{ + if (waited) + vcpu->stat.halt_poll_fail_ns += poll_ns; + else + vcpu->stat.halt_poll_success_ns += poll_ns; +} + /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. */ void kvm_vcpu_block(struct kvm_vcpu *vcpu) { - ktime_t start, cur; - DECLARE_SWAITQUEUE(wait); + ktime_t start, cur, poll_end; bool waited = false; u64 block_ns; kvm_arch_vcpu_blocking(vcpu); - start = cur = ktime_get(); + start = cur = poll_end = ktime_get(); if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); @@ -2693,12 +2722,13 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) ++vcpu->stat.halt_poll_invalid; goto out; } - cur = ktime_get(); + poll_end = cur = ktime_get(); } while (single_task_running() && ktime_before(cur, stop)); } + prepare_to_rcuwait(&vcpu->wait); for (;;) { - prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); if (kvm_vcpu_check_block(vcpu) < 0) break; @@ -2706,25 +2736,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) waited = true; schedule(); } - - finish_swait(&vcpu->wq, &wait); + finish_rcuwait(&vcpu->wait); cur = ktime_get(); out: kvm_arch_vcpu_unblocking(vcpu); block_ns = ktime_to_ns(cur) - ktime_to_ns(start); + update_halt_poll_stats( + vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited); + if (!kvm_arch_no_poll(vcpu)) { if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - } else if (halt_poll_ns) { + } else if (vcpu->kvm->max_halt_poll_ns) { if (block_ns <= vcpu->halt_poll_ns) ; /* we had a long block, shrink polling */ - else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) + else if (vcpu->halt_poll_ns && + block_ns > vcpu->kvm->max_halt_poll_ns) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < halt_poll_ns && - block_ns < halt_poll_ns) + else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && + block_ns < vcpu->kvm->max_halt_poll_ns) grow_halt_poll_ns(vcpu); } else { vcpu->halt_poll_ns = 0; @@ -2738,11 +2771,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { - struct swait_queue_head *wqp; + struct rcuwait *waitp; - wqp = kvm_arch_vcpu_wq(vcpu); - if (swq_has_sleeper(wqp)) { - swake_up_one(wqp); + waitp = kvm_arch_vcpu_get_wait(vcpu); + if (rcuwait_wake_up(waitp)) { WRITE_ONCE(vcpu->ready, true); ++vcpu->stat.halt_wakeup; return true; @@ -2884,7 +2916,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; if (vcpu == me) continue; - if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) + if (rcuwait_active(&vcpu->wait) && + !vcpu_dy_runnable(vcpu)) continue; if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) @@ -2946,7 +2979,6 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; - debugfs_remove_recursive(vcpu->debugfs_dentry); kvm_put_kvm(vcpu->kvm); return 0; } @@ -2973,16 +3005,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS + struct dentry *debugfs_dentry; char dir_name[ITOA_MAX_LEN * 2]; if (!debugfs_initialized()) return; snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); - vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); + debugfs_dentry = debugfs_create_dir(dir_name, + vcpu->kvm->debugfs_dentry); - kvm_arch_create_vcpu_debugfs(vcpu); + kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry); #endif } @@ -3031,8 +3064,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto vcpu_free_run_page; - kvm_create_vcpu_debugfs(vcpu); - mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; @@ -3061,11 +3092,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) mutex_unlock(&kvm->lock); kvm_arch_vcpu_postcreate(vcpu); + kvm_create_vcpu_debugfs(vcpu); return r; unlock_vcpu_destroy: mutex_unlock(&kvm->lock); - debugfs_remove_recursive(vcpu->debugfs_dentry); kvm_arch_vcpu_destroy(vcpu); vcpu_free_run_page: free_page((unsigned long)vcpu->run); @@ -3135,7 +3166,7 @@ static long kvm_vcpu_ioctl(struct file *filp, synchronize_rcu(); put_pid(oldpid); } - r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); + r = kvm_arch_vcpu_ioctl_run(vcpu); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; } @@ -3160,7 +3191,6 @@ out_free1: case KVM_SET_REGS: { struct kvm_regs *kvm_regs; - r = -ENOMEM; kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); if (IS_ERR(kvm_regs)) { r = PTR_ERR(kvm_regs); @@ -3516,6 +3546,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_HALT_POLL: return 1; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: @@ -3566,6 +3597,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return 0; } #endif + case KVM_CAP_HALT_POLL: { + if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) + return -EINVAL; + + kvm->max_halt_poll_ns = cap->args[0]; + return 0; + } default: return kvm_vm_ioctl_enable_cap(kvm, cap); } @@ -3714,21 +3752,18 @@ static long kvm_vm_ioctl(struct file *filp, if (routing.flags) goto out; if (routing.nr) { - r = -ENOMEM; - entries = vmalloc(array_size(sizeof(*entries), - routing.nr)); - if (!entries) - goto out; - r = -EFAULT; urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; + entries = vmemdup_user(urouting->entries, + array_size(sizeof(*entries), + routing.nr)); + if (IS_ERR(entries)) { + r = PTR_ERR(entries); + goto out; + } } r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); -out_free_irq_routing: - vfree(entries); + kvfree(entries); break; } #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ @@ -4639,6 +4674,7 @@ struct kvm_vcpu *kvm_get_running_vcpu(void) return vcpu; } +EXPORT_SYMBOL_GPL(kvm_get_running_vcpu); /** * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus. |