From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 2 Jul 2012 17:56:33 +0900 Subject: KVM: Introduce kvm_unmap_hva_range() for kvm_mmu_notifier_invalidate_range_start() When we tested KVM under memory pressure, with THP enabled on the host, we noticed that MMU notifier took a long time to invalidate huge pages. Since the invalidation was done with mmu_lock held, it not only wasted the CPU but also made the host harder to respond. This patch mitigates this by using kvm_handle_hva_range(). Signed-off-by: Takuya Yoshikawa Cc: Alexander Graf Cc: Paul Mackerras Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b3ce91c623e2..e2b1a159e5df 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; - for (; start < end; start += PAGE_SIZE) - need_tlb_flush |= kvm_unmap_hva(kvm, start); + need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) -- cgit From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:11 +0800 Subject: KVM: using get_fault_pfn to get the fault pfn Using get_fault_pfn to cleanup the code Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e2b1a159e5df..0fbbf2d21603 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -103,8 +103,8 @@ static bool largepages_enabled = true; static struct page *hwpoison_page; static pfn_t hwpoison_pfn; -struct page *fault_page; -pfn_t fault_pfn; +static struct page *fault_page; +static pfn_t fault_pfn; inline int kvm_is_mmio_pfn(pfn_t pfn) { @@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_hwpoison_pfn); -int is_fault_pfn(pfn_t pfn) -{ - return pfn == fault_pfn; -} -EXPORT_SYMBOL_GPL(is_fault_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == bad_pfn; @@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -static pfn_t get_fault_pfn(void) +pfn_t get_fault_pfn(void) { get_page(fault_page); return fault_pfn; } +EXPORT_SYMBOL_GPL(get_fault_pfn); int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) -- cgit From ca0565f5736e67af3172d188577b57e303dd082a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:54:52 +0800 Subject: KVM: make bad_pfn static to kvm_main.c bad_pfn is not used out of kvm_main.c, so mark it static, also move it near hwpoison_pfn and fault_pfn Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0fbbf2d21603..f955eee92aa9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -100,6 +100,9 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +struct page *bad_page; +static pfn_t bad_pfn; + static struct page *hwpoison_page; static pfn_t hwpoison_pfn; @@ -2691,9 +2694,6 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; -struct page *bad_page; -pfn_t bad_pfn; - static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) { -- cgit From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 17 Jul 2012 21:56:16 +0800 Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove it Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f955eee92aa9..68dda513cd72 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr) return rc == -EHWPOISON; } -static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, - bool *async, bool write_fault, bool *writable) +static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable) { struct page *page[1]; int npages = 0; @@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, return pfn; } -pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) +pfn_t hva_to_pfn_atomic(unsigned long addr) { - return hva_to_pfn(kvm, addr, true, NULL, true, NULL); + return hva_to_pfn(addr, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); @@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, return page_to_pfn(bad_page); } - return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); + return hva_to_pfn(addr, atomic, async, write_fault, writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); -pfn_t gfn_to_pfn_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot, gfn_t gfn) +pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(kvm, addr, false, NULL, true, NULL); + return hva_to_pfn(addr, false, NULL, true, NULL); } int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, -- cgit From 4c088493c8d07e4e27bad53a99dcfdc14cdf45f8 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Wed, 18 Jul 2012 19:07:46 +0530 Subject: KVM: Note down when cpu relax intercepted or pause loop exited Noting pause loop exited vcpu or cpu relax intercepted helps in filtering right candidate to yield. Wrong selection of vcpu; i.e., a vcpu that just did a pl-exit or cpu relax intercepted may contribute to performance degradation. Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68dda513cd72..0892b75eeedd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -239,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) } vcpu->run = page_address(page); + kvm_vcpu_set_in_spin_loop(vcpu, false); + kvm_vcpu_set_dy_eligible(vcpu, false); + r = kvm_arch_vcpu_init(vcpu); if (r < 0) goto fail_free_run; @@ -1585,6 +1588,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) int pass; int i; + kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not * currently running, because it got preempted by something @@ -1610,6 +1614,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } } + kvm_vcpu_set_in_spin_loop(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit From 06e48c510aa37f6e791602e6420422ea7071fe94 Mon Sep 17 00:00:00 2001 From: Raghavendra K T Date: Thu, 19 Jul 2012 15:17:52 +0530 Subject: KVM: Choose better candidate for directed yield Currently, on a large vcpu guests, there is a high probability of yielding to the same vcpu who had recently done a pause-loop exit or cpu relax intercepted. Such a yield can lead to the vcpu spinning again and hence degrade the performance. The patchset keeps track of the pause loop exit/cpu relax interception and gives chance to a vcpu which: (a) Has not done pause loop exit or cpu relax intercepted at all (probably he is preempted lock-holder) (b) Was skipped in last iteration because it did pause loop exit or cpu relax intercepted, and probably has become eligible now (next eligible lock holder) Signed-off-by: Raghavendra K T Reviewed-by: Marcelo Tosatti Reviewed-by: Rik van Riel Tested-by: Christian Borntraeger # on s390x Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0892b75eeedd..1e10ebe1a370 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1579,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT +/* + * Helper that checks whether a VCPU is eligible for directed yield. + * Most eligible candidate to yield is decided by following heuristics: + * + * (a) VCPU which has not done pl-exit or cpu relax intercepted recently + * (preempted lock holder), indicated by @in_spin_loop. + * Set at the beiginning and cleared at the end of interception/PLE handler. + * + * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get + * chance last time (mostly it has become eligible now since we have probably + * yielded to lockholder in last iteration. This is done by toggling + * @dy_eligible each time a VCPU checked for eligibility.) + * + * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding + * to preempted lock-holder could result in wrong VCPU selection and CPU + * burning. Giving priority for a potential lock-holder increases lock + * progress. + * + * Since algorithm is based on heuristics, accessing another VCPU data without + * locking does not harm. It may result in trying to yield to same VCPU, fail + * and continue with next VCPU and so on. + */ +bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +{ + bool eligible; + + eligible = !vcpu->spin_loop.in_spin_loop || + (vcpu->spin_loop.in_spin_loop && + vcpu->spin_loop.dy_eligible); + + if (vcpu->spin_loop.in_spin_loop) + kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); + + return eligible; +} +#endif void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; @@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (waitqueue_active(&vcpu->wq)) continue; + if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) + continue; if (kvm_vcpu_yield_to(vcpu)) { kvm->last_boosted_vcpu = i; yielded = 1; @@ -1615,6 +1654,9 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) } } kvm_vcpu_set_in_spin_loop(me, false); + + /* Ensure vcpu is not eligible during next spinloop */ + kvm_vcpu_set_dy_eligible(me, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -- cgit From a2766325cf9f9e36d1225145f1ce1b066f001837 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 26 Jul 2012 11:58:59 +0800 Subject: KVM: remove dummy pages Currently, kvm allocates some pages and use them as error indicators, it wastes memory and is not good for scalability Base on Avi's suggestion, we use the error codes instead of these pages to indicate the error conditions Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 121 +++++++++++++++++++++------------------------------- 1 file changed, 49 insertions(+), 72 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0014ee99dc7f..de89497fe4c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -100,17 +100,11 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; -struct page *bad_page; -static pfn_t bad_pfn; - -static struct page *hwpoison_page; -static pfn_t hwpoison_pfn; - -static struct page *fault_page; -static pfn_t fault_pfn; - -inline int kvm_is_mmio_pfn(pfn_t pfn) +bool kvm_is_mmio_pfn(pfn_t pfn) { + if (is_error_pfn(pfn)) + return false; + if (pfn_valid(pfn)) { int reserved; struct page *tail = pfn_to_page(pfn); @@ -939,34 +933,55 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages); int is_error_page(struct page *page) { - return page == bad_page || page == hwpoison_page || page == fault_page; + return IS_ERR(page); } EXPORT_SYMBOL_GPL(is_error_page); int is_error_pfn(pfn_t pfn) { - return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; + return IS_ERR_VALUE(pfn); } EXPORT_SYMBOL_GPL(is_error_pfn); +static pfn_t get_bad_pfn(void) +{ + return -ENOENT; +} + +pfn_t get_fault_pfn(void) +{ + return -EFAULT; +} +EXPORT_SYMBOL_GPL(get_fault_pfn); + +static pfn_t get_hwpoison_pfn(void) +{ + return -EHWPOISON; +} + int is_hwpoison_pfn(pfn_t pfn) { - return pfn == hwpoison_pfn; + return pfn == -EHWPOISON; } EXPORT_SYMBOL_GPL(is_hwpoison_pfn); int is_noslot_pfn(pfn_t pfn) { - return pfn == bad_pfn; + return pfn == -ENOENT; } EXPORT_SYMBOL_GPL(is_noslot_pfn); int is_invalid_pfn(pfn_t pfn) { - return pfn == hwpoison_pfn || pfn == fault_pfn; + return !is_noslot_pfn(pfn) && is_error_pfn(pfn); } EXPORT_SYMBOL_GPL(is_invalid_pfn); +struct page *get_bad_page(void) +{ + return ERR_PTR(-ENOENT); +} + static inline unsigned long bad_hva(void) { return PAGE_OFFSET; @@ -1038,13 +1053,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -pfn_t get_fault_pfn(void) -{ - get_page(fault_page); - return fault_pfn; -} -EXPORT_SYMBOL_GPL(get_fault_pfn); - int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { @@ -1122,8 +1130,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, if (npages == -EHWPOISON || (!async && check_user_page_hwpoison(addr))) { up_read(¤t->mm->mmap_sem); - get_page(hwpoison_page); - return page_to_pfn(hwpoison_page); + return get_hwpoison_pfn(); } vma = find_vma_intersection(current->mm, addr, addr+1); @@ -1161,10 +1168,8 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, *async = false; addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) { - get_page(bad_page); - return page_to_pfn(bad_page); - } + if (kvm_is_error_hva(addr)) + return get_bad_pfn(); return hva_to_pfn(addr, atomic, async, write_fault, writable); } @@ -1218,37 +1223,45 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, } EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); +static struct page *kvm_pfn_to_page(pfn_t pfn) +{ + WARN_ON(kvm_is_mmio_pfn(pfn)); + + if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn)) + return get_bad_page(); + + return pfn_to_page(pfn); +} + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { pfn_t pfn; pfn = gfn_to_pfn(kvm, gfn); - if (!kvm_is_mmio_pfn(pfn)) - return pfn_to_page(pfn); - - WARN_ON(kvm_is_mmio_pfn(pfn)); - get_page(bad_page); - return bad_page; + return kvm_pfn_to_page(pfn); } EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) { - kvm_release_pfn_clean(page_to_pfn(page)); + if (!is_error_page(page)) + kvm_release_pfn_clean(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!kvm_is_mmio_pfn(pfn)) + if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); void kvm_release_page_dirty(struct page *page) { + WARN_ON(is_error_page(page)); + kvm_release_pfn_dirty(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); @@ -2771,33 +2784,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, if (r) goto out_fail; - bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); - - if (bad_page == NULL) { - r = -ENOMEM; - goto out; - } - - bad_pfn = page_to_pfn(bad_page); - - hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); - - if (hwpoison_page == NULL) { - r = -ENOMEM; - goto out_free_0; - } - - hwpoison_pfn = page_to_pfn(hwpoison_page); - - fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); - - if (fault_page == NULL) { - r = -ENOMEM; - goto out_free_0; - } - - fault_pfn = page_to_pfn(fault_page); - if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; goto out_free_0; @@ -2872,12 +2858,6 @@ out_free_1: out_free_0a: free_cpumask_var(cpus_hardware_enabled); out_free_0: - if (fault_page) - __free_page(fault_page); - if (hwpoison_page) - __free_page(hwpoison_page); - __free_page(bad_page); -out: kvm_arch_exit(); out_fail: return r; @@ -2897,8 +2877,5 @@ void kvm_exit(void) kvm_arch_hardware_unsetup(); kvm_arch_exit(); free_cpumask_var(cpus_hardware_enabled); - __free_page(fault_page); - __free_page(hwpoison_page); - __free_page(bad_page); } EXPORT_SYMBOL_GPL(kvm_exit); -- cgit From 23d43cf998275bc97437931c0cdee1df2c1aa3ca Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 24 Jul 2012 08:51:20 -0400 Subject: KVM: Move KVM_IRQ_LINE to arch-generic code Handle KVM_IRQ_LINE and KVM_IRQ_LINE_STATUS in the generic kvm_vm_ioctl() function and call into kvm_vm_ioctl_irq_line(). This is even more relevant when KVM/ARM also uses this ioctl. Signed-off-by: Christoffer Dall Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index de89497fe4c7..bcf973ec98ff 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2148,6 +2148,29 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_send_userspace_msi(kvm, &msi); break; } +#endif +#ifdef __KVM_HAVE_IRQ_LINE + case KVM_IRQ_LINE_STATUS: + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + + r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + if (r) + goto out; + + r = -EFAULT; + if (ioctl == KVM_IRQ_LINE_STATUS) { + if (copy_to_user(argp, &irq_event, sizeof irq_event)) + goto out; + } + + r = 0; + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); -- cgit From d89cc617b954aff4030fce178f7d86f59aaf713d Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 1 Aug 2012 18:03:28 +0900 Subject: KVM: Push rmap into kvm_arch_memory_slot Two reasons: - x86 can integrate rmap and rmap_pde and remove heuristics in __gfn_to_rmap(). - Some architectures do not need rmap. Since rmap is one of the most memory consuming stuff in KVM, ppc'd better restrict the allocation to Book3S HV. Signed-off-by: Takuya Yoshikawa Acked-by: Paul Mackerras Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bcf973ec98ff..14ec567816ab 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -550,16 +550,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - if (!dont || free->rmap != dont->rmap) - vfree(free->rmap); - if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); kvm_arch_free_memslot(free, dont); free->npages = 0; - free->rmap = NULL; } void kvm_free_physmem(struct kvm *kvm) @@ -768,11 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (npages && !old.npages) { new.user_alloc = user_alloc; new.userspace_addr = mem->userspace_addr; -#ifndef CONFIG_S390 - new.rmap = vzalloc(npages * sizeof(*new.rmap)); - if (!new.rmap) - goto out_free; -#endif /* not defined CONFIG_S390 */ + if (kvm_arch_create_memslot(&new, npages)) goto out_free; } @@ -831,7 +823,6 @@ int __kvm_set_memory_region(struct kvm *kvm, /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { - new.rmap = NULL; new.dirty_bitmap = NULL; memset(&new.arch, 0, sizeof(new.arch)); } -- cgit From 6c8ee57be9350c5c2cafdd6a99d0462d528676e2 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:37:54 +0800 Subject: KVM: introduce KVM_PFN_ERR_FAULT After that, the exported and un-inline function, get_fault_pfn, can be removed Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14ec567816ab..ef0491645a10 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -939,12 +939,6 @@ static pfn_t get_bad_pfn(void) return -ENOENT; } -pfn_t get_fault_pfn(void) -{ - return -EFAULT; -} -EXPORT_SYMBOL_GPL(get_fault_pfn); - static pfn_t get_hwpoison_pfn(void) { return -EHWPOISON; @@ -1115,7 +1109,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, struct vm_area_struct *vma; if (atomic) - return get_fault_pfn(); + return KVM_PFN_ERR_FAULT; down_read(¤t->mm->mmap_sem); if (npages == -EHWPOISON || @@ -1127,7 +1121,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, vma = find_vma_intersection(current->mm, addr, addr+1); if (vma == NULL) - pfn = get_fault_pfn(); + pfn = KVM_PFN_ERR_FAULT; else if ((vma->vm_flags & VM_PFNMAP)) { pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; @@ -1135,7 +1129,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, } else { if (async && (vma->vm_flags & VM_WRITE)) *async = true; - pfn = get_fault_pfn(); + pfn = KVM_PFN_ERR_FAULT; } up_read(¤t->mm->mmap_sem); } else -- cgit From e6c1502b3f933ace20c711ce34ab696f5a67086d Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:38:36 +0800 Subject: KVM: introduce KVM_PFN_ERR_HWPOISON Then, get_hwpoison_pfn and is_hwpoison_pfn can be removed Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef0491645a10..7fce2d5787ae 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -939,17 +939,6 @@ static pfn_t get_bad_pfn(void) return -ENOENT; } -static pfn_t get_hwpoison_pfn(void) -{ - return -EHWPOISON; -} - -int is_hwpoison_pfn(pfn_t pfn) -{ - return pfn == -EHWPOISON; -} -EXPORT_SYMBOL_GPL(is_hwpoison_pfn); - int is_noslot_pfn(pfn_t pfn) { return pfn == -ENOENT; @@ -1115,7 +1104,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, if (npages == -EHWPOISON || (!async && check_user_page_hwpoison(addr))) { up_read(¤t->mm->mmap_sem); - return get_hwpoison_pfn(); + return KVM_PFN_ERR_HWPOISON; } vma = find_vma_intersection(current->mm, addr, addr+1); -- cgit From 950e95097b1c6573ef5e21061ccb56964278c45b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:39:19 +0800 Subject: KVM: introduce KVM_PFN_ERR_BAD Then, remove get_bad_pfn Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7fce2d5787ae..988b2339d846 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -934,11 +934,6 @@ int is_error_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_error_pfn); -static pfn_t get_bad_pfn(void) -{ - return -ENOENT; -} - int is_noslot_pfn(pfn_t pfn) { return pfn == -ENOENT; @@ -1143,7 +1138,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return get_bad_pfn(); + return KVM_PFN_ERR_BAD; return hva_to_pfn(addr, atomic, async, write_fault, writable); } -- cgit From 83f09228d068911ac8797ae8d6febef886698936 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:39:59 +0800 Subject: KVM: inline is_*_pfn functions These functions are exported and can not inline, move them to kvm_host.h to eliminate the overload of function call Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 988b2339d846..eb73e5f13678 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -928,24 +928,6 @@ int is_error_page(struct page *page) } EXPORT_SYMBOL_GPL(is_error_page); -int is_error_pfn(pfn_t pfn) -{ - return IS_ERR_VALUE(pfn); -} -EXPORT_SYMBOL_GPL(is_error_pfn); - -int is_noslot_pfn(pfn_t pfn) -{ - return pfn == -ENOENT; -} -EXPORT_SYMBOL_GPL(is_noslot_pfn); - -int is_invalid_pfn(pfn_t pfn) -{ - return !is_noslot_pfn(pfn) && is_error_pfn(pfn); -} -EXPORT_SYMBOL_GPL(is_invalid_pfn); - struct page *get_bad_page(void) { return ERR_PTR(-ENOENT); -- cgit From 6cede2e6794be6b0649f62d3681e0c4aff5a9270 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:41:22 +0800 Subject: KVM: introduce KVM_ERR_PTR_BAD_PAGE It is used to eliminate the overload of function call and cleanup the code Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eb73e5f13678..93d3c6e063c8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -922,17 +922,6 @@ void kvm_disable_largepages(void) } EXPORT_SYMBOL_GPL(kvm_disable_largepages); -int is_error_page(struct page *page) -{ - return IS_ERR(page); -} -EXPORT_SYMBOL_GPL(is_error_page); - -struct page *get_bad_page(void) -{ - return ERR_PTR(-ENOENT); -} - static inline unsigned long bad_hva(void) { return PAGE_OFFSET; @@ -1179,7 +1168,7 @@ static struct page *kvm_pfn_to_page(pfn_t pfn) WARN_ON(kvm_is_mmio_pfn(pfn)); if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn)) - return get_bad_page(); + return KVM_ERR_PTR_BAD_PAGE; return pfn_to_page(pfn); } -- cgit From cb9aaa30b133574b646d9d4766ef08a843211393 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:42:10 +0800 Subject: KVM: do not release the error pfn After commit a2766325cf9f9, the error pfn is replaced by the error code, it need not be released anymore [ The patch has been compiling tested for powerpc ] Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 93d3c6e063c8..eafba99d1070 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,9 +102,6 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { - if (is_error_pfn(pfn)) - return false; - if (pfn_valid(pfn)) { int reserved; struct page *tail = pfn_to_page(pfn); @@ -1165,10 +1162,13 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); static struct page *kvm_pfn_to_page(pfn_t pfn) { - WARN_ON(kvm_is_mmio_pfn(pfn)); + if (is_error_pfn(pfn)) + return KVM_ERR_PTR_BAD_PAGE; - if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn)) + if (kvm_is_mmio_pfn(pfn)) { + WARN_ON(1); return KVM_ERR_PTR_BAD_PAGE; + } return pfn_to_page(pfn); } @@ -1193,7 +1193,9 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + WARN_ON(is_error_pfn(pfn)); + + if (!kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); -- cgit From 32cad84f44d186654492f1a50a1424c8906ccbd9 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 3 Aug 2012 15:42:52 +0800 Subject: KVM: do not release the error page After commit a2766325cf9f9, the error page is replaced by the error code, it need not be released anymore [ The patch has been compiling tested for powerpc ] Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eafba99d1070..a2e85af847c1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1186,8 +1186,9 @@ EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) { - if (!is_error_page(page)) - kvm_release_pfn_clean(page_to_pfn(page)); + WARN_ON(is_error_page(page)); + + kvm_release_pfn_clean(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_clean); -- cgit From a50d64d6590f470b712758598a077ef4fd0fc40a Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 10:58:13 +0800 Subject: KVM: fix missing check for memslot flags Check flags when memslot is registered from userspace as Avi's suggestion Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a2e85af847c1..7b94d70a323f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -678,6 +678,14 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) slots->generation++; } +static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) +{ + if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES) + return -EINVAL; + + return 0; +} + /* * Allocate some memory and give it an address in the guest physical address * space. @@ -698,6 +706,10 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_memory_slot old, new; struct kvm_memslots *slots, *old_memslots; + r = check_memory_region_flags(mem); + if (r) + goto out; + r = -EINVAL; /* General sanity checks */ if (mem->memory_size & (PAGE_SIZE - 1)) -- cgit From 037d92dc5d4691ae7cf44699c55ca83b1b441992 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 10:59:12 +0800 Subject: KVM: introduce gfn_to_pfn_memslot_atomic It can instead of hva_to_pfn_atomic Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7b94d70a323f..543f9b7e5aa2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1102,12 +1102,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, return pfn; } -pfn_t hva_to_pfn_atomic(unsigned long addr) -{ - return hva_to_pfn(addr, true, NULL, true, NULL); -} -EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); - static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable) { @@ -1155,6 +1149,14 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) return hva_to_pfn(addr, false, NULL, true, NULL); } +pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) +{ + unsigned long addr = gfn_to_hva_memslot(slot, gfn); + + return hva_to_pfn(addr, true, NULL, true, NULL); +} +EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); + int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, int nr_pages) { -- cgit From 86ab8cffb498077e926957f099b064db3751c1de Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 10:59:53 +0800 Subject: KVM: introduce gfn_to_hva_read/kvm_read_hva/kvm_read_hva_atomic This set of functions is only used to read data from host space, in the later patch, we will only get a readonly hva in gfn_to_hva_read, and the function name is a good hint to let gfn_to_hva_read to pair with kvm_read_hva()/kvm_read_hva_atomic() Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 543f9b7e5aa2..6e3ea15397d4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1002,6 +1002,25 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); +/* + * The hva returned by this function is only allowed to be read. + * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). + */ +static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) +{ + return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); +} + +static int kvm_read_hva(void *data, void __user *hva, int len) +{ + return __copy_from_user(data, hva, len); +} + +static int kvm_read_hva_atomic(void *data, void __user *hva, int len) +{ + return __copy_from_user_inatomic(data, hva, len); +} + int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { @@ -1274,10 +1293,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int r; unsigned long addr; - addr = gfn_to_hva(kvm, gfn); + addr = gfn_to_hva_read(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = __copy_from_user(data, (void __user *)addr + offset, len); + r = kvm_read_hva(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; @@ -1312,11 +1331,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, gfn_t gfn = gpa >> PAGE_SHIFT; int offset = offset_in_page(gpa); - addr = gfn_to_hva(kvm, gfn); + addr = gfn_to_hva_read(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); - r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); + r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); pagefault_enable(); if (r) return -EFAULT; -- cgit From 2fc843117d64b31c20d36c6b625c0626c9a41a41 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 11:00:22 +0800 Subject: KVM: reorganize hva_to_pfn We do too many things in hva_to_pfn, this patch reorganize the code, let it be better readable Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 159 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 97 insertions(+), 62 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6e3ea15397d4..aa4a38ad9d9a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1041,83 +1041,118 @@ static inline int check_user_page_hwpoison(unsigned long addr) return rc == -EHWPOISON; } -static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, - bool write_fault, bool *writable) +/* + * The atomic path to get the writable pfn which will be stored in @pfn, + * true indicates success, otherwise false is returned. + */ +static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable, pfn_t *pfn) { struct page *page[1]; - int npages = 0; - pfn_t pfn; + int npages; - /* we can do it either atomically or asynchronously, not both */ - BUG_ON(atomic && async); + if (!(async || atomic)) + return false; - BUG_ON(!write_fault && !writable); + npages = __get_user_pages_fast(addr, 1, 1, page); + if (npages == 1) { + *pfn = page_to_pfn(page[0]); - if (writable) - *writable = true; + if (writable) + *writable = true; + return true; + } + + return false; +} - if (atomic || async) - npages = __get_user_pages_fast(addr, 1, 1, page); +/* + * The slow path to get the pfn of the specified host virtual address, + * 1 indicates success, -errno is returned if error is detected. + */ +static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, + bool *writable, pfn_t *pfn) +{ + struct page *page[1]; + int npages = 0; - if (unlikely(npages != 1) && !atomic) { - might_sleep(); + might_sleep(); - if (writable) - *writable = write_fault; - - if (async) { - down_read(¤t->mm->mmap_sem); - npages = get_user_page_nowait(current, current->mm, - addr, write_fault, page); - up_read(¤t->mm->mmap_sem); - } else - npages = get_user_pages_fast(addr, 1, write_fault, - page); - - /* map read fault as writable if possible */ - if (unlikely(!write_fault) && npages == 1) { - struct page *wpage[1]; - - npages = __get_user_pages_fast(addr, 1, 1, wpage); - if (npages == 1) { - *writable = true; - put_page(page[0]); - page[0] = wpage[0]; - } - npages = 1; + if (writable) + *writable = write_fault; + + if (async) { + down_read(¤t->mm->mmap_sem); + npages = get_user_page_nowait(current, current->mm, + addr, write_fault, page); + up_read(¤t->mm->mmap_sem); + } else + npages = get_user_pages_fast(addr, 1, write_fault, + page); + if (npages != 1) + return npages; + + /* map read fault as writable if possible */ + if (unlikely(!write_fault)) { + struct page *wpage[1]; + + npages = __get_user_pages_fast(addr, 1, 1, wpage); + if (npages == 1) { + *writable = true; + put_page(page[0]); + page[0] = wpage[0]; } + + npages = 1; } + *pfn = page_to_pfn(page[0]); + return npages; +} + +static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, + bool write_fault, bool *writable) +{ + struct vm_area_struct *vma; + pfn_t pfn = 0; + int npages; - if (unlikely(npages != 1)) { - struct vm_area_struct *vma; + /* we can do it either atomically or asynchronously, not both */ + BUG_ON(atomic && async); - if (atomic) - return KVM_PFN_ERR_FAULT; + BUG_ON(!write_fault && !writable); - down_read(¤t->mm->mmap_sem); - if (npages == -EHWPOISON || - (!async && check_user_page_hwpoison(addr))) { - up_read(¤t->mm->mmap_sem); - return KVM_PFN_ERR_HWPOISON; - } + if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) + return pfn; - vma = find_vma_intersection(current->mm, addr, addr+1); - - if (vma == NULL) - pfn = KVM_PFN_ERR_FAULT; - else if ((vma->vm_flags & VM_PFNMAP)) { - pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - BUG_ON(!kvm_is_mmio_pfn(pfn)); - } else { - if (async && (vma->vm_flags & VM_WRITE)) - *async = true; - pfn = KVM_PFN_ERR_FAULT; - } - up_read(¤t->mm->mmap_sem); - } else - pfn = page_to_pfn(page[0]); + if (atomic) + return KVM_PFN_ERR_FAULT; + npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); + if (npages == 1) + return pfn; + + down_read(¤t->mm->mmap_sem); + if (npages == -EHWPOISON || + (!async && check_user_page_hwpoison(addr))) { + pfn = KVM_PFN_ERR_HWPOISON; + goto exit; + } + + vma = find_vma_intersection(current->mm, addr, addr + 1); + + if (vma == NULL) + pfn = KVM_PFN_ERR_FAULT; + else if ((vma->vm_flags & VM_PFNMAP)) { + pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + BUG_ON(!kvm_is_mmio_pfn(pfn)); + } else { + if (async && (vma->vm_flags & VM_WRITE)) + *async = true; + pfn = KVM_PFN_ERR_FAULT; + } +exit: + up_read(¤t->mm->mmap_sem); return pfn; } -- cgit From 12ce13fea925ab629f2281be2b9b01cf647a86cd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 11:00:49 +0800 Subject: KVM: use 'writable' as a hint to map writable pfn In current code, we always map writable pfn for the read fault, in order to support readonly memslot, we map writable pfn only if 'writable' is not NULL Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aa4a38ad9d9a..c89d1b5129e4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1054,6 +1054,14 @@ static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, if (!(async || atomic)) return false; + /* + * Fast pin a writable pfn only if it is a write fault request + * or the caller allows to map a writable pfn for a read fault + * request. + */ + if (!(write_fault || writable)) + return false; + npages = __get_user_pages_fast(addr, 1, 1, page); if (npages == 1) { *pfn = page_to_pfn(page[0]); @@ -1093,7 +1101,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, return npages; /* map read fault as writable if possible */ - if (unlikely(!write_fault)) { + if (unlikely(!write_fault) && writable) { struct page *wpage[1]; npages = __get_user_pages_fast(addr, 1, 1, wpage); @@ -1109,6 +1117,20 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, return npages; } +/* + * Pin guest page in memory and return its pfn. + * @addr: host virtual address which maps memory to the guest + * @atomic: whether this function can sleep + * @async: whether this function need to wait IO complete if the + * host page is not in the memory + * @write_fault: whether we should get a writable host page + * @writable: whether it allows to map a writable host page for !@write_fault + * + * The function will map a writable host page for these two cases: + * 1): @write_fault = true + * 2): @write_fault = false && @writable, @writable will tell the caller + * whether the mapping is writable. + */ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, bool write_fault, bool *writable) { -- cgit From ca3a490c7de8472b514e2d635c052b1e0f8e1c9d Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 11:01:50 +0800 Subject: KVM: introduce KVM_HVA_ERR_BAD Then, remove bad_hva and inline kvm_is_error_hva Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c89d1b5129e4..e3e1658c491d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -931,17 +931,6 @@ void kvm_disable_largepages(void) } EXPORT_SYMBOL_GPL(kvm_disable_largepages); -static inline unsigned long bad_hva(void) -{ - return PAGE_OFFSET; -} - -int kvm_is_error_hva(unsigned long addr) -{ - return addr == bad_hva(); -} -EXPORT_SYMBOL_GPL(kvm_is_error_hva); - struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); @@ -988,7 +977,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages) { if (!slot || slot->flags & KVM_MEMSLOT_INVALID) - return bad_hva(); + return KVM_HVA_ERR_BAD; if (nr_pages) *nr_pages = slot->npages - (gfn - slot->base_gfn); -- cgit From 4d8b81abc47b83a1939e59df2fdb0e98dfe0eedd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Aug 2012 11:02:51 +0800 Subject: KVM: introduce readonly memslot In current code, if we map a readonly memory space from host to guest and the page is not currently mapped in the host, we will get a fault pfn and async is not allowed, then the vm will crash We introduce readonly memory region to map ROM/ROMD to the guest, read access is happy for readonly memslot, write access on readonly memslot will cause KVM_EXIT_MMIO exit Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 96 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 18 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3e1658c491d..3416f8a31f63 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -680,7 +680,13 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { - if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES) + u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; + +#ifdef KVM_CAP_READONLY_MEM + valid_flags |= KVM_MEM_READONLY; +#endif + + if (mem->flags & ~valid_flags) return -EINVAL; return 0; @@ -973,18 +979,45 @@ out: return size; } -static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, - gfn_t *nr_pages) +static bool memslot_is_readonly(struct kvm_memory_slot *slot) +{ + return slot->flags & KVM_MEM_READONLY; +} + +static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot, + gfn_t gfn) +{ + return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; +} + +static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, + gfn_t *nr_pages, bool write) { if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return KVM_HVA_ERR_BAD; + if (memslot_is_readonly(slot) && write) + return KVM_HVA_ERR_RO_BAD; + if (nr_pages) *nr_pages = slot->npages - (gfn - slot->base_gfn); - return gfn_to_hva_memslot(slot, gfn); + return __gfn_to_hva_memslot(slot, gfn); } +static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, + gfn_t *nr_pages) +{ + return __gfn_to_hva_many(slot, gfn, nr_pages, true); +} + +unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, + gfn_t gfn) +{ + return gfn_to_hva_many(slot, gfn, NULL); +} +EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); @@ -997,7 +1030,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); */ static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) { - return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); + return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); } static int kvm_read_hva(void *data, void __user *hva, int len) @@ -1106,6 +1139,17 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, return npages; } +static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) +{ + if (unlikely(!(vma->vm_flags & VM_READ))) + return false; + + if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) + return false; + + return true; +} + /* * Pin guest page in memory and return its pfn. * @addr: host virtual address which maps memory to the guest @@ -1130,8 +1174,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, /* we can do it either atomically or asynchronously, not both */ BUG_ON(atomic && async); - BUG_ON(!write_fault && !writable); - if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) return pfn; @@ -1158,7 +1200,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, vma->vm_pgoff; BUG_ON(!kvm_is_mmio_pfn(pfn)); } else { - if (async && (vma->vm_flags & VM_WRITE)) + if (async && vma_is_valid(vma, write_fault)) *async = true; pfn = KVM_PFN_ERR_FAULT; } @@ -1167,19 +1209,40 @@ exit: return pfn; } +static pfn_t +__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, + bool *async, bool write_fault, bool *writable) +{ + unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); + + if (addr == KVM_HVA_ERR_RO_BAD) + return KVM_PFN_ERR_RO_FAULT; + + if (kvm_is_error_hva(addr)) + return KVM_PFN_ERR_BAD; + + /* Do not map writable pfn in the readonly memslot. */ + if (writable && memslot_is_readonly(slot)) { + *writable = false; + writable = NULL; + } + + return hva_to_pfn(addr, atomic, async, write_fault, + writable); +} + static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, bool write_fault, bool *writable) { - unsigned long addr; + struct kvm_memory_slot *slot; if (async) *async = false; - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return KVM_PFN_ERR_BAD; + slot = gfn_to_memslot(kvm, gfn); - return hva_to_pfn(addr, atomic, async, write_fault, writable); + return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, + writable); } pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) @@ -1210,15 +1273,12 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - unsigned long addr = gfn_to_hva_memslot(slot, gfn); - return hva_to_pfn(addr, false, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); } pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) { - unsigned long addr = gfn_to_hva_memslot(slot, gfn); - - return hva_to_pfn(addr, true, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); -- cgit From 66a03505a7fcc70187319ef2318832f4d3c451a6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 24 Aug 2012 16:50:28 +0800 Subject: KVM: PPC: book3s: fix build error caused by gfn_to_hva_memslot() The build error was caused by that builtin functions are calling the functions implemented in modules. This error was introduced by commit 4d8b81abc4 ("KVM: introduce readonly memslot"). The patch fixes the build error by moving function __gfn_to_hva_memslot() from kvm_main.c to kvm_host.h and making that "inline" so that the builtin function (kvmppc_h_enter) can use that. Acked-by: Paul Mackerras Signed-off-by: Gavin Shan Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3416f8a31f63..6425906d7cec 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -984,12 +984,6 @@ static bool memslot_is_readonly(struct kvm_memory_slot *slot) return slot->flags & KVM_MEM_READONLY; } -static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot, - gfn_t gfn) -{ - return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; -} - static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, gfn_t *nr_pages, bool write) { -- cgit From 2df72e9bc4c505d8357012f2924589f3d16f9d44 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 24 Aug 2012 15:54:57 -0300 Subject: KVM: split kvm_arch_flush_shadow Introducing kvm_arch_flush_shadow_memslot, to invalidate the translations of a single memory slot. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6425906d7cec..a4bf05be5fea 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -408,7 +408,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, int idx; idx = srcu_read_lock(&kvm->srcu); - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); srcu_read_unlock(&kvm->srcu, idx); } @@ -582,7 +582,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); #else - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); kvm_free_physmem(kvm); @@ -814,7 +814,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) * - kvm_is_visible_gfn (mmu_check_roots) */ - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_memslot(kvm, slot); kfree(old_memslots); } @@ -854,7 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * mmio sptes. */ if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) - kvm_arch_flush_shadow(kvm); + kvm_arch_flush_shadow_all(kvm); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit From 12d6e7538e2d418c08f082b1b44ffa5fb7270ed8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 24 Aug 2012 15:54:58 -0300 Subject: KVM: perform an invalid memslot step for gpa base change PPC must flush all translations before the new memory slot is visible. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a4bf05be5fea..f41ea1262d51 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -791,7 +791,7 @@ int __kvm_set_memory_region(struct kvm *kvm, /* destroy any largepage mappings for dirty tracking */ } - if (!npages) { + if (!npages || base_gfn != old.base_gfn) { struct kvm_memory_slot *slot; r = -ENOMEM; @@ -807,8 +807,8 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - /* From this point no new shadow pages pointing to a deleted - * memslot will be created. + /* From this point no new shadow pages pointing to a deleted, + * or moved, memslot will be created. * * validation of sp->gfn happens in: * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) -- cgit From 3b4dc3a031110753b9ba36432dbd21f989fcee56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 28 Aug 2012 17:43:26 -0300 Subject: KVM: move postcommit flush to x86, as mmio sptes are x86 specific Other arches do not need this. Signed-off-by: Marcelo Tosatti v2: fix incorrect deletion of mmio sptes on gpa move (noticed by Takuya) Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f41ea1262d51..4fe02d900810 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -849,13 +849,6 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); - /* - * If the new memory slot is created, we need to clear all - * mmio sptes. - */ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) - kvm_arch_flush_shadow_all(kvm); - kvm_free_physmem_slot(&old, &new); kfree(old_memslots); -- cgit From 9fc77441e5e1bf80b794cc546d2243ee9f4afb75 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 16 Sep 2012 11:50:30 +0300 Subject: KVM: make processes waiting on vcpu mutex killable vcpu mutex can be held for unlimited time so taking it with mutex_lock on an ioctl is wrong: one process could be passed a vcpu fd and call this ioctl on the vcpu used by another process, it will then be unkillable until the owner exits. Call mutex_lock_killable instead and return status. Note: mutex_lock_interruptible would be even nicer, but I am not sure all users are prepared to handle EINTR from these ioctls. They might misinterpret it as an error. Cleanup paths expect a vcpu that can't be used by any userspace so this will always succeed - catch bugs by calling BUG_ON. Catch callers that don't check return state by adding __must_check. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4fe02d900810..cc3f6dc506e4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn) /* * Switches to specified vcpu, until a matching vcpu_put() */ -void vcpu_load(struct kvm_vcpu *vcpu) +int vcpu_load(struct kvm_vcpu *vcpu) { int cpu; - mutex_lock(&vcpu->mutex); + if (mutex_lock_killable(&vcpu->mutex)) + return -EINTR; if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ struct pid *oldpid = vcpu->pid; @@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); + return 0; } void vcpu_put(struct kvm_vcpu *vcpu) @@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp, #endif - vcpu_load(vcpu); + r = vcpu_load(vcpu); + if (r) + return r; switch (ioctl) { case KVM_RUN: r = -EINVAL; -- cgit