From 9121923c457d1d8667a6e3a67302c29e5c5add6b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 24 Oct 2019 16:03:26 -0700 Subject: kvm: Allocate memslots and buses before calling kvm_arch_init_vm This reorganization will allow us to call kvm_arch_destroy_vm in the event that kvm_create_vm fails after calling kvm_arch_init_vm. Suggested-by: Junaid Shahid Signed-off-by: Jim Mattson Reviewed-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 67ef3f2e19e8..ec14dae2f538 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -627,8 +627,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) static struct kvm *kvm_create_vm(unsigned long type) { - int r, i; struct kvm *kvm = kvm_arch_alloc_vm(); + int r = -ENOMEM; + int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -643,6 +644,25 @@ static struct kvm *kvm_create_vm(unsigned long type) refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); + BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + struct kvm_memslots *slots = kvm_alloc_memslots(); + + if (!slots) + goto out_err_no_disable; + /* Generations must be different for each address space. */ + slots->generation = i; + rcu_assign_pointer(kvm->memslots[i], slots); + } + + for (i = 0; i < KVM_NR_BUSES; i++) { + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); + if (!kvm->buses[i]) + goto out_err_no_disable; + } + r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_disable; @@ -655,28 +675,10 @@ static struct kvm *kvm_create_vm(unsigned long type) INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif - BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); - - r = -ENOMEM; - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - struct kvm_memslots *slots = kvm_alloc_memslots(); - if (!slots) - goto out_err_no_srcu; - /* Generations must be different for each address space. */ - slots->generation = i; - rcu_assign_pointer(kvm->memslots[i], slots); - } - if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; - for (i = 0; i < KVM_NR_BUSES; i++) { - rcu_assign_pointer(kvm->buses[i], - kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); - if (!kvm->buses[i]) - goto out_err; - } r = kvm_init_mmu_notifier(kvm); if (r) -- cgit From a97b0e773e492ae319a7e981e98962a1060215f9 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 25 Oct 2019 13:34:58 +0200 Subject: kvm: call kvm_arch_destroy_vm if vm creation fails In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but then fail later in the function, we need to call kvm_arch_destroy_vm() so that it can do any necessary cleanup (like freeing memory). Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support") Signed-off-by: John Sperbeck Signed-off-by: Jim Mattson Reviewed-by: Junaid Shahid [Remove dependency on "kvm: Don't clear reference count on kvm_create_vm() error path" which was not committed. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ec14dae2f538..d6f0696d98ef 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -641,7 +641,6 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); @@ -650,7 +649,7 @@ static struct kvm *kvm_create_vm(unsigned long type) struct kvm_memslots *slots = kvm_alloc_memslots(); if (!slots) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; /* Generations must be different for each address space. */ slots->generation = i; rcu_assign_pointer(kvm->memslots[i], slots); @@ -660,12 +659,13 @@ static struct kvm *kvm_create_vm(unsigned long type) rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT)); if (!kvm->buses[i]) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; } + refcount_set(&kvm->users_count, 1); r = kvm_arch_init_vm(kvm, type); if (r) - goto out_err_no_disable; + goto out_err_no_arch_destroy_vm; r = hardware_enable_all(); if (r) @@ -699,7 +699,9 @@ out_err_no_irq_srcu: out_err_no_srcu: hardware_disable_all(); out_err_no_disable: - refcount_set(&kvm->users_count, 0); + kvm_arch_destroy_vm(kvm); + WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); +out_err_no_arch_destroy_vm: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) -- cgit From c57c80467f90e5504c8df9ad3555d2c78800bf94 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Mon, 4 Nov 2019 12:22:02 +0100 Subject: kvm: Add helper function for creating VM worker threads Add a function to create a kernel thread associated with a given VM. In particular, it ensures that the worker thread inherits the priority and cgroups of the calling thread. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner --- virt/kvm/kvm_main.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6f0696d98ef..8aed32b604d9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -4371,3 +4372,86 @@ void kvm_exit(void) kvm_vfio_ops_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +struct kvm_vm_worker_thread_context { + struct kvm *kvm; + struct task_struct *parent; + struct completion init_done; + kvm_vm_thread_fn_t thread_fn; + uintptr_t data; + int err; +}; + +static int kvm_vm_worker_thread(void *context) +{ + /* + * The init_context is allocated on the stack of the parent thread, so + * we have to locally copy anything that is needed beyond initialization + */ + struct kvm_vm_worker_thread_context *init_context = context; + struct kvm *kvm = init_context->kvm; + kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; + uintptr_t data = init_context->data; + int err; + + err = kthread_park(current); + /* kthread_park(current) is never supposed to return an error */ + WARN_ON(err != 0); + if (err) + goto init_complete; + + err = cgroup_attach_task_all(init_context->parent, current); + if (err) { + kvm_err("%s: cgroup_attach_task_all failed with err %d\n", + __func__, err); + goto init_complete; + } + + set_user_nice(current, task_nice(init_context->parent)); + +init_complete: + init_context->err = err; + complete(&init_context->init_done); + init_context = NULL; + + if (err) + return err; + + /* Wait to be woken up by the spawner before proceeding. */ + kthread_parkme(); + + if (!kthread_should_stop()) + err = thread_fn(kvm, data); + + return err; +} + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr) +{ + struct kvm_vm_worker_thread_context init_context = {}; + struct task_struct *thread; + + *thread_ptr = NULL; + init_context.kvm = kvm; + init_context.parent = current; + init_context.thread_fn = thread_fn; + init_context.data = data; + init_completion(&init_context.init_done); + + thread = kthread_run(kvm_vm_worker_thread, &init_context, + "%s-%d", name, task_pid_nr(current)); + if (IS_ERR(thread)) + return PTR_ERR(thread); + + /* kthread_run is never supposed to return NULL */ + WARN_ON(thread == NULL); + + wait_for_completion(&init_context.init_done); + + if (!init_context.err) + *thread_ptr = thread; + + return init_context.err; +} -- cgit From 1aa9b9572b10529c2e64e2b8f44025d86e124308 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Mon, 4 Nov 2019 20:26:00 +0100 Subject: kvm: x86: mmu: Recovery of shattered NX large pages The page table pages corresponding to broken down large pages are zapped in FIFO order, so that the large page can potentially be recovered, if it is not longer being used for execution. This removes the performance penalty for walking deeper EPT page tables. By default, one large page will last about one hour once the guest reaches a steady state. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner --- virt/kvm/kvm_main.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8aed32b604d9..4aab3547a165 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -626,6 +626,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; } +/* + * Called after the VM is otherwise initialized, but just before adding it to + * the vm_list. + */ +int __weak kvm_arch_post_init_vm(struct kvm *kvm) +{ + return 0; +} + +/* + * Called just after removing the VM from the vm_list, but before doing any + * other destruction. + */ +void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ +} + static struct kvm *kvm_create_vm(unsigned long type) { struct kvm *kvm = kvm_arch_alloc_vm(); @@ -682,6 +699,10 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_irq_srcu; r = kvm_init_mmu_notifier(kvm); + if (r) + goto out_err_no_mmu_notifier; + + r = kvm_arch_post_init_vm(kvm); if (r) goto out_err; @@ -694,6 +715,11 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + if (kvm->mmu_notifier.ops) + mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); +#endif +out_err_no_mmu_notifier: cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); @@ -738,6 +764,8 @@ static void kvm_destroy_vm(struct kvm *kvm) mutex_lock(&kvm_lock); list_del(&kvm->vm_list); mutex_unlock(&kvm_lock); + kvm_arch_pre_destroy_vm(kvm); + kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); -- cgit From 8a44119a98bee4381d28f3ed1e41dfacf5c3aa6d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 Nov 2019 12:16:49 +0100 Subject: KVM: Fix NULL-ptr deref after kvm_create_vm fails Reported by syzkaller: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 14727 Comm: syz-executor.3 Not tainted 5.4.0-rc4+ #0 RIP: 0010:kvm_coalesced_mmio_init+0x5d/0x110 arch/x86/kvm/../../../virt/kvm/coalesced_mmio.c:121 Call Trace: kvm_dev_ioctl_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:3446 [inline] kvm_dev_ioctl+0x781/0x1490 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3494 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:509 [inline] do_vfs_ioctl+0x196/0x1150 fs/ioctl.c:696 ksys_ioctl+0x62/0x90 fs/ioctl.c:713 __do_sys_ioctl fs/ioctl.c:720 [inline] __se_sys_ioctl fs/ioctl.c:718 [inline] __x64_sys_ioctl+0x6e/0xb0 fs/ioctl.c:718 do_syscall_64+0xca/0x5d0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Commit 9121923c457d ("kvm: Allocate memslots and buses before calling kvm_arch_init_vm") moves memslots and buses allocations around, however, if kvm->srcu/irq_srcu fails initialization, NULL will be returned instead of error code, NULL will not be intercepted in kvm_dev_ioctl_create_vm() and be dereferenced by kvm_coalesced_mmio_init(), this patch fixes it. Moving the initialization is required anyway to avoid an incorrect synchronize_srcu that was also reported by syzkaller: wait_for_completion+0x29c/0x440 kernel/sched/completion.c:136 __synchronize_srcu+0x197/0x250 kernel/rcu/srcutree.c:921 synchronize_srcu_expedited kernel/rcu/srcutree.c:946 [inline] synchronize_srcu+0x239/0x3e8 kernel/rcu/srcutree.c:997 kvm_page_track_unregister_notifier+0xe7/0x130 arch/x86/kvm/page_track.c:212 kvm_mmu_uninit_vm+0x1e/0x30 arch/x86/kvm/mmu.c:5828 kvm_arch_destroy_vm+0x4a2/0x5f0 arch/x86/kvm/x86.c:9579 kvm_create_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:702 [inline] so do it. Reported-by: syzbot+89a8060879fa0bd2db4f@syzkaller.appspotmail.com Reported-by: syzbot+e27e7027eb2b80e44225@syzkaller.appspotmail.com Fixes: 9121923c457d ("kvm: Allocate memslots and buses before calling kvm_arch_init_vm") Cc: Jim Mattson Cc: Wanpeng Li Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d6f0696d98ef..e22ff63e5b1a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -645,6 +645,11 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); + if (init_srcu_struct(&kvm->srcu)) + goto out_err_no_srcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_no_irq_srcu; + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { struct kvm_memslots *slots = kvm_alloc_memslots(); @@ -675,11 +680,6 @@ static struct kvm *kvm_create_vm(unsigned long type) INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif - if (init_srcu_struct(&kvm->srcu)) - goto out_err_no_srcu; - if (init_srcu_struct(&kvm->irq_srcu)) - goto out_err_no_irq_srcu; - r = kvm_init_mmu_notifier(kvm); if (r) goto out_err; @@ -693,10 +693,6 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: - cleanup_srcu_struct(&kvm->irq_srcu); -out_err_no_irq_srcu: - cleanup_srcu_struct(&kvm->srcu); -out_err_no_srcu: hardware_disable_all(); out_err_no_disable: kvm_arch_destroy_vm(kvm); @@ -706,6 +702,10 @@ out_err_no_arch_destroy_vm: kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_no_irq_srcu: + cleanup_srcu_struct(&kvm->srcu); +out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); -- cgit From e2d3fcaf939dded3da604a25ebbea9fb954c2280 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 Nov 2019 13:23:53 +0100 Subject: KVM: fix placement of refcount initialization Reported by syzkaller: ============================= WARNING: suspicious RCU usage ----------------------------- ./include/linux/kvm_host.h:536 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by repro_11/12688. stack backtrace: Call Trace: dump_stack+0x7d/0xc5 lockdep_rcu_suspicious+0x123/0x170 kvm_dev_ioctl+0x9a9/0x1260 [kvm] do_vfs_ioctl+0x1a1/0xfb0 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x73/0xb0 do_syscall_64+0x108/0xaa0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Commit a97b0e773e4 (kvm: call kvm_arch_destroy_vm if vm creation fails) sets users_count to 1 before kvm_arch_init_vm(), however, if kvm_arch_init_vm() fails, we need to decrease this count. By moving it earlier, we can push the decrease to out_err_no_arch_destroy_vm without introducing yet another error label. syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=15209b84e00000 Reported-by: syzbot+75475908cd0910f141ee@syzkaller.appspotmail.com Fixes: a97b0e773e49 ("kvm: call kvm_arch_destroy_vm if vm creation fails") Cc: Jim Mattson Analyzed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e22ff63e5b1a..e7a07132cd7f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -650,6 +650,7 @@ static struct kvm *kvm_create_vm(unsigned long type) if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; + refcount_set(&kvm->users_count, 1); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { struct kvm_memslots *slots = kvm_alloc_memslots(); @@ -667,7 +668,6 @@ static struct kvm *kvm_create_vm(unsigned long type) goto out_err_no_arch_destroy_vm; } - refcount_set(&kvm->users_count, 1); r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -696,8 +696,8 @@ out_err: hardware_disable_all(); out_err_no_disable: kvm_arch_destroy_vm(kvm); - WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); out_err_no_arch_destroy_vm: + WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) -- cgit From a78986aae9b2988f8493f9f65a587ee433e83bc3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 11 Nov 2019 14:12:27 -0800 Subject: KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and instead manually handle ZONE_DEVICE on a case-by-case basis. For things like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal pages, e.g. put pages grabbed via gup(). But for flows such as setting A/D bits or shifting refcounts for transparent huge pages, KVM needs to to avoid processing ZONE_DEVICE pages as the flows in question lack the underlying machinery for proper handling of ZONE_DEVICE pages. This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup() when running a KVM guest backed with /dev/dax memory, as KVM straight up doesn't put any references to ZONE_DEVICE pages acquired by gup(). Note, Dan Williams proposed an alternative solution of doing put_page() on ZONE_DEVICE pages immediately after gup() in order to simplify the auditing needed to ensure is_zone_device_page() is called if and only if the backing device is pinned (via gup()). But that approach would break kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til unmap() when accessing guest memory, unlike KVM's secondary MMU, which coordinates with mmu_notifier invalidations to avoid creating stale page references, i.e. doesn't rely on pages being pinned. [*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl Reported-by: Adam Borowski Analyzed-by: David Hildenbrand Acked-by: Dan Williams Cc: stable@vger.kernel.org Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e7a07132cd7f..0dac149ead16 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -149,10 +149,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, return 0; } +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) +{ + /* + * The metadata used by is_zone_device_page() to determine whether or + * not a page is ZONE_DEVICE is guaranteed to be valid if and only if + * the device has been pinned, e.g. by get_user_pages(). WARN if the + * page_count() is zero to help detect bad usage of this helper. + */ + if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn)))) + return false; + + return is_zone_device_page(pfn_to_page(pfn)); +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { + /* + * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting + * perspective they are "normal" pages, albeit with slightly different + * usage rules. + */ if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)) && + !kvm_is_zone_device_pfn(pfn); return true; } @@ -1857,7 +1877,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) { + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { struct page *page = pfn_to_page(pfn); SetPageDirty(page); @@ -1867,7 +1887,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); -- cgit From b9876e6de123adb52ac693bac08c493e989bd93e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Nov 2019 16:05:23 +0000 Subject: KVM: Forbid /dev/kvm being opened by a compat task when CONFIG_KVM_COMPAT=n On a system without KVM_COMPAT, we prevent IOCTLs from being issued by a compat task. Although this prevents most silly things from happening, it can still confuse a 32bit userspace that is able to open the kvm device (the qemu test suite seems to be pretty mad with this behaviour). Take a more radical approach and return a -ENODEV to the compat task. Reported-by: Peter Maydell Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 524cff24a68d..6a65ed915c7a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -124,7 +124,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, #else static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } -#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl + +static int kvm_no_compat_open(struct inode *inode, struct file *file) +{ + return is_compat_task() ? -ENODEV : 0; +} +#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \ + .open = kvm_no_compat_open #endif static int hardware_enable_all(void); static void hardware_disable_all(void); -- cgit From 9cb09e7c1c9af2968d5186ef9085f05641ab65d9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 14 Nov 2019 13:17:39 +0000 Subject: KVM: Add a comment describing the /dev/kvm no_compat handling Add a comment explaining the rational behind having both no_compat open and ioctl callbacks to fend off compat tasks. Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a65ed915c7a..13efc291b1c7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -122,6 +122,13 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #define KVM_COMPAT(c) .compat_ioctl = (c) #else +/* + * For architectures that don't implement a compat infrastructure, + * adopt a double line of defense: + * - Prevent a compat task from opening /dev/kvm + * - If the open has been done by a 64bit task, and the KVM fd + * passed to a compat task, let the ioctls fail. + */ static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { return -EINVAL; } -- cgit