From 087e15206d6ac0d46734e2b0ab34370c0fdca481 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 13:46:22 -0700 Subject: KVM: Set file_operations.owner appropriately for all such structures Set .owner for all KVM-owned file types so that the KVM module is pinned until any files with callbacks back into KVM are completely freed. Using "struct kvm" as a proxy for the module, i.e. keeping KVM-the-module alive while there are active VMs, doesn't provide full protection. Userspace can invoke delete_module() the instant the last reference to KVM is put. If KVM itself puts the last reference, e.g. via kvm_destroy_vm(), then it's possible for KVM to be preempted and deleted/unloaded before KVM fully exits, e.g. when the task running kvm_destroy_vm() is scheduled back in, it will jump to a code page that is no longer mapped. Note, file types that can call into sub-module code, e.g. kvm-intel.ko or kvm-amd.ko on x86, must use the module pointer passed to kvm_init(), not THIS_MODULE (which points at kvm.ko). KVM assumes that if /dev/kvm is reachable, e.g. VMs are active, then the vendor module is loaded. To reduce the probability of forgetting to set .owner entirely, use THIS_MODULE for stats files where KVM does not call back into vendor code. This reverts commit 70375c2d8fa3fb9b0b59207a9c5df1e2e1205c10, and fixes several other file types that have been buggy since their introduction. 
Fixes: 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations"") Fixes: 3bcd0662d66f ("KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file") Reported-by: Al Viro Link: https://lore.kernel.org/all/20231010003746.GN800259@ZenIV Link: https://lore.kernel.org/r/20231018204624.1905300-2-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 486800a7024b..1e65a506985f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3887,7 +3887,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_vcpu_fops = { +static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, @@ -4081,6 +4081,7 @@ static int kvm_vcpu_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vcpu_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vcpu_stats_read, .release = kvm_vcpu_stats_release, .llseek = noop_llseek, @@ -4431,7 +4432,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_device_fops = { +static struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, .release = kvm_device_release, KVM_COMPAT(kvm_device_ioctl), @@ -4759,6 +4760,7 @@ static int kvm_vm_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vm_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vm_stats_read, .release = kvm_vm_stats_release, .llseek = noop_llseek, @@ -5060,7 +5062,7 @@ static long kvm_vm_compat_ioctl(struct file *filp, } #endif -static const struct file_operations kvm_vm_fops = { +static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, 
.llseek = noop_llseek, @@ -6095,6 +6097,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) goto err_async_pf; kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; + kvm_device_fops.owner = module; kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; -- cgit From ea61294befd361ab8260c65d53987b400e5599a7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 13:46:24 -0700 Subject: Revert "KVM: Prevent module exit until all VMs are freed" Revert KVM's misguided attempt to "fix" a use-after-module-unload bug that was actually due to failure to flush a workqueue, not a lack of module refcounting. Pinning the KVM module until kvm_destroy_vm() doesn't prevent use-after-free due to the module being unloaded, as userspace can invoke delete_module() the instant the last reference to KVM is put, i.e. can cause all KVM code to be unmapped while KVM is actively executing said code. Generally speaking, the many instances of module_put(THIS_MODULE) notwithstanding, outside of a few special paths, a module can never safely put the last reference to itself without creating deadlock, i.e. something external to the module *must* put the last reference. In other words, having VMs grab a reference to the KVM module is futile, pointless, and as evidenced by the now-reverted commit 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations""), actively dangerous. This reverts commit 405294f29faee5de8c10cb9d4a90e229c2835279 and commit 5f6de5cbebee925a612856fce6f9182bb3eee0db. 
Fixes: 405294f29fae ("KVM: Unconditionally get a ref to /dev/kvm module when creating a VM") Fixes: 5f6de5cbebee ("KVM: Prevent module exit until all VMs are freed") Link: https://lore.kernel.org/r/20231018204624.1905300-4-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1e65a506985f..3b1b9e8dd70c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -115,8 +115,6 @@ EXPORT_SYMBOL_GPL(kvm_debugfs_dir); static const struct file_operations stat_fops_per_vm; -static struct file_operations kvm_chardev_ops; - static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); #ifdef CONFIG_KVM_COMPAT @@ -1157,9 +1155,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (!kvm) return ERR_PTR(-ENOMEM); - /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ - __module_get(kvm_chardev_ops.owner); - KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; @@ -1279,7 +1274,6 @@ out_err_no_irq_srcu: out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); - module_put(kvm_chardev_ops.owner); return ERR_PTR(r); } @@ -1348,7 +1342,6 @@ static void kvm_destroy_vm(struct kvm *kvm) preempt_notifier_dec(); hardware_disable_all(); mmdrop(mm); - module_put(kvm_chardev_ops.owner); } void kvm_get_kvm(struct kvm *kvm) -- cgit