diff options
Diffstat (limited to 'drivers/vfio/vfio_main.c')
| -rw-r--r-- | drivers/vfio/vfio_main.c | 450 |
1 files changed, 418 insertions, 32 deletions
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index f0ca33b2e1df..f7df90c423b4 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -16,16 +16,19 @@ #include <linux/fs.h> #include <linux/idr.h> #include <linux/iommu.h> -#ifdef CONFIG_HAVE_KVM +#if IS_ENABLED(CONFIG_KVM) #include <linux/kvm_host.h> #endif #include <linux/list.h> #include <linux/miscdevice.h> #include <linux/module.h> +#include <linux/mount.h> #include <linux/mutex.h> #include <linux/pci.h> +#include <linux/pseudo_fs.h> #include <linux/rwsem.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/string.h> @@ -43,9 +46,13 @@ #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO - User Level meta-driver" +#define VFIO_MAGIC 0x5646494f /* "VFIO" */ + static struct vfio { struct class *device_class; struct ida device_ida; + struct vfsmount *vfs_mount; + int fs_count; } vfio; #ifdef CONFIG_VFIO_NOIOMMU @@ -141,6 +148,21 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) } EXPORT_SYMBOL_GPL(vfio_device_set_open_count); +struct vfio_device * +vfio_find_device_in_devset(struct vfio_device_set *dev_set, + struct device *dev) +{ + struct vfio_device *cur; + + lockdep_assert_held(&dev_set->lock); + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) + if (cur->dev == dev) + return cur; + return NULL; +} +EXPORT_SYMBOL_GPL(vfio_find_device_in_devset); + /* * Device objects - create, release, get, put, search */ @@ -150,11 +172,13 @@ void vfio_device_put_registration(struct vfio_device *device) if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } +EXPORT_SYMBOL_GPL(vfio_device_put_registration); bool vfio_device_try_get_registration(struct vfio_device *device) { return refcount_inc_not_zero(&device->refcount); } +EXPORT_SYMBOL_GPL(vfio_device_try_get_registration); /* * VFIO driver API @@ -171,6 +195,8 @@ static void vfio_device_release(struct device *dev) if (device->ops->release) device->ops->release(device); + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); kvfree(device); } @@ -213,6 +239,34 @@ out_free: } EXPORT_SYMBOL_GPL(_vfio_alloc_device); +static int vfio_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type vfio_fs_type = { + .name = "vfio", + .owner = THIS_MODULE, + .init_fs_context = vfio_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct inode *vfio_fs_inode_new(void) +{ + struct inode *inode; + int ret; + + ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count); + if (ret) + return ERR_PTR(ret); + + inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb); + if (IS_ERR(inode)) + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); + + return inode; +} + /* * Initialize a vfio_device so it can be registered to vfio core. */ @@ -231,6 +285,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, init_completion(&device->comp); device->dev = dev; device->ops = ops; + device->inode = vfio_fs_inode_new(); + if (IS_ERR(device->inode)) { + ret = PTR_ERR(device->inode); + goto out_inode; + } if (ops->init) { ret = ops->init(device); @@ -245,6 +304,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, return 0; out_uninit: + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); +out_inode: vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); return ret; @@ -258,7 +320,8 @@ static int __vfio_register_dev(struct vfio_device *device, if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) && (!device->ops->bind_iommufd || !device->ops->unbind_iommufd || - !device->ops->attach_ioas))) + !device->ops->attach_ioas || + !device->ops->detach_ioas))) return -EINVAL; /* @@ -276,7 +339,18 @@ static int __vfio_register_dev(struct vfio_device *device, if (ret) return ret; - ret = device_add(&device->device); + /* + * VFIO always sets IOMMU_CACHE because we offer no way for userspace to + * restore cache coherency. It has to be checked here because it is only + * valid for cases where we are using iommu groups. + */ + if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) && + !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) { + ret = -EINVAL; + goto err_out; + } + + ret = vfio_device_add(device); if (ret) goto err_out; @@ -284,6 +358,7 @@ static int __vfio_register_dev(struct vfio_device *device, refcount_set(&device->refcount, 1); vfio_device_group_register(device); + vfio_device_debugfs_init(device); return 0; err_out: @@ -316,6 +391,18 @@ void vfio_unregister_group_dev(struct vfio_device *device) bool interrupted = false; long rc; + /* + * Prevent new device opened by userspace via the + * VFIO_GROUP_GET_DEVICE_FD in the group path. + */ + vfio_device_group_unregister(device); + + /* + * Balances vfio_device_add() in register path, also prevents + * new device opened by userspace in the cdev path. + */ + vfio_device_del(device); + vfio_device_put_registration(device); rc = try_wait_for_completion(&device->comp); while (rc <= 0) { @@ -339,18 +426,14 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } - vfio_device_group_unregister(device); - - /* Balances device_add in register path */ - device_del(&device->device); - + vfio_device_debugfs_exit(device); /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); -#ifdef CONFIG_HAVE_KVM -void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) +#if IS_ENABLED(CONFIG_KVM) +void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) { void (*pfn)(struct kvm *kvm); bool (*fn)(struct kvm *kvm); @@ -358,6 +441,9 @@ void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm) lockdep_assert_held(&device->dev_set->lock); + if (!kvm) + return; + pfn = symbol_get(kvm_put_kvm); if (WARN_ON(!pfn)) return; @@ -404,9 +490,25 @@ static bool vfio_assert_device_open(struct vfio_device *device) return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } -static int vfio_device_first_open(struct vfio_device *device, - struct iommufd_ctx *iommufd) +struct vfio_device_file * +vfio_allocate_device_file(struct vfio_device *device) { + struct vfio_device_file *df; + + df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT); + if (!df) + return ERR_PTR(-ENOMEM); + + df->device = device; + spin_lock_init(&df->kvm_ref_lock); + + return df; +} + +static int vfio_df_device_first_open(struct vfio_device_file *df) +{ + struct vfio_device *device = df->device; + struct iommufd_ctx *iommufd = df->iommufd; int ret; lockdep_assert_held(&device->dev_set->lock); @@ -415,7 +517,7 @@ static int vfio_device_first_open(struct vfio_device *device, return -ENODEV; if (iommufd) - ret = vfio_iommufd_bind(device, iommufd); + ret = vfio_df_iommufd_bind(df); else ret = vfio_device_group_use_iommu(device); if (ret) @@ -430,7 +532,7 @@ static int vfio_device_first_open(struct vfio_device *device, err_unuse_iommu: if (iommufd) - vfio_iommufd_unbind(device); + vfio_df_iommufd_unbind(df); else vfio_device_group_unuse_iommu(device); err_module_put: @@ -438,29 +540,39 @@ err_module_put: return ret; } -static void vfio_device_last_close(struct vfio_device *device, - struct iommufd_ctx *iommufd) +static void vfio_df_device_last_close(struct vfio_device_file *df) { + struct vfio_device *device = df->device; + struct iommufd_ctx *iommufd = df->iommufd; + lockdep_assert_held(&device->dev_set->lock); if (device->ops->close_device) device->ops->close_device(device); if (iommufd) - vfio_iommufd_unbind(device); + vfio_df_iommufd_unbind(df); else vfio_device_group_unuse_iommu(device); module_put(device->dev->driver->owner); } -int vfio_device_open(struct vfio_device *device, struct iommufd_ctx *iommufd) +int vfio_df_open(struct vfio_device_file *df) { + struct vfio_device *device = df->device; int ret = 0; lockdep_assert_held(&device->dev_set->lock); + /* + * Only the group path allows the device to be opened multiple + * times. The device cdev path doesn't have a secure way for it. + */ + if (device->open_count != 0 && !df->group) + return -EINVAL; + device->open_count++; if (device->open_count == 1) { - ret = vfio_device_first_open(device, iommufd); + ret = vfio_df_device_first_open(df); if (ret) device->open_count--; } @@ -468,14 +580,16 @@ int vfio_device_open(struct vfio_device *device, struct iommufd_ctx *iommufd) return ret; } -void vfio_device_close(struct vfio_device *device, - struct iommufd_ctx *iommufd) +void vfio_df_close(struct vfio_device_file *df) { + struct vfio_device *device = df->device; + lockdep_assert_held(&device->dev_set->lock); - vfio_assert_device_open(device); + if (!vfio_assert_device_open(device)) + return; if (device->open_count == 1) - vfio_device_last_close(device, iommufd); + vfio_df_device_last_close(df); device->open_count--; } @@ -517,12 +631,18 @@ static inline void vfio_device_pm_runtime_put(struct vfio_device *device) */ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { - struct vfio_device *device = filep->private_data; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; - vfio_device_group_close(device); + if (df->group) + vfio_df_group_close(df); + else + vfio_df_unbind_iommufd(df); vfio_device_put_registration(device); + kfree(df); + return 0; } @@ -865,6 +985,63 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device, return 0; } +void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, + u32 req_nodes) +{ + struct interval_tree_node *prev, *curr, *comb_start, *comb_end; + unsigned long min_gap, curr_gap; + + /* Special shortcut when a single range is required */ + if (req_nodes == 1) { + unsigned long last; + + comb_start = interval_tree_iter_first(root, 0, ULONG_MAX); + + /* Empty list */ + if (WARN_ON_ONCE(!comb_start)) + return; + + curr = comb_start; + while (curr) { + last = curr->last; + prev = curr; + curr = interval_tree_iter_next(curr, 0, ULONG_MAX); + if (prev != comb_start) + interval_tree_remove(prev, root); + } + comb_start->last = last; + return; + } + + /* Combine ranges which have the smallest gap */ + while (cur_nodes > req_nodes) { + prev = NULL; + min_gap = ULONG_MAX; + curr = interval_tree_iter_first(root, 0, ULONG_MAX); + while (curr) { + if (prev) { + curr_gap = curr->start - prev->last; + if (curr_gap < min_gap) { + min_gap = curr_gap; + comb_start = prev; + comb_end = curr; + } + } + prev = curr; + curr = interval_tree_iter_next(curr, 0, ULONG_MAX); + } + + /* Empty list or no nodes to combine */ + if (WARN_ON_ONCE(min_gap == ULONG_MAX)) + break; + + comb_start->last = comb_end->last; + interval_tree_remove(comb_end, root); + cur_nodes--; + } +} +EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges); + /* Ranges should fit into a single kernel page */ #define LOG_MAX_RANGES \ (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range)) @@ -1077,26 +1254,97 @@ static int vfio_ioctl_device_feature(struct vfio_device *device, feature.argsz - minsz); default: if (unlikely(!device->ops->device_feature)) - return -EINVAL; + return -ENOTTY; return device->ops->device_feature(device, feature.flags, arg->data, feature.argsz - minsz); } } +static long vfio_get_region_info(struct vfio_device *device, + struct vfio_region_info __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_region_info, offset); + struct vfio_region_info info = {}; + struct vfio_info_cap caps = {}; + int ret; + + if (unlikely(!device->ops->get_region_info_caps)) + return -EINVAL; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + ret = device->ops->get_region_info_caps(device, &info, &caps); + if (ret) + goto out_free; + + if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + info.cap_offset = 0; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user(arg + 1, caps.buf, caps.size)) { + ret = -EFAULT; + goto out_free; + } + info.cap_offset = sizeof(info); + } + } + + if (copy_to_user(arg, &info, minsz)){ + ret = -EFAULT; + goto out_free; + } + +out_free: + kfree(caps.buf); + return ret; +} + static long vfio_device_fops_unl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { - struct vfio_device *device = filep->private_data; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + void __user *uptr = (void __user *)arg; int ret; + if (cmd == VFIO_DEVICE_BIND_IOMMUFD) + return vfio_df_ioctl_bind_iommufd(df, uptr); + + /* Paired with smp_store_release() following vfio_df_open() */ + if (!smp_load_acquire(&df->access_granted)) + return -EINVAL; + ret = vfio_device_pm_runtime_get(device); if (ret) return ret; + /* cdev only ioctls */ + if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) { + switch (cmd) { + case VFIO_DEVICE_ATTACH_IOMMUFD_PT: + ret = vfio_df_ioctl_attach_pt(df, uptr); + goto out; + + case VFIO_DEVICE_DETACH_IOMMUFD_PT: + ret = vfio_df_ioctl_detach_pt(df, uptr); + goto out; + } + } + switch (cmd) { case VFIO_DEVICE_FEATURE: - ret = vfio_ioctl_device_feature(device, (void __user *)arg); + ret = vfio_ioctl_device_feature(device, uptr); + break; + + case VFIO_DEVICE_GET_REGION_INFO: + ret = vfio_get_region_info(device, uptr); break; default: @@ -1106,7 +1354,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, ret = device->ops->ioctl(device, cmd, arg); break; } - +out: vfio_device_pm_runtime_put(device); return ret; } @@ -1114,7 +1362,12 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { - struct vfio_device *device = filep->private_data; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + + /* Paired with smp_store_release() following vfio_df_open() */ + if (!smp_load_acquire(&df->access_granted)) + return -EINVAL; if (unlikely(!device->ops->read)) return -EINVAL; @@ -1126,7 +1379,12 @@ static ssize_t vfio_device_fops_write(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { - struct vfio_device *device = filep->private_data; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + + /* Paired with smp_store_release() following vfio_df_open() */ + if (!smp_load_acquire(&df->access_granted)) + return -EINVAL; if (unlikely(!device->ops->write)) return -EINVAL; @@ -1136,7 +1394,12 @@ static ssize_t vfio_device_fops_write(struct file *filep, static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) { - struct vfio_device *device = filep->private_data; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + + /* Paired with smp_store_release() following vfio_df_open() */ + if (!smp_load_acquire(&df->access_granted)) + return -EINVAL; if (unlikely(!device->ops->mmap)) return -EINVAL; @@ -1144,16 +1407,118 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device, vma); } +#ifdef CONFIG_PROC_FS +static void vfio_device_show_fdinfo(struct seq_file *m, struct file *filep) +{ + char *path; + struct vfio_device_file *df = filep->private_data; + struct vfio_device *device = df->device; + + path = kobject_get_path(&device->dev->kobj, GFP_KERNEL); + if (!path) + return; + + seq_printf(m, "vfio-device-syspath: /sys%s\n", path); + kfree(path); +} +#endif + const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, + .open = vfio_device_fops_cdev_open, .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, .unlocked_ioctl = vfio_device_fops_unl_ioctl, .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, +#ifdef CONFIG_PROC_FS + .show_fdinfo = vfio_device_show_fdinfo, +#endif }; +static struct vfio_device *vfio_device_from_file(struct file *file) +{ + struct vfio_device_file *df = file->private_data; + + if (file->f_op != &vfio_device_fops) + return NULL; + return df->device; +} + +/** + * vfio_file_is_valid - True if the file is valid vfio file + * @file: VFIO group file or VFIO device file + */ +bool vfio_file_is_valid(struct file *file) +{ + return vfio_group_from_file(file) || + vfio_device_from_file(file); +} +EXPORT_SYMBOL_GPL(vfio_file_is_valid); + +/** + * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file + * is always CPU cache coherent + * @file: VFIO group file or VFIO device file + * + * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop + * bit in DMA transactions. A return of false indicates that the user has + * rights to access additional instructions such as wbinvd on x86. + */ +bool vfio_file_enforced_coherent(struct file *file) +{ + struct vfio_device *device; + struct vfio_group *group; + + group = vfio_group_from_file(file); + if (group) + return vfio_group_enforced_coherent(group); + + device = vfio_device_from_file(file); + if (device) + return device_iommu_capable(device->dev, + IOMMU_CAP_ENFORCE_CACHE_COHERENCY); + + return true; +} +EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); + +static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm) +{ + struct vfio_device_file *df = file->private_data; + + /* + * The kvm is first recorded in the vfio_device_file, and will + * be propagated to vfio_device::kvm when the file is bound to + * iommufd successfully in the vfio device cdev path. + */ + spin_lock(&df->kvm_ref_lock); + df->kvm = kvm; + spin_unlock(&df->kvm_ref_lock); +} + +/** + * vfio_file_set_kvm - Link a kvm with VFIO drivers + * @file: VFIO group file or VFIO device file + * @kvm: KVM to link + * + * When a VFIO device is first opened the KVM will be available in + * device->kvm if one was associated with the file. + */ +void vfio_file_set_kvm(struct file *file, struct kvm *kvm) +{ + struct vfio_group *group; + + group = vfio_group_from_file(file); + if (group) + vfio_group_set_kvm(group, kvm); + + if (vfio_device_from_file(file)) + vfio_device_file_set_kvm(file, kvm); +} +EXPORT_SYMBOL_GPL(vfio_file_set_kvm); + /* * Sub-module support */ @@ -1172,6 +1537,9 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, void *buf; struct vfio_info_cap_header *header, *tmp; + /* Ensure that the next capability struct will be aligned */ + size = ALIGN(size, sizeof(u64)); + buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); if (!buf) { kfree(caps->buf); @@ -1205,6 +1573,9 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) struct vfio_info_cap_header *tmp; void *buf = (void *)caps->buf; + /* Capability structs should start with proper alignment */ + WARN_ON(!IS_ALIGNED(offset, sizeof(u64))); + for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset) tmp->next += offset; } @@ -1293,6 +1664,8 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, /* group->container cannot change while a vfio device is open */ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) return -EINVAL; + if (!device->ops->dma_unmap) + return -EINVAL; if (vfio_device_has_container(device)) return vfio_device_container_pin_pages(device, iova, npage, prot, pages); @@ -1330,6 +1703,8 @@ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) { if (WARN_ON(!vfio_assert_device_open(device))) return; + if (WARN_ON(!device->ops->dma_unmap)) + return; if (vfio_device_has_container(device)) { vfio_device_container_unpin_pages(device, iova, npage); @@ -1415,9 +1790,17 @@ static int __init vfio_init(void) goto err_dev_class; } + ret = vfio_cdev_init(vfio.device_class); + if (ret) + goto err_alloc_dev_chrdev; + + vfio_debugfs_create_root(); pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; +err_alloc_dev_chrdev: + class_destroy(vfio.device_class); + vfio.device_class = NULL; err_dev_class: vfio_virqfd_exit(); err_virqfd: @@ -1427,7 +1810,9 @@ err_virqfd: static void __exit vfio_cleanup(void) { + vfio_debugfs_remove_root(); ida_destroy(&vfio.device_ida); + vfio_cdev_cleanup(); class_destroy(vfio.device_class); vfio.device_class = NULL; vfio_virqfd_exit(); @@ -1438,6 +1823,7 @@ static void __exit vfio_cleanup(void) module_init(vfio_init); module_exit(vfio_cleanup); +MODULE_IMPORT_NS("IOMMUFD"); MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); |
