Diffstat (limited to 'drivers/iommu/iommufd/main.c')
-rw-r--r--	drivers/iommu/iommufd/main.c | 206
1 file changed, 184 insertions(+), 22 deletions(-)
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..15af7ced0501 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -23,12 +23,72 @@
 #include "iommufd_test.h"
 
 struct iommufd_object_ops {
+	void (*pre_destroy)(struct iommufd_object *obj);
 	void (*destroy)(struct iommufd_object *obj);
 	void (*abort)(struct iommufd_object *obj);
 };
 static const struct iommufd_object_ops iommufd_object_ops[];
 static struct miscdevice vfio_misc_dev;
 
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+	int rc;
+
+	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+	obj->type = type;
+	/* Starts out biased by 1 until it is removed from the xarray */
+	refcount_set(&obj->wait_cnt, 1);
+	refcount_set(&obj->users, 1);
+
+	/*
+	 * Reserve an ID in the xarray but do not publish the pointer yet since
+	 * the caller hasn't initialized it yet. Once the pointer is published
+	 * in the xarray and visible to other threads we can't reliably destroy
+	 * it anymore, so the caller must complete all errorable operations
+	 * before calling iommufd_object_finalize().
+	 */
+	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
+		      GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_free;
+	return obj;
+
+out_free:
+	kfree(obj);
+	return ERR_PTR(rc);
+}
+
+struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd,
+						  size_t size,
+						  enum iommufd_object_type type)
+{
+	struct iommufd_object *new_obj;
+
+	/* Something is coded wrong if this is hit */
+	if (WARN_ON(ucmd->new_obj))
+		return ERR_PTR(-EBUSY);
+
+	/*
+	 * An abort op means that its caller must invoke it under the caller's
+	 * own lock. That doesn't work with _iommufd_object_alloc_ucmd(), which
+	 * invokes the abort op in iommufd_object_abort_and_destroy(), outside
+	 * the caller's lock.
+	 */
+	if (WARN_ON(iommufd_object_ops[type].abort))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	new_obj = _iommufd_object_alloc(ucmd->ictx, size, type);
+	if (IS_ERR(new_obj))
+		return new_obj;
+
+	ucmd->new_obj = new_obj;
+	return new_obj;
+}
+
 /*
  * Allow concurrent access to the object.
  *
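The split of responsibility here is worth spelling out: a handler that allocates through _iommufd_object_alloc_ucmd() must not finalize or abort the object itself, because iommufd_fops_ioctl() (see the @@ -417 hunk below) does that based on the handler's return code. A minimal sketch of a conforming handler, assuming a hypothetical IOMMUFD_OBJ_WIDGET type whose struct iommufd_widget embeds the iommufd_object as "obj", with iommufd_widget_init() standing in for the real setup:

	/* Hypothetical handler for illustration; not part of this patch */
	static int iommufd_widget_alloc_ioctl(struct iommufd_ucmd *ucmd)
	{
		struct iommufd_widget *widget;
		struct iommufd_object *obj;

		/* Reserves an ID and records the object in ucmd->new_obj */
		obj = _iommufd_object_alloc_ucmd(ucmd, sizeof(*widget),
						 IOMMUFD_OBJ_WIDGET);
		if (IS_ERR(obj))
			return PTR_ERR(obj);
		widget = container_of(obj, struct iommufd_widget, obj);

		/*
		 * Only errorable setup belongs here. A non-zero return makes
		 * the dispatcher call iommufd_object_abort_and_destroy();
		 * zero makes it publish the object via
		 * iommufd_object_finalize().
		 */
		return iommufd_widget_init(widget, ucmd);	/* hypothetical */
	}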
@@ -95,20 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 	return obj;
 }
 
-static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
-					     struct iommufd_object *to_destroy)
+static int iommufd_object_dec_wait(struct iommufd_ctx *ictx,
+				   struct iommufd_object *to_destroy)
 {
-	if (refcount_dec_and_test(&to_destroy->shortterm_users))
+	if (refcount_dec_and_test(&to_destroy->wait_cnt))
 		return 0;
 
+	if (iommufd_object_ops[to_destroy->type].pre_destroy)
+		iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy);
+
 	if (wait_event_timeout(ictx->destroy_wait,
-			       refcount_read(&to_destroy->shortterm_users) ==
-				       0,
-			       msecs_to_jiffies(60000)))
+			       refcount_read(&to_destroy->wait_cnt) == 0,
+			       msecs_to_jiffies(60000)))
 		return 0;
 
 	pr_crit("Time out waiting for iommufd object to become free\n");
-	refcount_inc(&to_destroy->shortterm_users);
+	refcount_inc(&to_destroy->wait_cnt);
 	return -EBUSY;
 }
 
@@ -122,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 {
 	struct iommufd_object *obj;
 	XA_STATE(xas, &ictx->objects, id);
-	bool zerod_shortterm = false;
+	bool zerod_wait_cnt = false;
 	int ret;
 
 	/*
-	 * The purpose of the shortterm_users is to ensure deterministic
-	 * destruction of objects used by external drivers and destroyed by this
-	 * function. Any temporary increment of the refcount must increment
-	 * shortterm_users, such as during ioctl execution.
+	 * The purpose of the wait_cnt is to ensure deterministic destruction
+	 * of objects used by external drivers and destroyed by this function.
+	 * Incrementing this wait_cnt should either be short lived, such as
+	 * during ioctl execution, or be revoked and blocked during
+	 * pre_destroy(), such as a vdev holding the idev's refcount.
 	 */
-	if (flags & REMOVE_WAIT_SHORTTERM) {
-		ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
+	if (flags & REMOVE_WAIT) {
+		ret = iommufd_object_dec_wait(ictx, to_destroy);
 		if (ret) {
 			/*
 			 * We have a bug. Put back the caller's reference and
@@ -141,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 			refcount_dec(&to_destroy->users);
 			return ret;
 		}
-		zerod_shortterm = true;
+		zerod_wait_cnt = true;
 	}
 
 	xa_lock(&ictx->objects);
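The comment above defines the wait_cnt contract but its wake-up counterpart is not visible in this hunk. The sketch below shows the assumed shape of a short-lived holder; the wake_up_interruptible_all() pairing is inferred from the wait_event_timeout() on ictx->destroy_wait above and from what iommufd_put_object() is expected to do, not from anything in this diff:

	/* Illustrative only; not part of this patch */
	static void example_short_lived_hold(struct iommufd_ctx *ictx,
					     struct iommufd_object *obj)
	{
		/* A temporary holder, e.g. for the duration of one ioctl */
		refcount_inc(&obj->wait_cnt);

		/* ... access obj ... */

		/* The last holder must wake an iommufd_object_dec_wait() sleeper */
		if (refcount_dec_and_test(&obj->wait_cnt))
			wake_up_interruptible_all(&ictx->destroy_wait);
	}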
@@ -167,17 +230,17 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 		goto err_xa;
 	}
 
-	xas_store(&xas, NULL);
+	xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
 	if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
 		ictx->vfio_ioas = NULL;
 	xa_unlock(&ictx->objects);
 
 	/*
-	 * Since users is zero any positive users_shortterm must be racing
+	 * Since users is zero any positive wait_cnt must be racing
 	 * iommufd_put_object(), or we have a bug.
 	 */
-	if (!zerod_shortterm) {
-		ret = iommufd_object_dec_wait_shortterm(ictx, obj);
+	if (!zerod_wait_cnt) {
+		ret = iommufd_object_dec_wait(ictx, obj);
 		if (WARN_ON(ret))
 			return ret;
 	}
 
@@ -187,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
 	return 0;
 
 err_xa:
-	if (zerod_shortterm) {
+	if (zerod_wait_cnt) {
 		/* Restore the xarray owned reference */
-		refcount_set(&obj->shortterm_users, 1);
+		refcount_set(&obj->wait_cnt, 1);
 	}
 	xa_unlock(&ictx->objects);
 
@@ -226,6 +289,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	xa_init(&ictx->groups);
 	ictx->file = filp;
+	mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
 	init_waitqueue_head(&ictx->destroy_wait);
 	mutex_init(&ictx->sw_msi_lock);
 	INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -252,19 +316,41 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
 	while (!xa_empty(&ictx->objects)) {
 		unsigned int destroyed = 0;
 		unsigned long index;
+		bool empty = true;
 
+		/*
+		 * We can't use xa_empty() to end the loop as the tombstones
+		 * are stored as XA_ZERO_ENTRY in the xarray. However,
+		 * xa_for_each() automatically converts them to NULL and skips
+		 * them, so xa_empty() can stay false. Thus once xa_for_each()
+		 * finds no further !NULL entries the loop is done.
+		 */
 		xa_for_each(&ictx->objects, index, obj) {
+			empty = false;
 			if (!refcount_dec_if_one(&obj->users))
 				continue;
+
 			destroyed++;
 			xa_erase(&ictx->objects, index);
 			iommufd_object_ops[obj->type].destroy(obj);
 			kfree(obj);
 		}
+
+		if (empty)
+			break;
+
 		/* Bug related to users refcount */
 		if (WARN_ON(!destroyed))
			break;
 	}
+
+	/*
+	 * There may be some tombstones left over from
+	 * iommufd_object_tombstone_user()
+	 */
+	xa_destroy(&ictx->objects);
+
 	WARN_ON(!xa_empty(&ictx->groups));
 	mutex_destroy(&ictx->sw_msi_lock);
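The release-loop comment above leans on a subtle xarray property: a zero entry keeps its slot allocated (so xa_empty() stays false) yet loads back as NULL and is never visited by xa_for_each(). A small self-contained illustration of that behavior, using xa_reserve(), which stores a zero entry much like the tombstones here (illustrative, not from the patch):

	static void example_zero_entry_semantics(void)
	{
		struct xarray xa;
		unsigned long index;
		void *entry;

		xa_init(&xa);
		WARN_ON(xa_reserve(&xa, 0, GFP_KERNEL));

		WARN_ON(xa_empty(&xa));		/* slot 0 is still occupied... */
		WARN_ON(xa_load(&xa, 0));	/* ...but loads as NULL */

		xa_for_each(&xa, index, entry)
			WARN_ON(1);		/* zero entries are skipped */

		xa_destroy(&xa);		/* releases leftover zero entries */
	}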
@@ -305,6 +391,7 @@ union ucmd_buffer {
 	struct iommu_destroy destroy;
 	struct iommu_fault_alloc fault;
 	struct iommu_hw_info info;
+	struct iommu_hw_queue_alloc hw_queue;
 	struct iommu_hwpt_alloc hwpt;
 	struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
 	struct iommu_hwpt_invalidate cache;
@@ -347,6 +434,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 struct iommu_fault_alloc, out_fault_fd),
 	IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
 		 __reserved),
+	IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl,
+		 struct iommu_hw_queue_alloc, length),
 	IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
 		 __reserved),
 	IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
@@ -417,14 +506,83 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
 	if (ret)
 		return ret;
 	ret = op->execute(&ucmd);
+
+	if (ucmd.new_obj) {
+		if (ret)
+			iommufd_object_abort_and_destroy(ictx, ucmd.new_obj);
+		else
+			iommufd_object_finalize(ictx, ucmd.new_obj);
+	}
 	return ret;
 }
 
+static void iommufd_fops_vma_open(struct vm_area_struct *vma)
+{
+	struct iommufd_mmap *immap = vma->vm_private_data;
+
+	refcount_inc(&immap->owner->users);
+}
+
+static void iommufd_fops_vma_close(struct vm_area_struct *vma)
+{
+	struct iommufd_mmap *immap = vma->vm_private_data;
+
+	refcount_dec(&immap->owner->users);
+}
+
+static const struct vm_operations_struct iommufd_vma_ops = {
+	.open = iommufd_fops_vma_open,
+	.close = iommufd_fops_vma_close,
+};
+
+/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct iommufd_ctx *ictx = filp->private_data;
+	size_t length = vma->vm_end - vma->vm_start;
+	struct iommufd_mmap *immap;
+	int rc;
+
+	if (!PAGE_ALIGNED(length))
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (vma->vm_flags & VM_EXEC)
+		return -EPERM;
+
+	/* vma->vm_pgoff carries a page-shifted start position to an immap */
+	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT);
+	if (!immap)
+		return -ENXIO;
+	/*
+	 * mtree_load() returns the immap for any contained mmio_addr, so only
+	 * allow the exact immap to be mapped
+	 */
+	if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length)
+		return -ENXIO;
+
+	vma->vm_pgoff = 0;
+	vma->vm_private_data = immap;
+	vma->vm_ops = &iommufd_vma_ops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	rc = io_remap_pfn_range(vma, vma->vm_start,
+				immap->mmio_addr >> PAGE_SHIFT, length,
+				vma->vm_page_prot);
+	if (rc)
+		return rc;
+
+	/* vm_ops.open won't be called for mmap itself. */
+	refcount_inc(&immap->owner->users);
+	return rc;
+}
+
 static const struct file_operations iommufd_fops = {
 	.owner = THIS_MODULE,
 	.open = iommufd_fops_open,
 	.release = iommufd_fops_release,
 	.unlocked_ioctl = iommufd_fops_ioctl,
+	.mmap = iommufd_fops_mmap,
 };
 
 /**
@@ -498,11 +656,15 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
 		.destroy = iommufd_access_destroy_object,
 	},
 	[IOMMUFD_OBJ_DEVICE] = {
+		.pre_destroy = iommufd_device_pre_destroy,
 		.destroy = iommufd_device_destroy,
 	},
 	[IOMMUFD_OBJ_FAULT] = {
 		.destroy = iommufd_fault_destroy,
 	},
+	[IOMMUFD_OBJ_HW_QUEUE] = {
+		.destroy = iommufd_hw_queue_destroy,
+	},
 	[IOMMUFD_OBJ_HWPT_PAGING] = {
 		.destroy = iommufd_hwpt_paging_destroy,
 		.abort = iommufd_hwpt_paging_abort,
@@ -516,6 +678,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
 	},
 	[IOMMUFD_OBJ_VDEVICE] = {
 		.destroy = iommufd_vdevice_destroy,
+		.abort = iommufd_vdevice_abort,
 	},
 	[IOMMUFD_OBJ_VEVENTQ] = {
 		.destroy = iommufd_veventq_destroy,
@@ -539,7 +702,6 @@ static struct miscdevice iommu_misc_dev = {
 	.mode = 0660,
 };
 
-
 static struct miscdevice vfio_misc_dev = {
 	.minor = VFIO_MINOR,
 	.name = "vfio",
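From user space, the new mmap path implies a fixed calling convention: the offset is not caller-chosen but must be a value the kernel allocated from mt_mmap and reported back through an ioctl output field (the name out_mmap_offset below is hypothetical), and the mapping must be MAP_SHARED, non-executable, page-aligned, and exactly the object's length, or the checks in iommufd_fops_mmap() above fail with -EINVAL/-EPERM/-ENXIO. A minimal sketch:

	#include <stdint.h>
	#include <stddef.h>
	#include <sys/mman.h>

	/* Illustrative helper; the offset and length come from a prior ioctl */
	static void *map_iommufd_mmio(int iommufd, uint64_t out_mmap_offset,
				      size_t length)
	{
		/* PROT_EXEC is rejected with -EPERM; MAP_PRIVATE with -EINVAL */
		return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
			    iommufd, (off_t)out_mmap_offset);
	}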