Diffstat (limited to 'drivers/iommu/iommufd')
-rw-r--r--  drivers/iommu/iommufd/device.c           | 143
-rw-r--r--  drivers/iommu/iommufd/driver.c           | 113
-rw-r--r--  drivers/iommu/iommufd/eventq.c           |  14
-rw-r--r--  drivers/iommu/iommufd/hw_pagetable.c     |  10
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.c     |  57
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.h     |   5
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h  | 135
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h     |  20
-rw-r--r--  drivers/iommu/iommufd/iova_bitmap.c      |   1
-rw-r--r--  drivers/iommu/iommufd/main.c             | 206
-rw-r--r--  drivers/iommu/iommufd/pages.c            |  21
-rw-r--r--  drivers/iommu/iommufd/selftest.c         | 208
-rw-r--r--  drivers/iommu/iommufd/viommu.c           | 309
13 files changed, 1052 insertions(+), 190 deletions(-)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..65fbd098f9e9 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -137,6 +137,57 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
}
}
+static void iommufd_device_remove_vdev(struct iommufd_device *idev)
+{
+ struct iommufd_vdevice *vdev;
+
+ mutex_lock(&idev->igroup->lock);
+ /* prevent new references from vdev */
+ idev->destroying = true;
+ /* vdev has been completely destroyed by userspace */
+ if (!idev->vdev)
+ goto out_unlock;
+
+ vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id);
+ /*
+ * An ongoing vdev destroy ioctl has removed the vdev from the object
+ * xarray, but has not finished iommufd_vdevice_destroy() yet as it
+ * needs the same mutex. Drop the lock and instead wait on the
+ * wait_cnt reference for the vdev destruction.
+ */
+ if (IS_ERR(vdev))
+ goto out_unlock;
+
+ /* Should never happen */
+ if (WARN_ON(vdev != idev->vdev)) {
+ iommufd_put_object(idev->ictx, &vdev->obj);
+ goto out_unlock;
+ }
+
+ /*
+ * vdev is still alive. Hold a users refcount to prevent racing with
+ * userspace destruction, then use iommufd_object_tombstone_user() to
+ * destroy it and leave a tombstone.
+ */
+ refcount_inc(&vdev->obj.users);
+ iommufd_put_object(idev->ictx, &vdev->obj);
+ mutex_unlock(&idev->igroup->lock);
+ iommufd_object_tombstone_user(idev->ictx, &vdev->obj);
+ return;
+
+out_unlock:
+ mutex_unlock(&idev->igroup->lock);
+}
+
+void iommufd_device_pre_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_device *idev =
+ container_of(obj, struct iommufd_device, obj);
+
+ /* Release the wait_cnt reference on this */
+ iommufd_device_remove_vdev(idev);
+}
+
void iommufd_device_destroy(struct iommufd_object *obj)
{
struct iommufd_device *idev =
@@ -485,8 +536,7 @@ iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
lockdep_assert_held(&idev->igroup->lock);
- handle =
- iommu_attach_handle_get(idev->igroup->group, pasid, 0);
+ handle = iommu_attach_handle_get(idev->igroup->group, pasid, 0);
if (IS_ERR(handle))
return NULL;
return to_iommufd_handle(handle);
@@ -1049,7 +1099,7 @@ static int iommufd_access_change_ioas(struct iommufd_access *access,
}
if (cur_ioas) {
- if (access->ops->unmap) {
+ if (!iommufd_access_is_internal(access) && access->ops->unmap) {
mutex_unlock(&access->ioas_lock);
access->ops->unmap(access->data, 0, ULONG_MAX);
mutex_lock(&access->ioas_lock);
@@ -1085,7 +1135,39 @@ void iommufd_access_destroy_object(struct iommufd_object *obj)
if (access->ioas)
WARN_ON(iommufd_access_change_ioas(access, NULL));
mutex_unlock(&access->ioas_lock);
- iommufd_ctx_put(access->ictx);
+ if (!iommufd_access_is_internal(access))
+ iommufd_ctx_put(access->ictx);
+}
+
+static struct iommufd_access *__iommufd_access_create(struct iommufd_ctx *ictx)
+{
+ struct iommufd_access *access;
+
+ /*
+ * There is no uAPI for the access object, but to keep things symmetric
+ * use the object infrastructure anyhow.
+ */
+ access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+ if (IS_ERR(access))
+ return access;
+
+ /* The calling driver is a user until iommufd_access_destroy() */
+ refcount_inc(&access->obj.users);
+ mutex_init(&access->ioas_lock);
+ return access;
+}
+
+struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx)
+{
+ struct iommufd_access *access;
+
+ access = __iommufd_access_create(ictx);
+ if (IS_ERR(access))
+ return access;
+ access->iova_alignment = PAGE_SIZE;
+
+ iommufd_object_finalize(ictx, &access->obj);
+ return access;
}
/**
@@ -1107,11 +1189,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
{
struct iommufd_access *access;
- /*
- * There is no uAPI for the access object, but to keep things symmetric
- * use the object infrastructure anyhow.
- */
- access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+ access = __iommufd_access_create(ictx);
if (IS_ERR(access))
return access;
@@ -1123,13 +1201,10 @@ iommufd_access_create(struct iommufd_ctx *ictx,
else
access->iova_alignment = 1;
- /* The calling driver is a user until iommufd_access_destroy() */
- refcount_inc(&access->obj.users);
access->ictx = ictx;
iommufd_ctx_get(ictx);
iommufd_object_finalize(ictx, &access->obj);
*id = access->obj.id;
- mutex_init(&access->ioas_lock);
return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
@@ -1174,6 +1249,22 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
+int iommufd_access_attach_internal(struct iommufd_access *access,
+ struct iommufd_ioas *ioas)
+{
+ int rc;
+
+ mutex_lock(&access->ioas_lock);
+ if (WARN_ON(access->ioas)) {
+ mutex_unlock(&access->ioas_lock);
+ return -EINVAL;
+ }
+
+ rc = iommufd_access_change_ioas(access, ioas);
+ mutex_unlock(&access->ioas_lock);
+ return rc;
+}
+
int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
int rc;
@@ -1215,7 +1306,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
xa_lock(&ioas->iopt.access_list);
xa_for_each(&ioas->iopt.access_list, index, access) {
- if (!iommufd_lock_obj(&access->obj))
+ if (!iommufd_lock_obj(&access->obj) ||
+ iommufd_access_is_internal(access))
continue;
xa_unlock(&ioas->iopt.access_list);
@@ -1239,6 +1331,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
void iommufd_access_unpin_pages(struct iommufd_access *access,
unsigned long iova, unsigned long length)
{
+ bool internal = iommufd_access_is_internal(access);
struct iopt_area_contig_iter iter;
struct io_pagetable *iopt;
unsigned long last_iova;
@@ -1265,7 +1358,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
area, iopt_area_iova_to_index(area, iter.cur_iova),
iopt_area_iova_to_index(
area,
- min(last_iova, iopt_area_last_iova(area))));
+ min(last_iova, iopt_area_last_iova(area))),
+ internal);
WARN_ON(!iopt_area_contig_done(&iter));
up_read(&iopt->iova_rwsem);
mutex_unlock(&access->ioas_lock);
@@ -1314,6 +1408,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
unsigned long length, struct page **out_pages,
unsigned int flags)
{
+ bool internal = iommufd_access_is_internal(access);
struct iopt_area_contig_iter iter;
struct io_pagetable *iopt;
unsigned long last_iova;
@@ -1322,7 +1417,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
/* Driver's ops don't support pin_pages */
if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
- WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
+ WARN_ON(access->iova_alignment != PAGE_SIZE ||
+ (!internal && !access->ops->unmap)))
return -EINVAL;
if (!length)
@@ -1356,7 +1452,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
}
rc = iopt_area_add_access(area, index, last_index, out_pages,
- flags);
+ flags, internal);
if (rc)
goto err_remove;
out_pages += last_index - index + 1;
@@ -1379,7 +1475,8 @@ err_remove:
iopt_area_iova_to_index(area, iter.cur_iova),
iopt_area_iova_to_index(
area, min(last_iova,
- iopt_area_last_iova(area))));
+ iopt_area_last_iova(area))),
+ internal);
}
up_read(&iopt->iova_rwsem);
mutex_unlock(&access->ioas_lock);
@@ -1453,6 +1550,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
+ const u32 SUPPORTED_FLAGS = IOMMU_HW_INFO_FLAG_INPUT_TYPE;
struct iommu_hw_info *cmd = ucmd->cmd;
void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
const struct iommu_ops *ops;
@@ -1462,9 +1560,14 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
- cmd->__reserved[2])
+ if (cmd->flags & ~SUPPORTED_FLAGS)
return -EOPNOTSUPP;
+ if (cmd->__reserved[0] || cmd->__reserved[1] || cmd->__reserved[2])
+ return -EOPNOTSUPP;
+
+ /* Clear the type field since drivers don't support a random input */
+ if (!(cmd->flags & IOMMU_HW_INFO_FLAG_INPUT_TYPE))
+ cmd->in_data_type = IOMMU_HW_INFO_TYPE_DEFAULT;
idev = iommufd_get_device(ucmd, cmd->dev_id);
if (IS_ERR(idev))
@@ -1484,7 +1587,7 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
*/
if (WARN_ON_ONCE(cmd->out_data_type ==
IOMMU_HW_INFO_TYPE_NONE)) {
- rc = -ENODEV;
+ rc = -EOPNOTSUPP;
goto out_free;
}
} else {
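
The reworked checks above let userspace pin the reported data to a specific type via IOMMU_HW_INFO_FLAG_INPUT_TYPE. A minimal userspace sketch; only the fields named in this hunk (flags, in_data_type, dev_id, data_uptr) are taken from the diff, the rest of the initialization is illustrative of the usual uAPI layout:

	struct iommu_test_hw_info info;
	struct iommu_hw_info cmd = {
		.size = sizeof(cmd),
		.flags = IOMMU_HW_INFO_FLAG_INPUT_TYPE,
		.dev_id = dev_id,
		.in_data_type = IOMMU_HW_INFO_TYPE_SELFTEST,
		.data_len = sizeof(info),
		.data_uptr = (uintptr_t)&info,
	};

	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
		err(1, "IOMMU_GET_HW_INFO");
	/* Without the flag, the kernel overrides in_data_type with
	 * IOMMU_HW_INFO_TYPE_DEFAULT before calling the driver. */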
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 922cd1fe7ec2..6f1010da221c 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -3,38 +3,91 @@
*/
#include "iommufd_private.h"
-struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
- size_t size,
- enum iommufd_object_type type)
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+int _iommufd_object_depend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
{
- struct iommufd_object *obj;
+ /* Reject self dependency that deadlocks */
+ if (obj_dependent == obj_depended)
+ return -EINVAL;
+ /* Only support dependency between two objects of the same type */
+ if (obj_dependent->type != obj_depended->type)
+ return -EINVAL;
+
+ refcount_inc(&obj_depended->users);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_object_depend, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
+{
+ if (WARN_ON_ONCE(obj_dependent == obj_depended ||
+ obj_dependent->type != obj_depended->type))
+ return;
+
+ refcount_dec(&obj_depended->users);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
+
+/*
+ * Allocate an @offset to return to user space to use for an mmap() syscall
+ *
+ * Driver should use a per-structure helper in include/linux/iommufd.h
+ */
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset)
+{
+ struct iommufd_mmap *immap;
+ unsigned long startp;
int rc;
- obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
- if (!obj)
- return ERR_PTR(-ENOMEM);
- obj->type = type;
- /* Starts out bias'd by 1 until it is removed from the xarray */
- refcount_set(&obj->shortterm_users, 1);
- refcount_set(&obj->users, 1);
+ if (!PAGE_ALIGNED(mmio_addr))
+ return -EINVAL;
+ if (!length || !PAGE_ALIGNED(length))
+ return -EINVAL;
- /*
- * Reserve an ID in the xarray but do not publish the pointer yet since
- * the caller hasn't initialized it yet. Once the pointer is published
- * in the xarray and visible to other threads we can't reliably destroy
- * it anymore, so the caller must complete all errorable operations
- * before calling iommufd_object_finalize().
- */
- rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
- GFP_KERNEL_ACCOUNT);
- if (rc)
- goto out_free;
- return obj;
-out_free:
- kfree(obj);
- return ERR_PTR(rc);
+ immap = kzalloc(sizeof(*immap), GFP_KERNEL);
+ if (!immap)
+ return -ENOMEM;
+ immap->owner = owner;
+ immap->length = length;
+ immap->mmio_addr = mmio_addr;
+
+ /* Skip the first page so callers can more easily identify the returned offset */
+ rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length,
+ PAGE_SIZE, ULONG_MAX, GFP_KERNEL);
+ if (rc < 0) {
+ kfree(immap);
+ return rc;
+ }
+
+ /* mmap() syscall will right-shift the offset in vma->vm_pgoff too */
+ immap->vm_pgoff = startp >> PAGE_SHIFT;
+ *offset = startp;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner, unsigned long offset)
+{
+ struct iommufd_mmap *immap;
+
+ immap = mtree_erase(&ictx->mt_mmap, offset);
+ WARN_ON_ONCE(!immap || immap->owner != owner);
+ kfree(immap);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD");
+
+struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev)
+{
+ return vdev->idev->dev;
}
-EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, "IOMMUFD");
+EXPORT_SYMBOL_NS_GPL(iommufd_vdevice_to_device, "IOMMUFD");
/* Caller should xa_lock(&viommu->vdevs) to protect the return value */
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
@@ -45,7 +98,7 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
lockdep_assert_held(&viommu->vdevs.xa_lock);
vdev = xa_load(&viommu->vdevs, vdev_id);
- return vdev ? vdev->dev : NULL;
+ return vdev ? iommufd_vdevice_to_device(vdev) : NULL;
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");
@@ -62,8 +115,8 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
xa_lock(&viommu->vdevs);
xa_for_each(&viommu->vdevs, index, vdev) {
- if (vdev->dev == dev) {
- *vdev_id = vdev->id;
+ if (iommufd_vdevice_to_device(vdev) == dev) {
+ *vdev_id = vdev->virt_id;
rc = 0;
break;
}
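
For context, a sketch of how a driver consumes the new mmap helpers through the per-structure wrappers the comments refer to (my_viommu and its field names are hypothetical; the mock driver below does the same via iommufd_viommu_alloc_mmap()):

	static int my_viommu_expose_mmio(struct my_viommu *vi, phys_addr_t mmio_pa)
	{
		int rc;

		/* mmio_pa and the length must be page aligned, per the checks above */
		rc = iommufd_viommu_alloc_mmap(&vi->core, mmio_pa, PAGE_SIZE,
					       &vi->mmap_offset);
		if (rc)
			return rc;
		/*
		 * Report vi->mmap_offset to user space; it becomes the offset
		 * argument to mmap() on the iommufd. Undo with
		 * iommufd_viommu_destroy_mmap(&vi->core, vi->mmap_offset).
		 */
		return 0;
	}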
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index e373b9eec7f5..fc4de63b0bce 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -427,8 +427,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
if (cmd->flags)
return -EOPNOTSUPP;
- fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
- common.obj);
+ fault = __iommufd_object_alloc_ucmd(ucmd, fault, IOMMUFD_OBJ_FAULT,
+ common.obj);
if (IS_ERR(fault))
return PTR_ERR(fault);
@@ -437,10 +437,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
ucmd->ictx, &iommufd_fault_fops);
- if (fdno < 0) {
- rc = fdno;
- goto out_abort;
- }
+ if (fdno < 0)
+ return fdno;
cmd->out_fault_id = fault->common.obj.id;
cmd->out_fault_fd = fdno;
@@ -448,7 +446,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_put_fdno;
- iommufd_object_finalize(ucmd->ictx, &fault->common.obj);
fd_install(fdno, fault->common.filep);
@@ -456,9 +453,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
out_put_fdno:
put_unused_fd(fdno);
fput(fault->common.filep);
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);
-
return rc;
}
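
The simplification relies on the new _ucmd allocator: iommufd_fops_ioctl() finalizes ucmd->new_obj when op->execute() succeeds and aborts it on failure, so per-ioctl out_abort labels disappear. A condensed sketch of the pattern (IOMMUFD_OBJ_FOO and setup_foo() are placeholders):

	foo = iommufd_object_alloc_ucmd(ucmd, foo, IOMMUFD_OBJ_FOO);
	if (IS_ERR(foo))
		return PTR_ERR(foo);
	/* Any failure from here simply returns; the ioctl core calls
	 * iommufd_object_abort_and_destroy() on ucmd->new_obj. */
	rc = setup_foo(foo);
	if (rc)
		return rc;
	return iommufd_ucmd_respond(ucmd, sizeof(*cmd));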
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 487779470261..fe789c2dc0c9 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -264,7 +264,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
- rc = -EINVAL;
+ rc = -EOPNOTSUPP;
goto out_abort;
}
return hwpt_nested;
@@ -309,10 +309,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
refcount_inc(&viommu->obj.users);
hwpt_nested->parent = viommu->hwpt;
- hwpt->domain =
- viommu->ops->alloc_domain_nested(viommu,
- flags & ~IOMMU_HWPT_FAULT_ID_VALID,
- user_data);
+ hwpt->domain = viommu->ops->alloc_domain_nested(
+ viommu, flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
hwpt->domain = NULL;
@@ -323,7 +321,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
- rc = -EINVAL;
+ rc = -EOPNOTSUPP;
goto out_abort;
}
return hwpt_nested;
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 8a790e597e12..c0360c450880 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
return iter->area;
}
-static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
- unsigned long length,
- unsigned long iova_alignment,
- unsigned long page_offset)
+static bool __alloc_iova_check_range(unsigned long *start, unsigned long last,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
{
- if (span->is_used || span->last_hole - span->start_hole < length - 1)
+ unsigned long aligned_start;
+
+ /* ALIGN_UP() */
+ if (check_add_overflow(*start, iova_alignment - 1, &aligned_start))
return false;
+ aligned_start &= ~(iova_alignment - 1);
+ aligned_start |= page_offset;
- span->start_hole = ALIGN(span->start_hole, iova_alignment) |
- page_offset;
- if (span->start_hole > span->last_hole ||
- span->last_hole - span->start_hole < length - 1)
+ if (aligned_start >= last || last - aligned_start < length - 1)
return false;
+ *start = aligned_start;
return true;
}
-static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
unsigned long length,
unsigned long iova_alignment,
unsigned long page_offset)
{
- if (span->is_hole || span->last_used - span->start_used < length - 1)
+ if (span->is_used)
return false;
+ return __alloc_iova_check_range(&span->start_hole, span->last_hole,
+ length, iova_alignment, page_offset);
+}
- span->start_used = ALIGN(span->start_used, iova_alignment) |
- page_offset;
- if (span->start_used > span->last_used ||
- span->last_used - span->start_used < length - 1)
+static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
+{
+ if (span->is_hole)
return false;
- return true;
+ return __alloc_iova_check_range(&span->start_used, span->last_used,
+ length, iova_alignment, page_offset);
}
/*
@@ -719,6 +728,12 @@ again:
goto out_unlock_iova;
}
+ /* The area is locked by an object that has not been destroyed */
+ if (area->num_locks) {
+ rc = -EBUSY;
+ goto out_unlock_iova;
+ }
+
if (area_first < start || area_last > last) {
rc = -ENOENT;
goto out_unlock_iova;
@@ -743,8 +758,10 @@ again:
iommufd_access_notify_unmap(iopt, area_first, length);
/* Something is not responding to unmap requests. */
tries++;
- if (WARN_ON(tries > 100))
- return -EDEADLOCK;
+ if (WARN_ON(tries > 100)) {
+ rc = -EDEADLOCK;
+ goto out_unmapped;
+ }
goto again;
}
@@ -766,6 +783,7 @@ again:
out_unlock_iova:
up_write(&iopt->iova_rwsem);
up_read(&iopt->domains_rwsem);
+out_unmapped:
if (unmapped)
*unmapped = unmapped_bytes;
return rc;
@@ -1410,8 +1428,7 @@ out_unlock:
}
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access,
- u32 iopt_access_list_id)
+ struct iommufd_access *access, u32 iopt_access_list_id)
{
down_write(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem);
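
The refactored helper above replaces the bare ALIGN(), which silently wraps when the start of a span is close to ULONG_MAX. A self-contained sketch of the same idiom:

	/*
	 * Align *start up to align (a power of two) and re-apply the sub-page
	 * offset, failing instead of wrapping past ULONG_MAX.
	 */
	static bool align_up_checked(unsigned long *start, unsigned long align,
				     unsigned long page_offset)
	{
		unsigned long aligned;

		if (check_add_overflow(*start, align - 1, &aligned))
			return false;	/* plain ALIGN() would have wrapped */
		aligned &= ~(align - 1);
		aligned |= page_offset;
		*start = aligned;
		return true;
	}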
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 10c928a9a463..b6064f4ce4af 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -48,6 +48,7 @@ struct iopt_area {
int iommu_prot;
bool prevent_access : 1;
unsigned int num_accesses;
+ unsigned int num_locks;
};
struct iopt_allowed {
@@ -238,9 +239,9 @@ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start,
int iopt_area_add_access(struct iopt_area *area, unsigned long start,
unsigned long last, struct page **out_pages,
- unsigned int flags);
+ unsigned int flags, bool lock_area);
void iopt_area_remove_access(struct iopt_area *area, unsigned long start,
- unsigned long last);
+ unsigned long last, bool unlock_area);
int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
void *data, unsigned long length, unsigned int flags);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 9ccc83341f32..0da2a81eedfa 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -7,6 +7,7 @@
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
@@ -44,6 +45,7 @@ struct iommufd_ctx {
struct xarray groups;
wait_queue_head_t destroy_wait;
struct rw_semaphore ioas_creation_lock;
+ struct maple_tree mt_mmap;
struct mutex sw_msi_lock;
struct list_head sw_msi_list;
@@ -55,6 +57,18 @@ struct iommufd_ctx {
struct iommufd_ioas *vfio_ioas;
};
+/* Entry for iommufd_ctx::mt_mmap */
+struct iommufd_mmap {
+ struct iommufd_object *owner;
+
+ /* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */
+ unsigned long vm_pgoff;
+
+ /* Physical range for io_remap_pfn_range() */
+ phys_addr_t mmio_addr;
+ size_t length;
+};
+
/*
* The IOVA to PFN map. The map automatically copies the PFNs into multiple
* domains and permits sharing of PFNs between io_pagetable instances. This
@@ -135,6 +149,7 @@ struct iommufd_ucmd {
void __user *ubuffer;
u32 user_size;
void *cmd;
+ struct iommufd_object *new_obj;
};
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
@@ -154,7 +169,7 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
if (!refcount_inc_not_zero(&obj->users))
return false;
- if (!refcount_inc_not_zero(&obj->shortterm_users)) {
+ if (!refcount_inc_not_zero(&obj->wait_cnt)) {
/*
* If the caller doesn't already have a ref on obj this must be
* called under the xa_lock. Otherwise the caller is holding a
@@ -172,11 +187,11 @@ static inline void iommufd_put_object(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
/*
- * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
- * a spurious !0 users with a 0 shortterm_users.
+ * Users first, then wait_cnt so that REMOVE_WAIT never sees a spurious
+ * !0 users with a 0 wait_cnt.
*/
refcount_dec(&obj->users);
- if (refcount_dec_and_test(&obj->shortterm_users))
+ if (refcount_dec_and_test(&obj->wait_cnt))
wake_up_interruptible_all(&ictx->destroy_wait);
}
@@ -187,7 +202,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
enum {
- REMOVE_WAIT_SHORTTERM = 1,
+ REMOVE_WAIT = BIT(0),
+ REMOVE_OBJ_TOMBSTONE = BIT(1),
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
struct iommufd_object *to_destroy, u32 id,
@@ -195,15 +211,35 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
/*
* The caller holds a users refcount and wants to destroy the object. At this
- * point the caller has no shortterm_users reference and at least the xarray
- * will be holding one.
+ * point the caller has no wait_cnt reference and at least the xarray will be
+ * holding one.
*/
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
int ret;
- ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+ ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT);
+
+ /*
+ * If there is a bug and we couldn't destroy the object then we did put
+ * back the caller's users refcount and will eventually try to free it
+ * again during close.
+ */
+ WARN_ON(ret);
+}
+
+/*
+ * Similar to iommufd_object_destroy_user(), except that the object ID is left
+ * reserved/tombstoned.
+ */
+static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
+{
+ int ret;
+
+ ret = iommufd_object_remove(ictx, obj, obj->id,
+ REMOVE_WAIT | REMOVE_OBJ_TOMBSTONE);
/*
* If there is a bug and we couldn't destroy the object then we did put
@@ -230,6 +266,15 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
iommufd_object_remove(ictx, obj, obj->id, 0);
}
+/*
+ * Callers of these normal object allocators must call iommufd_object_finalize()
+ * to finalize the object, or call iommufd_object_abort_and_destroy() to revert
+ * the allocation.
+ */
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+ size_t size,
+ enum iommufd_object_type type);
+
#define __iommufd_object_alloc(ictx, ptr, type, obj) \
container_of(_iommufd_object_alloc( \
ictx, \
@@ -243,6 +288,26 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
__iommufd_object_alloc(ictx, ptr, type, obj)
/*
+ * Callers of these _ucmd allocators should not call iommufd_object_finalize()
+ * or iommufd_object_abort_and_destroy(), as the core automatically does that.
+ */
+struct iommufd_object *
+_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, size_t size,
+ enum iommufd_object_type type);
+
+#define __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) \
+ container_of(_iommufd_object_alloc_ucmd( \
+ ucmd, \
+ sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \
+ offsetof(typeof(*(ptr)), \
+ obj) != 0), \
+ type), \
+ typeof(*(ptr)), obj)
+
+#define iommufd_object_alloc_ucmd(ucmd, ptr, type) \
+ __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj)
+
+/*
* The IO Address Space (IOAS) pagetable is a virtual page table backed by the
* io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
* mapping is copied into all of the associated domains and made available to
@@ -266,8 +331,7 @@ struct iommufd_ioas {
static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
u32 id)
{
- return container_of(iommufd_get_object(ictx, id,
- IOMMUFD_OBJ_IOAS),
+ return container_of(iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS),
struct iommufd_ioas, obj);
}
@@ -425,6 +489,8 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
+ struct iommufd_vdevice *vdev;
+ bool destroying;
};
static inline struct iommufd_device *
@@ -435,6 +501,7 @@ iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_device, obj);
}
+void iommufd_device_pre_destroy(struct iommufd_object *obj);
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
@@ -452,10 +519,32 @@ struct iommufd_access {
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access,
- u32 iopt_access_list_id);
+ struct iommufd_access *access, u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
+/* iommufd_access for internal use */
+static inline bool iommufd_access_is_internal(struct iommufd_access *access)
+{
+ return !access->ictx;
+}
+
+struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx);
+
+static inline void
+iommufd_access_destroy_internal(struct iommufd_ctx *ictx,
+ struct iommufd_access *access)
+{
+ iommufd_object_destroy_user(ictx, &access->obj);
+}
+
+int iommufd_access_attach_internal(struct iommufd_access *access,
+ struct iommufd_ioas *ioas);
+
+static inline void iommufd_access_detach_internal(struct iommufd_access *access)
+{
+ iommufd_access_detach(access);
+}
+
struct iommufd_eventq {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
@@ -528,7 +617,7 @@ struct iommufd_veventq {
struct list_head node; /* for iommufd_viommu::veventqs */
struct iommufd_vevent lost_events_header;
- unsigned int type;
+ enum iommu_veventq_type type;
unsigned int depth;
/* Use common.lock for protection */
@@ -583,7 +672,8 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
}
static inline struct iommufd_veventq *
-iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type)
{
struct iommufd_veventq *veventq, *next;
@@ -600,14 +690,17 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);
+void iommufd_vdevice_abort(struct iommufd_object *obj);
+int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_hw_queue_destroy(struct iommufd_object *obj);
-struct iommufd_vdevice {
- struct iommufd_object obj;
- struct iommufd_ctx *ictx;
- struct iommufd_viommu *viommu;
- struct device *dev;
- u64 id; /* per-vIOMMU virtual ID */
-};
+static inline struct iommufd_vdevice *
+iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id)
+{
+ return container_of(iommufd_get_object(ictx, id,
+ IOMMUFD_OBJ_VDEVICE),
+ struct iommufd_vdevice, obj);
+}
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
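
A sketch of the intended call sequence for the internal access API declared above, e.g. from an in-kernel consumer (error handling abbreviated; ictx and ioas are assumed to be held by the caller):

	access = iommufd_access_create_internal(ictx);
	if (IS_ERR(access))
		return PTR_ERR(access);

	rc = iommufd_access_attach_internal(access, ioas);
	if (rc) {
		iommufd_access_destroy_internal(ictx, access);
		return rc;
	}

	/* ... iommufd_access_pin_pages()/unpin_pages(); internal accesses
	 * lock the area so userspace cannot munmap the pinned range ... */

	iommufd_access_detach_internal(access);
	iommufd_access_destroy_internal(ictx, access);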
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index 1cd7e8394129..8fc618b2bcf9 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -227,6 +227,23 @@ struct iommu_hwpt_invalidate_selftest {
#define IOMMU_VIOMMU_TYPE_SELFTEST 0xdeadbeef
+/**
+ * struct iommu_viommu_selftest - vIOMMU data for Mock driver
+ * (IOMMU_VIOMMU_TYPE_SELFTEST)
+ * @in_data: Input random data from user space
+ * @out_data: Output data (matching @in_data) to user space
+ * @out_mmap_offset: The offset argument for mmap syscall
+ * @out_mmap_length: The length argument for mmap syscall
+ *
+ * Simply set @out_data=@in_data for a loopback test
+ */
+struct iommu_viommu_selftest {
+ __u32 in_data;
+ __u32 out_data;
+ __aligned_u64 out_mmap_offset;
+ __aligned_u64 out_mmap_length;
+};
+
/* Should not be equal to any defined value in enum iommu_viommu_invalidate_data_type */
#define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST 0xdeadbeef
#define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID 0xdadbeef
@@ -252,4 +269,7 @@ struct iommu_viommu_event_selftest {
__u32 virt_id;
};
+#define IOMMU_HW_QUEUE_TYPE_SELFTEST 0xdeadbeef
+#define IOMMU_TEST_HW_QUEUE_MAX 2
+
#endif
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
index 39a86a4a1d3a..4514575818fc 100644
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -407,7 +407,6 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
update_indexes:
if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {
-
/*
* The attempt to advance the base index to @iova
* may fail if it's out of bounds, or pinning the pages
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..15af7ced0501 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -23,12 +23,72 @@
#include "iommufd_test.h"
struct iommufd_object_ops {
+ void (*pre_destroy)(struct iommufd_object *obj);
void (*destroy)(struct iommufd_object *obj);
void (*abort)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
static struct miscdevice vfio_misc_dev;
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+ size_t size,
+ enum iommufd_object_type type)
+{
+ struct iommufd_object *obj;
+ int rc;
+
+ obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+ obj->type = type;
+ /* Starts out biased by 1 until it is removed from the xarray */
+ refcount_set(&obj->wait_cnt, 1);
+ refcount_set(&obj->users, 1);
+
+ /*
+ * Reserve an ID in the xarray but do not publish the pointer yet since
+ * the caller hasn't initialized it yet. Once the pointer is published
+ * in the xarray and visible to other threads we can't reliably destroy
+ * it anymore, so the caller must complete all errorable operations
+ * before calling iommufd_object_finalize().
+ */
+ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ goto out_free;
+ return obj;
+out_free:
+ kfree(obj);
+ return ERR_PTR(rc);
+}
+
+struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd,
+ size_t size,
+ enum iommufd_object_type type)
+{
+ struct iommufd_object *new_obj;
+
+ /* Something is coded wrong if this is hit */
+ if (WARN_ON(ucmd->new_obj))
+ return ERR_PTR(-EBUSY);
+
+ /*
+ * An abort op means that its caller must invoke it while holding the
+ * caller's own lock. That doesn't work with _iommufd_object_alloc_ucmd(),
+ * which invokes the abort op from iommufd_object_abort_and_destroy(),
+ * outside the caller's lock.
+ */
+ if (WARN_ON(iommufd_object_ops[type].abort))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ new_obj = _iommufd_object_alloc(ucmd->ictx, size, type);
+ if (IS_ERR(new_obj))
+ return new_obj;
+
+ ucmd->new_obj = new_obj;
+ return new_obj;
+}
+
/*
* Allow concurrent access to the object.
*
@@ -95,20 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
return obj;
}
-static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
- struct iommufd_object *to_destroy)
+static int iommufd_object_dec_wait(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy)
{
- if (refcount_dec_and_test(&to_destroy->shortterm_users))
+ if (refcount_dec_and_test(&to_destroy->wait_cnt))
return 0;
+ if (iommufd_object_ops[to_destroy->type].pre_destroy)
+ iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy);
+
if (wait_event_timeout(ictx->destroy_wait,
- refcount_read(&to_destroy->shortterm_users) ==
- 0,
- msecs_to_jiffies(60000)))
+ refcount_read(&to_destroy->wait_cnt) == 0,
+ msecs_to_jiffies(60000)))
return 0;
pr_crit("Time out waiting for iommufd object to become free\n");
- refcount_inc(&to_destroy->shortterm_users);
+ refcount_inc(&to_destroy->wait_cnt);
return -EBUSY;
}
@@ -122,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
{
struct iommufd_object *obj;
XA_STATE(xas, &ictx->objects, id);
- bool zerod_shortterm = false;
+ bool zerod_wait_cnt = false;
int ret;
/*
- * The purpose of the shortterm_users is to ensure deterministic
- * destruction of objects used by external drivers and destroyed by this
- * function. Any temporary increment of the refcount must increment
- * shortterm_users, such as during ioctl execution.
+ * The purpose of the wait_cnt is to ensure deterministic destruction
+ * of objects used by external drivers and destroyed by this function.
+ * Incrementing this wait_cnt should either be short lived, such as
+ * during ioctl execution, or be revoked and blocked during
+ * pre_destroy(), such as vdev holding the idev's refcount.
*/
- if (flags & REMOVE_WAIT_SHORTTERM) {
- ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
+ if (flags & REMOVE_WAIT) {
+ ret = iommufd_object_dec_wait(ictx, to_destroy);
if (ret) {
/*
* We have a bug. Put back the callers reference and
@@ -141,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
refcount_dec(&to_destroy->users);
return ret;
}
- zerod_shortterm = true;
+ zerod_wait_cnt = true;
}
xa_lock(&ictx->objects);
@@ -167,17 +230,17 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
goto err_xa;
}
- xas_store(&xas, NULL);
+ xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
ictx->vfio_ioas = NULL;
xa_unlock(&ictx->objects);
/*
- * Since users is zero any positive users_shortterm must be racing
+ * Since users is zero any positive wait_cnt must be racing
* iommufd_put_object(), or we have a bug.
*/
- if (!zerod_shortterm) {
- ret = iommufd_object_dec_wait_shortterm(ictx, obj);
+ if (!zerod_wait_cnt) {
+ ret = iommufd_object_dec_wait(ictx, obj);
if (WARN_ON(ret))
return ret;
}
@@ -187,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
return 0;
err_xa:
- if (zerod_shortterm) {
+ if (zerod_wait_cnt) {
/* Restore the xarray owned reference */
- refcount_set(&obj->shortterm_users, 1);
+ refcount_set(&obj->wait_cnt, 1);
}
xa_unlock(&ictx->objects);
@@ -226,6 +289,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
xa_init(&ictx->groups);
ictx->file = filp;
+ mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
init_waitqueue_head(&ictx->destroy_wait);
mutex_init(&ictx->sw_msi_lock);
INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -252,19 +316,41 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
while (!xa_empty(&ictx->objects)) {
unsigned int destroyed = 0;
unsigned long index;
+ bool empty = true;
+ /*
+ * We can't use xa_empty() to end the loop, as the tombstones
+ * are stored as XA_ZERO_ENTRY in the xarray. xa_for_each()
+ * automatically converts them to NULL and skips them, so
+ * xa_empty() stays false even when only tombstones remain.
+ * Thus, once xa_for_each() finds no further !NULL entries,
+ * the loop is done.
+ */
xa_for_each(&ictx->objects, index, obj) {
+ empty = false;
if (!refcount_dec_if_one(&obj->users))
continue;
+
destroyed++;
xa_erase(&ictx->objects, index);
iommufd_object_ops[obj->type].destroy(obj);
kfree(obj);
}
+
+ if (empty)
+ break;
+
/* Bug related to users refcount */
if (WARN_ON(!destroyed))
break;
}
+
+ /*
+ * There may be some tombstones left over from
+ * iommufd_object_tombstone_user()
+ */
+ xa_destroy(&ictx->objects);
+
WARN_ON(!xa_empty(&ictx->groups));
mutex_destroy(&ictx->sw_msi_lock);
@@ -305,6 +391,7 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_fault_alloc fault;
struct iommu_hw_info info;
+ struct iommu_hw_queue_alloc hw_queue;
struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
struct iommu_hwpt_invalidate cache;
@@ -347,6 +434,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
struct iommu_fault_alloc, out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved),
+ IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl,
+ struct iommu_hw_queue_alloc, length),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
__reserved),
IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
@@ -417,14 +506,83 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
if (ret)
return ret;
ret = op->execute(&ucmd);
+
+ if (ucmd.new_obj) {
+ if (ret)
+ iommufd_object_abort_and_destroy(ictx, ucmd.new_obj);
+ else
+ iommufd_object_finalize(ictx, ucmd.new_obj);
+ }
return ret;
}
+static void iommufd_fops_vma_open(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_inc(&immap->owner->users);
+}
+
+static void iommufd_fops_vma_close(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_dec(&immap->owner->users);
+}
+
+static const struct vm_operations_struct iommufd_vma_ops = {
+ .open = iommufd_fops_vma_open,
+ .close = iommufd_fops_vma_close,
+};
+
+/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct iommufd_ctx *ictx = filp->private_data;
+ size_t length = vma->vm_end - vma->vm_start;
+ struct iommufd_mmap *immap;
+ int rc;
+
+ if (!PAGE_ALIGNED(length))
+ return -EINVAL;
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+ if (vma->vm_flags & VM_EXEC)
+ return -EPERM;
+
+ /* vma->vm_pgoff carries a page-shifted start position to an immap */
+ immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT);
+ if (!immap)
+ return -ENXIO;
+ /*
+ * mtree_load() returns the immap for any offset inside the allocated
+ * range, so only allow mapping the exact window
+ */
+ if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length)
+ return -ENXIO;
+
+ vma->vm_pgoff = 0;
+ vma->vm_private_data = immap;
+ vma->vm_ops = &iommufd_vma_ops;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ rc = io_remap_pfn_range(vma, vma->vm_start,
+ immap->mmio_addr >> PAGE_SHIFT, length,
+ vma->vm_page_prot);
+ if (rc)
+ return rc;
+
+ /* vm_ops.open won't be called for mmap itself. */
+ refcount_inc(&immap->owner->users);
+ return rc;
+}
+
static const struct file_operations iommufd_fops = {
.owner = THIS_MODULE,
.open = iommufd_fops_open,
.release = iommufd_fops_release,
.unlocked_ioctl = iommufd_fops_ioctl,
+ .mmap = iommufd_fops_mmap,
};
/**
@@ -498,11 +656,15 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_access_destroy_object,
},
[IOMMUFD_OBJ_DEVICE] = {
+ .pre_destroy = iommufd_device_pre_destroy,
.destroy = iommufd_device_destroy,
},
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
+ [IOMMUFD_OBJ_HW_QUEUE] = {
+ .destroy = iommufd_hw_queue_destroy,
+ },
[IOMMUFD_OBJ_HWPT_PAGING] = {
.destroy = iommufd_hwpt_paging_destroy,
.abort = iommufd_hwpt_paging_abort,
@@ -516,6 +678,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
},
[IOMMUFD_OBJ_VDEVICE] = {
.destroy = iommufd_vdevice_destroy,
+ .abort = iommufd_vdevice_abort,
},
[IOMMUFD_OBJ_VEVENTQ] = {
.destroy = iommufd_veventq_destroy,
@@ -539,7 +702,6 @@ static struct miscdevice iommu_misc_dev = {
.mode = 0660,
};
-
static struct miscdevice vfio_misc_dev = {
.minor = VFIO_MINOR,
.name = "vfio",
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 3427749bc5ce..c3433b845561 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1287,8 +1287,7 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
}
static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte,
- unsigned long length,
- bool writable)
+ unsigned long length, bool writable)
{
struct iopt_pages *pages;
@@ -1328,7 +1327,7 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
struct iopt_pages *pages;
unsigned long end;
void __user *uptr_down =
- (void __user *) ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
+ (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
if (check_add_overflow((unsigned long)uptr, length, &end))
return ERR_PTR(-EOVERFLOW);
@@ -2104,6 +2103,7 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* @last_index: Inclusive last page index
* @out_pages: Output list of struct page's representing the PFNs
* @flags: IOMMUFD_ACCESS_RW_* flags
+ * @lock_area: Fail userspace munmap on this area
*
* Record that an in-kernel access will be accessing the pages, ensure they are
* pinned, and return the PFNs as a simple list of 'struct page *'.
@@ -2111,8 +2111,8 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* This should be undone through a matching call to iopt_area_remove_access()
*/
int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index, struct page **out_pages,
- unsigned int flags)
+ unsigned long last_index, struct page **out_pages,
+ unsigned int flags, bool lock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -2125,6 +2125,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access = iopt_pages_get_exact_access(pages, start_index, last_index);
if (access) {
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
access->users++;
iopt_pages_fill_from_xarray(pages, start_index, last_index,
out_pages);
@@ -2146,6 +2148,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access->node.last = last_index;
access->users = 1;
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
interval_tree_insert(&access->node, &pages->access_itree);
mutex_unlock(&pages->mutex);
return 0;
@@ -2162,12 +2166,13 @@ err_unlock:
* @area: The source of PFNs
* @start_index: First page index
* @last_index: Inclusive last page index
+ * @unlock_area: Must pair with the lock_area passed to iopt_area_add_access()
*
* Undo iopt_area_add_access() and unpin the pages if necessary. The caller
* must stop using the PFNs before calling this.
*/
void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index)
+ unsigned long last_index, bool unlock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -2178,6 +2183,10 @@ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
goto out_unlock;
WARN_ON(area->num_accesses == 0 || access->users == 0);
+ if (unlock_area) {
+ WARN_ON(area->num_locks == 0);
+ area->num_locks--;
+ }
area->num_accesses--;
access->users--;
if (access->users)
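
The new lock_area/unlock_area arguments must be used as a pair; a sketch of the contract (index arithmetic abbreviated):

	/* lock_area = true bumps area->num_locks, which makes the unmap path
	 * in io_pagetable.c return -EBUSY for this area ... */
	rc = iopt_area_add_access(area, start_index, last_index, out_pages,
				  flags, true);
	...
	/* ... until the matching removal drops num_locks again */
	iopt_area_remove_access(area, start_index, last_index, true);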
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 6bd0abf9a641..61686603c769 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -138,7 +138,6 @@ to_mock_domain(struct iommu_domain *domain)
struct mock_iommu_domain_nested {
struct iommu_domain domain;
struct mock_viommu *mock_viommu;
- struct mock_iommu_domain *parent;
u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM];
};
@@ -151,6 +150,11 @@ to_mock_nested(struct iommu_domain *domain)
struct mock_viommu {
struct iommufd_viommu core;
struct mock_iommu_domain *s2_parent;
+ struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX];
+ struct mutex queue_mutex;
+
+ unsigned long mmap_offset;
+ u32 *page; /* Mmap page to test u32 type of in_data */
};
static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu)
@@ -158,6 +162,19 @@ static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu)
return container_of(viommu, struct mock_viommu, core);
}
+struct mock_hw_queue {
+ struct iommufd_hw_queue core;
+ struct mock_viommu *mock_viommu;
+ struct mock_hw_queue *prev;
+ u16 index;
+};
+
+static inline struct mock_hw_queue *
+to_mock_hw_queue(struct iommufd_hw_queue *hw_queue)
+{
+ return container_of(hw_queue, struct mock_hw_queue, core);
+}
+
enum selftest_obj_type {
TYPE_IDEV,
};
@@ -288,10 +305,15 @@ static struct iommu_domain mock_blocking_domain = {
.ops = &mock_blocking_ops,
};
-static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type)
+static void *mock_domain_hw_info(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type)
{
struct iommu_test_hw_info *info;
+ if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
+ *type != IOMMU_HW_INFO_TYPE_SELFTEST)
+ return ERR_PTR(-EOPNOTSUPP);
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
@@ -434,7 +456,6 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
mock_nested = __mock_domain_alloc_nested(user_data);
if (IS_ERR(mock_nested))
return ERR_CAST(mock_nested);
- mock_nested->parent = mock_parent;
return &mock_nested->domain;
}
@@ -671,9 +692,15 @@ static void mock_viommu_destroy(struct iommufd_viommu *viommu)
{
struct mock_iommu_device *mock_iommu = container_of(
viommu->iommu_dev, struct mock_iommu_device, iommu_dev);
+ struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
if (refcount_dec_and_test(&mock_iommu->users))
complete(&mock_iommu->complete);
+ if (mock_viommu->mmap_offset)
+ iommufd_viommu_destroy_mmap(&mock_viommu->core,
+ mock_viommu->mmap_offset);
+ free_page((unsigned long)mock_viommu->page);
+ mutex_destroy(&mock_viommu->queue_mutex);
/* iommufd core frees mock_viommu and viommu */
}
@@ -692,7 +719,6 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
if (IS_ERR(mock_nested))
return ERR_CAST(mock_nested);
mock_nested->mock_viommu = mock_viommu;
- mock_nested->parent = mock_viommu->s2_parent;
return &mock_nested->domain;
}
@@ -766,31 +792,149 @@ out:
return rc;
}
+static size_t mock_viommu_get_hw_queue_size(struct iommufd_viommu *viommu,
+ enum iommu_hw_queue_type queue_type)
+{
+ if (queue_type != IOMMU_HW_QUEUE_TYPE_SELFTEST)
+ return 0;
+ return HW_QUEUE_STRUCT_SIZE(struct mock_hw_queue, core);
+}
+
+static void mock_hw_queue_destroy(struct iommufd_hw_queue *hw_queue)
+{
+ struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue);
+ struct mock_viommu *mock_viommu = mock_hw_queue->mock_viommu;
+
+ mutex_lock(&mock_viommu->queue_mutex);
+ mock_viommu->hw_queue[mock_hw_queue->index] = NULL;
+ if (mock_hw_queue->prev)
+ iommufd_hw_queue_undepend(mock_hw_queue, mock_hw_queue->prev,
+ core);
+ mutex_unlock(&mock_viommu->queue_mutex);
+}
+
+/* Test iommufd_hw_queue_depend/undepend() */
+static int mock_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index,
+ phys_addr_t base_addr_pa)
+{
+ struct mock_viommu *mock_viommu = to_mock_viommu(hw_queue->viommu);
+ struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue);
+ struct mock_hw_queue *prev = NULL;
+ int rc = 0;
+
+ if (index >= IOMMU_TEST_HW_QUEUE_MAX)
+ return -EINVAL;
+
+ mutex_lock(&mock_viommu->queue_mutex);
+
+ if (mock_viommu->hw_queue[index]) {
+ rc = -EEXIST;
+ goto unlock;
+ }
+
+ if (index) {
+ prev = mock_viommu->hw_queue[index - 1];
+ if (!prev) {
+ rc = -EIO;
+ goto unlock;
+ }
+ }
+
+ /*
+ * Catch a kernel bug where the core converted the physical address
+ * incorrectly; let mock_domain_iova_to_phys() WARN_ON if it fails.
+ */
+ if (base_addr_pa != iommu_iova_to_phys(&mock_viommu->s2_parent->domain,
+ hw_queue->base_addr)) {
+ rc = -EFAULT;
+ goto unlock;
+ }
+
+ if (prev) {
+ rc = iommufd_hw_queue_depend(mock_hw_queue, prev, core);
+ if (rc)
+ goto unlock;
+ }
+
+ mock_hw_queue->prev = prev;
+ mock_hw_queue->mock_viommu = mock_viommu;
+ mock_viommu->hw_queue[index] = mock_hw_queue;
+
+ hw_queue->destroy = &mock_hw_queue_destroy;
+unlock:
+ mutex_unlock(&mock_viommu->queue_mutex);
+ return rc;
+}
+
static struct iommufd_viommu_ops mock_viommu_ops = {
.destroy = mock_viommu_destroy,
.alloc_domain_nested = mock_viommu_alloc_domain_nested,
.cache_invalidate = mock_viommu_cache_invalidate,
+ .get_hw_queue_size = mock_viommu_get_hw_queue_size,
+ .hw_queue_init_phys = mock_hw_queue_init_phys,
};
-static struct iommufd_viommu *mock_viommu_alloc(struct device *dev,
- struct iommu_domain *domain,
- struct iommufd_ctx *ictx,
- unsigned int viommu_type)
+static size_t mock_get_viommu_size(struct device *dev,
+ enum iommu_viommu_type viommu_type)
{
- struct mock_iommu_device *mock_iommu =
- iommu_get_iommu_dev(dev, struct mock_iommu_device, iommu_dev);
- struct mock_viommu *mock_viommu;
-
if (viommu_type != IOMMU_VIOMMU_TYPE_SELFTEST)
- return ERR_PTR(-EOPNOTSUPP);
+ return 0;
+ return VIOMMU_STRUCT_SIZE(struct mock_viommu, core);
+}
+
+static int mock_viommu_init(struct iommufd_viommu *viommu,
+ struct iommu_domain *parent_domain,
+ const struct iommu_user_data *user_data)
+{
+ struct mock_iommu_device *mock_iommu = container_of(
+ viommu->iommu_dev, struct mock_iommu_device, iommu_dev);
+ struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
+ struct iommu_viommu_selftest data;
+ int rc;
+
+ if (user_data) {
+ rc = iommu_copy_struct_from_user(
+ &data, user_data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data);
+ if (rc)
+ return rc;
+
+ /* Allocate two pages */
+ mock_viommu->page =
+ (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!mock_viommu->page)
+ return -ENOMEM;
- mock_viommu = iommufd_viommu_alloc(ictx, struct mock_viommu, core,
- &mock_viommu_ops);
- if (IS_ERR(mock_viommu))
- return ERR_CAST(mock_viommu);
+ rc = iommufd_viommu_alloc_mmap(&mock_viommu->core,
+ __pa(mock_viommu->page),
+ PAGE_SIZE * 2,
+ &mock_viommu->mmap_offset);
+ if (rc)
+ goto err_free_page;
+
+ /* For loopback tests on both the page and out_data */
+ *mock_viommu->page = data.in_data;
+ data.out_data = data.in_data;
+ data.out_mmap_length = PAGE_SIZE * 2;
+ data.out_mmap_offset = mock_viommu->mmap_offset;
+ rc = iommu_copy_struct_to_user(
+ user_data, &data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data);
+ if (rc)
+ goto err_destroy_mmap;
+ }
refcount_inc(&mock_iommu->users);
- return &mock_viommu->core;
+ mutex_init(&mock_viommu->queue_mutex);
+ mock_viommu->s2_parent = to_mock_domain(parent_domain);
+
+ viommu->ops = &mock_viommu_ops;
+ return 0;
+
+err_destroy_mmap:
+ iommufd_viommu_destroy_mmap(&mock_viommu->core,
+ mock_viommu->mmap_offset);
+err_free_page:
+ free_page((unsigned long)mock_viommu->page);
+ return rc;
}
static const struct iommu_ops mock_ops = {
@@ -801,7 +945,6 @@ static const struct iommu_ops mock_ops = {
.default_domain = &mock_blocking_domain,
.blocked_domain = &mock_blocking_domain,
.owner = THIS_MODULE,
- .pgsize_bitmap = MOCK_IO_PAGE_SIZE,
.hw_info = mock_domain_hw_info,
.domain_alloc_paging_flags = mock_domain_alloc_paging_flags,
.domain_alloc_nested = mock_domain_alloc_nested,
@@ -810,7 +953,8 @@ static const struct iommu_ops mock_ops = {
.probe_device = mock_probe_device,
.page_response = mock_domain_page_response,
.user_pasid_table = true,
- .viommu_alloc = mock_viommu_alloc,
+ .get_viommu_size = mock_get_viommu_size,
+ .viommu_init = mock_viommu_init,
.default_domain_ops =
&(struct iommu_domain_ops){
.free = mock_domain_free,
@@ -1216,9 +1360,8 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd,
return 0;
}
-static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd,
- u32 mockpt_id, unsigned int iotlb_id,
- u32 iotlb)
+static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, u32 mockpt_id,
+ unsigned int iotlb_id, u32 iotlb)
{
struct mock_iommu_domain_nested *mock_nested;
struct iommufd_hw_pagetable *hwpt;
@@ -1491,7 +1634,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
int rc;
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
- if (length > 16*1024*1024)
+ if (length > 16 * 1024 * 1024)
return -ENOMEM;
if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ))
@@ -1508,7 +1651,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
if (flags & MOCK_FLAGS_ACCESS_SYZ)
iova = iommufd_test_syz_conv_iova(staccess->access,
- &cmd->access_pages.iova);
+ &cmd->access_pages.iova);
npages = (ALIGN(iova + length, PAGE_SIZE) -
ALIGN_DOWN(iova, PAGE_SIZE)) /
@@ -1584,7 +1727,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
int rc;
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
- if (length > 16*1024*1024)
+ if (length > 16 * 1024 * 1024)
return -ENOMEM;
if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH |
@@ -1610,7 +1753,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
if (flags & MOCK_FLAGS_ACCESS_SYZ)
iova = iommufd_test_syz_conv_iova(staccess->access,
- &cmd->access_rw.iova);
+ &cmd->access_rw.iova);
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
if (rc)
@@ -1665,7 +1808,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
goto out_put;
}
- if (copy_from_user(tmp, uptr,DIV_ROUND_UP(max, BITS_PER_BYTE))) {
+ if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) {
rc = -EFAULT;
goto out_free;
}
@@ -1701,7 +1844,7 @@ out_put:
static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
struct iommu_test_cmd *cmd)
{
- struct iopf_fault event = { };
+ struct iopf_fault event = {};
struct iommufd_device *idev;
idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id);
@@ -1832,8 +1975,7 @@ static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd,
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
- iommufd_device_detach(sobj->idev.idev,
- cmd->pasid_attach.pasid);
+ iommufd_device_detach(sobj->idev.idev, cmd->pasid_attach.pasid);
out_sobj:
iommufd_put_object(ucmd->ictx, &sobj->obj);
@@ -2004,8 +2146,8 @@ int __init iommufd_test_init(void)
goto err_bus;
rc = iommu_device_register_bus(&mock_iommu.iommu_dev, &mock_ops,
- &iommufd_mock_bus_type.bus,
- &iommufd_mock_bus_type.nb);
+ &iommufd_mock_bus_type.bus,
+ &iommufd_mock_bus_type.nb);
if (rc)
goto err_sysfs;
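
The loopback wiring above can be exercised from user space roughly as follows (a sketch assuming IOMMU_VIOMMU_ALLOC carries struct iommu_viommu_selftest through data_uptr):

	struct iommu_viommu_selftest data = { .in_data = 0xbeef };
	__u32 *page;

	/* IOMMU_VIOMMU_ALLOC with .type = IOMMU_VIOMMU_TYPE_SELFTEST,
	 * .data_uptr = (uintptr_t)&data, .data_len = sizeof(data) */
	assert(data.out_data == 0xbeef);

	page = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE,
		    MAP_SHARED, iommufd, data.out_mmap_offset);
	assert(page != MAP_FAILED && *page == 0xbeef);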
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 01df2b985f02..2ca5809b238b 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -17,10 +17,16 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
struct iommu_viommu_alloc *cmd = ucmd->cmd;
+ const struct iommu_user_data user_data = {
+ .type = cmd->type,
+ .uptr = u64_to_user_ptr(cmd->data_uptr),
+ .len = cmd->data_len,
+ };
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_viommu *viommu;
struct iommufd_device *idev;
const struct iommu_ops *ops;
+ size_t viommu_size;
int rc;
if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
@@ -31,7 +37,22 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
return PTR_ERR(idev);
ops = dev_iommu_ops(idev->dev);
- if (!ops->viommu_alloc) {
+ if (!ops->get_viommu_size || !ops->viommu_init) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+
+ viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
+ if (!viommu_size) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+
+ /*
+ * It is a driver bug to provide a viommu_size smaller than the core
+ * vIOMMU structure size
+ */
+ if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
rc = -EOPNOTSUPP;
goto out_put_idev;
}
@@ -47,8 +68,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_hwpt;
}
- viommu = ops->viommu_alloc(idev->dev, hwpt_paging->common.domain,
- ucmd->ictx, cmd->type);
+ viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
+ ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
if (IS_ERR(viommu)) {
rc = PTR_ERR(viommu);
goto out_put_hwpt;
@@ -68,15 +89,20 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
*/
viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);
+ rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
+ user_data.len ? &user_data : NULL);
+ if (rc)
+ goto out_put_hwpt;
+
+ /* It is a driver bug that viommu->ops isn't filled */
+ if (WARN_ON_ONCE(!viommu->ops)) {
+ rc = -EOPNOTSUPP;
+ goto out_put_hwpt;
+ }
+
cmd->out_viommu_id = viommu->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
- if (rc)
- goto out_abort;
- iommufd_object_finalize(ucmd->ictx, &viommu->obj);
- goto out_put_hwpt;
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj);
out_put_hwpt:
iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
out_put_idev:
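A minimal driver-side sketch of the two-step contract above, where ops->get_viommu_size() sizes the allocation and ops->viommu_init() fills it in. This is not part of the patch: the my_* identifiers and MY_VIOMMU_TYPE are hypothetical, and the parameter types are inferred from the call sites in this hunk.

/* Hypothetical driver structure embedding the core object at offset zero */
struct my_viommu {
	struct iommufd_viommu core;	/* must be first for container_of() */
	u32 vmid;			/* driver-private state */
};

static const struct iommufd_viommu_ops my_viommu_ops;	/* sketched further below */

static size_t my_get_viommu_size(struct device *dev, u32 viommu_type)
{
	if (viommu_type != MY_VIOMMU_TYPE)
		return 0;	/* unsupported type: the core returns -EOPNOTSUPP */
	return sizeof(struct my_viommu);
}

static int my_viommu_init(struct iommufd_viommu *viommu,
			  struct iommu_domain *parent_domain,
			  const struct iommu_user_data *user_data)
{
	struct my_viommu *my = container_of(viommu, struct my_viommu, core);

	/* user_data is NULL when userspace passed data_len == 0 */
	my->vmid = 0;
	/* The core WARNs and fails the ioctl if this is left NULL */
	viommu->ops = &my_viommu_ops;
	return 0;
}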
@@ -84,22 +110,41 @@ out_put_idev:
return rc;
}
-void iommufd_vdevice_destroy(struct iommufd_object *obj)
+void iommufd_vdevice_abort(struct iommufd_object *obj)
{
struct iommufd_vdevice *vdev =
container_of(obj, struct iommufd_vdevice, obj);
struct iommufd_viommu *viommu = vdev->viommu;
+ struct iommufd_device *idev = vdev->idev;
+
+ lockdep_assert_held(&idev->igroup->lock);
+ if (vdev->destroy)
+ vdev->destroy(vdev);
/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
- xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
+ xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
refcount_dec(&viommu->obj.users);
- put_device(vdev->dev);
+ idev->vdev = NULL;
+}
+
+void iommufd_vdevice_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_vdevice *vdev =
+ container_of(obj, struct iommufd_vdevice, obj);
+ struct iommufd_device *idev = vdev->idev;
+ struct iommufd_ctx *ictx = idev->ictx;
+
+ mutex_lock(&idev->igroup->lock);
+ iommufd_vdevice_abort(obj);
+ mutex_unlock(&idev->igroup->lock);
+ iommufd_put_object(ictx, &idev->obj);
}
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
struct iommu_vdevice_alloc *cmd = ucmd->cmd;
struct iommufd_vdevice *vdev, *curr;
+ size_t vdev_size = sizeof(*vdev);
struct iommufd_viommu *viommu;
struct iommufd_device *idev;
u64 virt_id = cmd->virt_id;
@@ -124,17 +169,54 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
- vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
+ mutex_lock(&idev->igroup->lock);
+ if (idev->destroying) {
+ rc = -ENOENT;
+ goto out_unlock_igroup;
+ }
+
+ if (idev->vdev) {
+ rc = -EEXIST;
+ goto out_unlock_igroup;
+ }
+
+ if (viommu->ops && viommu->ops->vdevice_size) {
+ /*
+ * It is a driver bug if:
+ * - ops->vdevice_size is smaller than the core structure size
+ * - there is no paired ops->vdevice_init op
+ */
+ if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
+ !viommu->ops->vdevice_init)) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock_igroup;
+ }
+ vdev_size = viommu->ops->vdevice_size;
+ }
+
+ vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
+ ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
if (IS_ERR(vdev)) {
rc = PTR_ERR(vdev);
- goto out_put_idev;
+ goto out_unlock_igroup;
}
- vdev->id = virt_id;
- vdev->dev = idev->dev;
- get_device(idev->dev);
+ vdev->virt_id = virt_id;
vdev->viommu = viommu;
refcount_inc(&viommu->obj.users);
+ /*
+ * A wait_cnt reference is held on the idev so long as we have the
+ * pointer. iommufd_device_pre_destroy() will revoke it before the
+ * idev's actual destruction.
+ */
+ vdev->idev = idev;
+
+ /*
+ * iommufd_device_destroy() delays until idev->vdev is NULL before
+ * freeing the idev, which only happens once the vdev destruction has
+ * finished.
+ */
+ idev->vdev = vdev;
curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
if (curr) {
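The ops->vdevice_size/ops->vdevice_init pairing checked in the hunk above uses the same embedding pattern. Continuing the hypothetical my_* sketch (again not part of this patch; vdev->destroy is the optional cleanup hook invoked by iommufd_vdevice_abort()):

struct my_vdevice {
	struct iommufd_vdevice core;	/* must be first for container_of() */
	u32 sid;			/* e.g. a physical stream ID */
};

static void my_vdevice_destroy(struct iommufd_vdevice *vdev)
{
	/* Undo my_vdevice_init(); runs under the igroup lock via abort */
}

static int my_vdevice_init(struct iommufd_vdevice *vdev)
{
	struct my_vdevice *my = container_of(vdev, struct my_vdevice, core);

	my->sid = 0;			/* e.g. derived from vdev->virt_id */
	vdev->destroy = my_vdevice_destroy;
	return 0;
}

static const struct iommufd_viommu_ops my_viommu_ops = {
	.vdevice_size = sizeof(struct my_vdevice),
	.vdevice_init = my_vdevice_init,
};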
@@ -142,17 +224,206 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_abort;
}
+ if (viommu->ops && viommu->ops->vdevice_init) {
+ rc = viommu->ops->vdevice_init(vdev);
+ if (rc)
+ goto out_abort;
+ }
+
cmd->out_vdevice_id = vdev->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_abort;
iommufd_object_finalize(ucmd->ictx, &vdev->obj);
- goto out_put_idev;
+ goto out_unlock_igroup;
out_abort:
iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_unlock_igroup:
+ mutex_unlock(&idev->igroup->lock);
out_put_idev:
- iommufd_put_object(ucmd->ictx, &idev->obj);
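+ /* On success the vdev keeps the wait_cnt reference on the idev */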
+ if (rc)
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+ iommufd_put_object(ucmd->ictx, &viommu->obj);
+ return rc;
+}
+
+static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
+ struct iommufd_access *access,
+ u64 base_iova, size_t length)
+{
+ u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
+ u64 offset = base_iova - aligned_iova;
+
+ iommufd_access_unpin_pages(access, aligned_iova,
+ PAGE_ALIGN(length + offset));
+ iommufd_access_detach_internal(access);
+ iommufd_access_destroy_internal(ictx, access);
+}
+
+void iommufd_hw_queue_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_hw_queue *hw_queue =
+ container_of(obj, struct iommufd_hw_queue, obj);
+
+ if (hw_queue->destroy)
+ hw_queue->destroy(hw_queue);
+ if (hw_queue->access)
+ iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
+ hw_queue->access,
+ hw_queue->base_addr,
+ hw_queue->length);
+ if (hw_queue->viommu)
+ refcount_dec(&hw_queue->viommu->obj.users);
+}
+
+/*
+ * When the HW accesses the guest queue via physical addresses, the underlying
+ * physical pages of the guest queue must be contiguous. There is also a
+ * security concern: IOMMUFD_CMD_IOAS_UNMAP could remove the guest queue's
+ * mappings from the nesting parent iopt while the HW is still physically
+ * accessing the queue memory. So such a HW queue must hold an access that
+ * pins the underlying pages and prevents the mappings from being removed.
+ */
+static struct iommufd_access *
+iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
+ struct iommufd_viommu *viommu, phys_addr_t *base_pa)
+{
+ u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
+ u64 offset = cmd->nesting_parent_iova - aligned_iova;
+ struct iommufd_access *access;
+ struct page **pages;
+ size_t max_npages;
+ size_t length;
+ size_t i;
+ int rc;
+
+ /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
+ if (check_add_overflow(offset, cmd->length, &length))
+ return ERR_PTR(-ERANGE);
+ if (check_add_overflow(length, PAGE_SIZE - 1, &length))
+ return ERR_PTR(-ERANGE);
+ max_npages = length / PAGE_SIZE;
+ /* length needs to be page aligned too */
+ length = max_npages * PAGE_SIZE;
+
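+ /*
+ * Worked example (illustrative, 4 KiB pages): nesting_parent_iova =
+ * 0x10100 and cmd->length = 0x2000 give aligned_iova = 0x10000,
+ * offset = 0x100, max_npages = 3, and a pinned length of 0x3000.
+ */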
+ /*
+ * Use kvcalloc() to avoid memory fragmentation for a large page array.
+ * Set __GFP_NOWARN to avoid syzkaller blowups.
+ */
+ pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ access = iommufd_access_create_internal(viommu->ictx);
+ if (IS_ERR(access)) {
+ rc = PTR_ERR(access);
+ goto out_free;
+ }
+
+ rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
+ if (rc)
+ goto out_destroy;
+
+ rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
+ if (rc)
+ goto out_detach;
+
+ /* Validate that the underlying physical pages are contiguous */
+ for (i = 1; i < max_npages; i++) {
+ if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
+ continue;
+ rc = -EFAULT;
+ goto out_unpin;
+ }
+
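+ /* Keep the sub-page offset when converting the first PFN to a PA */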
+ *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
+ kfree(pages);
+ return access;
+
+out_unpin:
+ iommufd_access_unpin_pages(access, aligned_iova, length);
+out_detach:
+ iommufd_access_detach_internal(access);
+out_destroy:
+ iommufd_access_destroy_internal(viommu->ictx, access);
+out_free:
+ kfree(pages);
+ return ERR_PTR(rc);
+}
+
+int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
+ struct iommufd_hw_queue *hw_queue;
+ struct iommufd_viommu *viommu;
+ struct iommufd_access *access;
+ size_t hw_queue_size;
+ phys_addr_t base_pa;
+ u64 last;
+ int rc;
+
+ if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
+ return -EOPNOTSUPP;
+ if (!cmd->length)
+ return -EINVAL;
+ if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
+ &last))
+ return -EOVERFLOW;
+
+ viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+ if (IS_ERR(viommu))
+ return PTR_ERR(viommu);
+
+ if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
+ !viommu->ops->hw_queue_init_phys) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
+ if (!hw_queue_size) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ /*
+ * It is a driver bug to provide a hw_queue_size smaller than the
+ * core HW queue structure size.
+ */
+ if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
+ ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
+ if (IS_ERR(hw_queue)) {
+ rc = PTR_ERR(hw_queue);
+ goto out_put_viommu;
+ }
+
+ access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
+ if (IS_ERR(access)) {
+ rc = PTR_ERR(access);
+ goto out_put_viommu;
+ }
+
+ hw_queue->viommu = viommu;
+ refcount_inc(&viommu->obj.users);
+ hw_queue->access = access;
+ hw_queue->type = cmd->type;
+ hw_queue->length = cmd->length;
+ hw_queue->base_addr = cmd->nesting_parent_iova;
+
+ rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
+ if (rc)
+ goto out_put_viommu;
+
+ cmd->out_hw_queue_id = hw_queue->obj.id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
out_put_viommu:
iommufd_put_object(ucmd->ictx, &viommu->obj);
return rc;
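
Finally, a hedged sketch of the driver side of the HW queue path: get_hw_queue_size() and hw_queue_init_phys() are required by the ioctl above, while hw_queue->destroy is optional. The my_* names and MY_HW_QUEUE_TYPE are hypothetical, with signatures inferred from the call sites.

struct my_hw_queue {
	struct iommufd_hw_queue core;	/* must be first for container_of() */
	void __iomem *base_reg;		/* hypothetical queue base register */
};

static size_t my_get_hw_queue_size(struct iommufd_viommu *viommu,
				   u32 queue_type)
{
	if (queue_type != MY_HW_QUEUE_TYPE)
		return 0;	/* the core turns this into -EOPNOTSUPP */
	return sizeof(struct my_hw_queue);
}

static void my_hw_queue_destroy(struct iommufd_hw_queue *hw_queue)
{
	/* Quiesce the HW before the core unpins the backing pages */
}

static int my_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index,
				 phys_addr_t base_addr_pa)
{
	/* Program base_addr_pa into the HW queue slot at @index */
	hw_queue->destroy = my_hw_queue_destroy;
	return 0;
}

And the matching userspace call, using the fields visible in this diff; the leading .size member, the IOMMU_HW_QUEUE_ALLOC request name, and MY_HW_QUEUE_TYPE follow the usual iommufd uapi convention and are assumptions here:

#include <err.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static __u32 alloc_hw_queue(int iommufd, __u32 viommu_id, __u64 queue_iova,
			    __u64 queue_len)
{
	struct iommu_hw_queue_alloc cmd = {
		.size = sizeof(cmd),
		.viommu_id = viommu_id,
		.type = MY_HW_QUEUE_TYPE,	/* not ..._TYPE_DEFAULT */
		.index = 0,
		.nesting_parent_iova = queue_iova, /* physically contiguous */
		.length = queue_len,		/* must be non-zero */
	};

	if (ioctl(iommufd, IOMMU_HW_QUEUE_ALLOC, &cmd))
		err(1, "IOMMU_HW_QUEUE_ALLOC");
	return cmd.out_hw_queue_id;
}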