Diffstat (limited to 'drivers/iommu/iommufd')
-rw-r--r--  drivers/iommu/iommufd/Kconfig            |   4
-rw-r--r--  drivers/iommu/iommufd/device.c           | 728
-rw-r--r--  drivers/iommu/iommufd/hw_pagetable.c     | 112
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.c     |  36
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h  |  62
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h     |  10
-rw-r--r--  drivers/iommu/iommufd/main.c             |  58
-rw-r--r--  drivers/iommu/iommufd/selftest.c         | 197
8 files changed, 931 insertions, 276 deletions
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index ada693ea51a7..99d4b075df49 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -14,8 +14,8 @@ config IOMMUFD if IOMMUFD config IOMMUFD_VFIO_CONTAINER bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" - depends on VFIO && !VFIO_CONTAINER - default VFIO && !VFIO_CONTAINER + depends on VFIO_GROUP && !VFIO_CONTAINER + default VFIO_GROUP && !VFIO_CONTAINER help IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on IOMMUFD providing compatibility emulation to give the same ioctls. diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ed2937a4e196..90f88c295ce0 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -4,6 +4,7 @@ #include <linux/iommufd.h> #include <linux/slab.h> #include <linux/iommu.h> +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" @@ -15,13 +16,127 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +static void iommufd_group_release(struct kref *kref) +{ + struct iommufd_group *igroup = + container_of(kref, struct iommufd_group, ref); + + WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + + xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, + NULL, GFP_KERNEL); + iommu_group_put(igroup->group); + mutex_destroy(&igroup->lock); + kfree(igroup); +} + +static void iommufd_put_group(struct iommufd_group *group) +{ + kref_put(&group->ref, iommufd_group_release); +} + +static bool iommufd_group_try_get(struct iommufd_group *igroup, + struct iommu_group *group) +{ + if (!igroup) + return false; + /* + * group ID's cannot be re-used until the group is put back which does + * not happen if we could get an igroup pointer under the xa_lock. + */ + if (WARN_ON(igroup->group != group)) + return false; + return kref_get_unless_zero(&igroup->ref); +} + +/* + * iommufd needs to store some more data for each iommu_group, we keep a + * parallel xarray indexed by iommu_group id to hold this instead of putting it + * in the core structure. To keep things simple the iommufd_group memory is + * unique within the iommufd_ctx. This makes it easy to check there are no + * memory leaks. + */ +static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, + struct device *dev) +{ + struct iommufd_group *new_igroup; + struct iommufd_group *cur_igroup; + struct iommufd_group *igroup; + struct iommu_group *group; + unsigned int id; + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + id = iommu_group_id(group); + + xa_lock(&ictx->groups); + igroup = xa_load(&ictx->groups, id); + if (iommufd_group_try_get(igroup, group)) { + xa_unlock(&ictx->groups); + iommu_group_put(group); + return igroup; + } + xa_unlock(&ictx->groups); + + new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL); + if (!new_igroup) { + iommu_group_put(group); + return ERR_PTR(-ENOMEM); + } + + kref_init(&new_igroup->ref); + mutex_init(&new_igroup->lock); + INIT_LIST_HEAD(&new_igroup->device_list); + new_igroup->sw_msi_start = PHYS_ADDR_MAX; + /* group reference moves into new_igroup */ + new_igroup->group = group; + + /* + * The ictx is not additionally refcounted here becase all objects using + * an igroup must put it before their destroy completes. + */ + new_igroup->ictx = ictx; + + /* + * We dropped the lock so igroup is invalid. 
NULL is a safe and likely + * value to assume for the xa_cmpxchg algorithm. + */ + cur_igroup = NULL; + xa_lock(&ictx->groups); + while (true) { + igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup, + GFP_KERNEL); + if (xa_is_err(igroup)) { + xa_unlock(&ictx->groups); + iommufd_put_group(new_igroup); + return ERR_PTR(xa_err(igroup)); + } + + /* new_group was successfully installed */ + if (cur_igroup == igroup) { + xa_unlock(&ictx->groups); + return new_igroup; + } + + /* Check again if the current group is any good */ + if (iommufd_group_try_get(igroup, group)) { + xa_unlock(&ictx->groups); + iommufd_put_group(new_igroup); + return igroup; + } + cur_igroup = igroup; + } +} + void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj); iommu_device_release_dma_owner(idev->dev); - iommu_group_put(idev->group); + iommufd_put_group(idev->igroup); if (!iommufd_selftest_is_mock_dev(idev->dev)) iommufd_ctx_put(idev->ictx); } @@ -46,7 +161,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id) { struct iommufd_device *idev; - struct iommu_group *group; + struct iommufd_group *igroup; int rc; /* @@ -56,9 +171,29 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) return ERR_PTR(-EINVAL); - group = iommu_group_get(dev); - if (!group) - return ERR_PTR(-ENODEV); + igroup = iommufd_get_group(ictx, dev); + if (IS_ERR(igroup)) + return ERR_CAST(igroup); + + /* + * For historical compat with VFIO the insecure interrupt path is + * allowed if the module parameter is set. Secure/Isolated means that a + * MemWr operation from the device (eg a simple DMA) cannot trigger an + * interrupt outside this iommufd context. + */ + if (!iommufd_selftest_is_mock_dev(dev) && + !iommu_group_has_isolated_msi(igroup->group)) { + if (!allow_unsafe_interrupts) { + rc = -EPERM; + goto out_group_put; + } + + dev_warn( + dev, + "MSI interrupts are not secure, they cannot be isolated by the platform. " + "Check that platform features like interrupt remapping are enabled. " + "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + } rc = iommu_device_claim_dma_owner(dev, ictx); if (rc) @@ -77,8 +212,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); /* The calling driver is a user until iommufd_device_unbind() */ refcount_inc(&idev->obj.users); - /* group refcount moves into iommufd_device */ - idev->group = group; + /* igroup refcount moves into iommufd_device */ + idev->igroup = igroup; /* * If the caller fails after this success it must call @@ -93,12 +228,43 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, out_release_owner: iommu_device_release_dma_owner(dev); out_group_put: - iommu_group_put(group); + iommufd_put_group(igroup); return ERR_PTR(rc); } EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); /** + * iommufd_ctx_has_group - True if any device within the group is bound + * to the ictx + * @ictx: iommufd file descriptor + * @group: Pointer to a physical iommu_group struct + * + * True if any device within the group has been bound to this ictx, ex. via + * iommufd_device_bind(), therefore implying ictx ownership of the group. 
+ */ +bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) +{ + struct iommufd_object *obj; + unsigned long index; + + if (!ictx || !group) + return false; + + xa_lock(&ictx->objects); + xa_for_each(&ictx->objects, index, obj) { + if (obj->type == IOMMUFD_OBJ_DEVICE && + container_of(obj, struct iommufd_device, obj) + ->igroup->group == group) { + xa_unlock(&ictx->objects); + return true; + } + } + xa_unlock(&ictx->objects); + return false; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); + +/** * iommufd_device_unbind - Undo iommufd_device_bind() * @idev: Device returned by iommufd_device_bind() * @@ -113,10 +279,22 @@ void iommufd_device_unbind(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); -static int iommufd_device_setup_msi(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - phys_addr_t sw_msi_start) +struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) +{ + return idev->ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); + +u32 iommufd_device_to_id(struct iommufd_device *idev) +{ + return idev->obj.id; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); + +static int iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hw_pagetable *hwpt) { + phys_addr_t sw_msi_start = igroup->sw_msi_start; int rc; /* @@ -143,128 +321,192 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev, */ hwpt->msi_cookie = true; } - - /* - * For historical compat with VFIO the insecure interrupt path is - * allowed if the module parameter is set. Insecure means that a MemWr - * operation from the device (eg a simple DMA) cannot trigger an - * interrupt outside this iommufd context. - */ - if (!iommufd_selftest_is_mock_dev(idev->dev) && - !iommu_group_has_isolated_msi(idev->group)) { - if (!allow_unsafe_interrupts) - return -EPERM; - - dev_warn( - idev->dev, - "MSI interrupts are not secure, they cannot be isolated by the platform. " - "Check that platform features like interrupt remapping are enabled. " - "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); - } return 0; } -static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, - struct iommu_group *group) -{ - struct iommufd_device *cur_dev; - - lockdep_assert_held(&hwpt->devices_lock); - - list_for_each_entry(cur_dev, &hwpt->devices, devices_item) - if (cur_dev->group == group) - return true; - return false; -} - int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { - phys_addr_t sw_msi_start = PHYS_ADDR_MAX; int rc; - lockdep_assert_held(&hwpt->devices_lock); + mutex_lock(&idev->igroup->lock); - if (WARN_ON(idev->hwpt)) - return -EINVAL; - - /* - * Try to upgrade the domain we have, it is an iommu driver bug to - * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail - * enforce_cache_coherency when there are no devices attached to the - * domain. 
- */ - if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { - if (hwpt->domain->ops->enforce_cache_coherency) - hwpt->enforce_cache_coherency = - hwpt->domain->ops->enforce_cache_coherency( - hwpt->domain); - if (!hwpt->enforce_cache_coherency) { - WARN_ON(list_empty(&hwpt->devices)); - return -EINVAL; - } + if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { + rc = -EINVAL; + goto err_unlock; } - rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, - idev->group, &sw_msi_start); - if (rc) - return rc; + /* Try to upgrade the domain we have */ + if (idev->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (rc) + goto err_unlock; + } - rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start); + rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev, + &idev->igroup->sw_msi_start); if (rc) - goto err_unresv; + goto err_unlock; /* - * FIXME: Hack around missing a device-centric iommu api, only attach to - * the group once for the first device that is in the group. + * Only attach to the group once for the first device that is in the + * group. All the other devices will follow this attachment. The user + * should attach every device individually to the hwpt as the per-device + * reserved regions are only updated during individual device + * attachment. */ - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { - rc = iommu_attach_group(hwpt->domain, idev->group); + if (list_empty(&idev->igroup->device_list)) { + rc = iommufd_group_setup_msi(idev->igroup, hwpt); if (rc) goto err_unresv; + + rc = iommu_attach_group(hwpt->domain, idev->igroup->group); + if (rc) + goto err_unresv; + idev->igroup->hwpt = hwpt; } + refcount_inc(&hwpt->obj.users); + list_add_tail(&idev->group_item, &idev->igroup->device_list); + mutex_unlock(&idev->igroup->lock); return 0; err_unresv: iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); +err_unlock: + mutex_unlock(&idev->igroup->lock); return rc; } -void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_detach(struct iommufd_device *idev) { - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) - iommu_detach_group(hwpt->domain, idev->group); + struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + + mutex_lock(&idev->igroup->lock); + list_del(&idev->group_item); + if (list_empty(&idev->igroup->device_list)) { + iommu_detach_group(hwpt->domain, idev->igroup->group); + idev->igroup->hwpt = NULL; + } iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + mutex_unlock(&idev->igroup->lock); + + /* Caller must destroy hwpt */ + return hwpt; } -static int iommufd_device_do_attach(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt) +static struct iommufd_hw_pagetable * +iommufd_device_do_attach(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) { int rc; - mutex_lock(&hwpt->devices_lock); rc = iommufd_hw_pagetable_attach(hwpt, idev); if (rc) - goto out_unlock; + return ERR_PTR(rc); + return NULL; +} - idev->hwpt = hwpt; - refcount_inc(&hwpt->obj.users); - list_add(&idev->devices_item, &hwpt->devices); -out_unlock: - mutex_unlock(&hwpt->devices_lock); - return rc; +static struct iommufd_hw_pagetable * +iommufd_device_do_replace(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) +{ + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + unsigned int num_devices = 0; + struct iommufd_device *cur; 
+ int rc; + + mutex_lock(&idev->igroup->lock); + + if (igroup->hwpt == NULL) { + rc = -EINVAL; + goto err_unlock; + } + + if (hwpt == igroup->hwpt) { + mutex_unlock(&idev->igroup->lock); + return NULL; + } + + /* Try to upgrade the domain we have */ + list_for_each_entry(cur, &igroup->device_list, group_item) { + num_devices++; + if (cur->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (rc) + goto err_unlock; + } + } + + old_hwpt = igroup->hwpt; + if (hwpt->ioas != old_hwpt->ioas) { + list_for_each_entry(cur, &igroup->device_list, group_item) { + rc = iopt_table_enforce_dev_resv_regions( + &hwpt->ioas->iopt, cur->dev, NULL); + if (rc) + goto err_unresv; + } + } + + rc = iommufd_group_setup_msi(idev->igroup, hwpt); + if (rc) + goto err_unresv; + + rc = iommu_group_replace_domain(igroup->group, hwpt->domain); + if (rc) + goto err_unresv; + + if (hwpt->ioas != old_hwpt->ioas) { + list_for_each_entry(cur, &igroup->device_list, group_item) + iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, + cur->dev); + } + + igroup->hwpt = hwpt; + + /* + * Move the refcounts held by the device_list to the new hwpt. Retain a + * refcount for this thread as the caller will free it. + */ + refcount_add(num_devices, &hwpt->obj.users); + if (num_devices > 1) + WARN_ON(refcount_sub_and_test(num_devices - 1, + &old_hwpt->obj.users)); + mutex_unlock(&idev->igroup->lock); + + /* Caller must destroy old_hwpt */ + return old_hwpt; +err_unresv: + list_for_each_entry(cur, &igroup->device_list, group_item) + iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); +err_unlock: + mutex_unlock(&idev->igroup->lock); + return ERR_PTR(rc); } +typedef struct iommufd_hw_pagetable *(*attach_fn)( + struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + /* * When automatically managing the domains we search for a compatible domain in * the iopt and if one is found use it, otherwise create a new domain. * Automatic domain selection will never pick a manually created domain. */ -static int iommufd_device_auto_get_domain(struct iommufd_device *idev, - struct iommufd_ioas *ioas) +static struct iommufd_hw_pagetable * +iommufd_device_auto_get_domain(struct iommufd_device *idev, + struct iommufd_ioas *ioas, u32 *pt_id, + attach_fn do_attach) { + /* + * iommufd_hw_pagetable_attach() is called by + * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as + * iommufd_device_do_attach(). So if we are in this mode then we prefer + * to use the immediate_attach path as it supports drivers that can't + * directly allocate a domain. + */ + bool immediate_attach = do_attach == iommufd_device_do_attach; + struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_hw_pagetable *hwpt; - int rc; /* * There is no differentiation when domains are allocated, so any domain @@ -278,50 +520,58 @@ static int iommufd_device_auto_get_domain(struct iommufd_device *idev, if (!iommufd_lock_obj(&hwpt->obj)) continue; - rc = iommufd_device_do_attach(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) { + iommufd_put_object(&hwpt->obj); + /* + * -EINVAL means the domain is incompatible with the + * device. Other error codes should propagate to + * userspace as failure. Success means the domain is + * attached. + */ + if (PTR_ERR(destroy_hwpt) == -EINVAL) + continue; + goto out_unlock; + } + *pt_id = hwpt->obj.id; iommufd_put_object(&hwpt->obj); - - /* - * -EINVAL means the domain is incompatible with the device. - * Other error codes should propagate to userspace as failure. 
- * Success means the domain is attached. - */ - if (rc == -EINVAL) - continue; goto out_unlock; } - hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true); + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, + immediate_attach); if (IS_ERR(hwpt)) { - rc = PTR_ERR(hwpt); + destroy_hwpt = ERR_CAST(hwpt); goto out_unlock; } + + if (!immediate_attach) { + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) + goto out_abort; + } else { + destroy_hwpt = NULL; + } + hwpt->auto_domain = true; + *pt_id = hwpt->obj.id; - mutex_unlock(&ioas->mutex); iommufd_object_finalize(idev->ictx, &hwpt->obj); - return 0; + mutex_unlock(&ioas->mutex); + return destroy_hwpt; + +out_abort: + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); out_unlock: mutex_unlock(&ioas->mutex); - return rc; + return destroy_hwpt; } -/** - * iommufd_device_attach - Connect a device from an iommu_domain - * @idev: device to attach - * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE - * Output the IOMMUFD_OBJ_HW_PAGETABLE ID - * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. - * - * The caller should return the resulting pt_id back to userspace. - * This function is undone by calling iommufd_device_detach(). - */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, + attach_fn do_attach) { + struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; - int rc; pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); if (IS_ERR(pt_obj)) @@ -332,8 +582,8 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - rc = iommufd_device_do_attach(idev, hwpt); - if (rc) + destroy_hwpt = (*do_attach)(idev, hwpt); + if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } @@ -341,27 +591,80 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - rc = iommufd_device_auto_get_domain(idev, ioas); - if (rc) + destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, + do_attach); + if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; } default: - rc = -EINVAL; + destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } + iommufd_put_object(pt_obj); - refcount_inc(&idev->obj.users); - *pt_id = idev->hwpt->obj.id; - rc = 0; + /* This destruction has to be after we unlock everything */ + if (destroy_hwpt) + iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt); + return 0; out_put_pt_obj: iommufd_put_object(pt_obj); - return rc; + return PTR_ERR(destroy_hwpt); +} + +/** + * iommufd_device_attach - Connect a device to an iommu_domain + * @idev: device to attach + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This connects the device to an iommu_domain, either automatically or manually + * selected. Once this completes the device could do DMA. + * + * The caller should return the resulting pt_id back to userspace. + * This function is undone by calling iommufd_device_detach(). 
+ */ +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +{ + int rc; + + rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + if (rc) + return rc; + + /* + * Pairs with iommufd_device_detach() - catches caller bugs attempting + * to destroy a device with an attachment. + */ + refcount_inc(&idev->obj.users); + return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); /** + * iommufd_device_replace - Change the device's iommu_domain + * @idev: device to change + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This is the same as:: + * + * iommufd_device_detach(); + * iommufd_device_attach(); + * + * If it fails then no change is made to the attachment. The iommu driver may + * implement this so there is no disruption in translation. This can only be + * called if iommufd_device_attach() has already succeeded. + */ +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +{ + return iommufd_device_change_pt(idev, pt_id, + &iommufd_device_do_replace); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); + +/** * iommufd_device_detach - Disconnect a device to an iommu_domain * @idev: device to detach * @@ -370,33 +673,87 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); */ void iommufd_device_detach(struct iommufd_device *idev) { - struct iommufd_hw_pagetable *hwpt = idev->hwpt; - - mutex_lock(&hwpt->devices_lock); - list_del(&idev->devices_item); - idev->hwpt = NULL; - iommufd_hw_pagetable_detach(hwpt, idev); - mutex_unlock(&hwpt->devices_lock); - - if (hwpt->auto_domain) - iommufd_object_deref_user(idev->ictx, &hwpt->obj); - else - refcount_dec(&hwpt->obj.users); + struct iommufd_hw_pagetable *hwpt; + hwpt = iommufd_hw_pagetable_detach(idev); + iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); +/* + * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at + * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should + * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas. + */ +static int iommufd_access_change_ioas(struct iommufd_access *access, + struct iommufd_ioas *new_ioas) +{ + u32 iopt_access_list_id = access->iopt_access_list_id; + struct iommufd_ioas *cur_ioas = access->ioas; + int rc; + + lockdep_assert_held(&access->ioas_lock); + + /* We are racing with a concurrent detach, bail */ + if (cur_ioas != access->ioas_unpin) + return -EBUSY; + + if (cur_ioas == new_ioas) + return 0; + + /* + * Set ioas to NULL to block any further iommufd_access_pin_pages(). + * iommufd_access_unpin_pages() can continue using access->ioas_unpin. 
+ */ + access->ioas = NULL; + + if (new_ioas) { + rc = iopt_add_access(&new_ioas->iopt, access); + if (rc) { + access->ioas = cur_ioas; + return rc; + } + refcount_inc(&new_ioas->obj.users); + } + + if (cur_ioas) { + if (access->ops->unmap) { + mutex_unlock(&access->ioas_lock); + access->ops->unmap(access->data, 0, ULONG_MAX); + mutex_lock(&access->ioas_lock); + } + iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id); + refcount_dec(&cur_ioas->obj.users); + } + + access->ioas = new_ioas; + access->ioas_unpin = new_ioas; + + return 0; +} + +static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) +{ + struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id); + int rc; + + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + rc = iommufd_access_change_ioas(access, ioas); + iommufd_put_object(&ioas->obj); + return rc; +} + void iommufd_access_destroy_object(struct iommufd_object *obj) { struct iommufd_access *access = container_of(obj, struct iommufd_access, obj); - if (access->ioas) { - iopt_remove_access(&access->ioas->iopt, access); - refcount_dec(&access->ioas->obj.users); - access->ioas = NULL; - } + mutex_lock(&access->ioas_lock); + if (access->ioas) + WARN_ON(iommufd_access_change_ioas(access, NULL)); + mutex_unlock(&access->ioas_lock); iommufd_ctx_put(access->ictx); } @@ -441,6 +798,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; + mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); @@ -457,30 +815,49 @@ void iommufd_access_destroy(struct iommufd_access *access) } EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +void iommufd_access_detach(struct iommufd_access *access) +{ + mutex_lock(&access->ioas_lock); + if (WARN_ON(!access->ioas)) { + mutex_unlock(&access->ioas_lock); + return; + } + WARN_ON(iommufd_access_change_ioas(access, NULL)); + mutex_unlock(&access->ioas_lock); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); + int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { - struct iommufd_ioas *new_ioas; - int rc = 0; + int rc; - if (access->ioas) + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas)) { + mutex_unlock(&access->ioas_lock); return -EINVAL; - - new_ioas = iommufd_get_ioas(access->ictx, ioas_id); - if (IS_ERR(new_ioas)) - return PTR_ERR(new_ioas); - - rc = iopt_add_access(&new_ioas->iopt, access); - if (rc) { - iommufd_put_object(&new_ioas->obj); - return rc; } - iommufd_ref_to_users(&new_ioas->obj); - access->ioas = new_ioas; - return 0; + rc = iommufd_access_change_ioas_id(access, ioas_id); + mutex_unlock(&access->ioas_lock); + return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); +int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) +{ + int rc; + + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + rc = iommufd_access_change_ioas_id(access, ioas_id); + mutex_unlock(&access->ioas_lock); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); + /** * iommufd_access_notify_unmap - Notify users of an iopt to stop using it * @iopt: iopt to work on @@ -531,8 +908,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct 
io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; @@ -540,6 +917,17 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) return; + mutex_lock(&access->ioas_lock); + /* + * The driver must be doing something wrong if it calls this before an + * iommufd_access_attach() or after an iommufd_access_detach(). + */ + if (WARN_ON(!access->ioas_unpin)) { + mutex_unlock(&access->ioas_lock); + return; + } + iopt = &access->ioas_unpin->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) iopt_area_remove_access( @@ -549,6 +937,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, min(last_iova, iopt_area_last_iova(area)))); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); } EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); @@ -594,8 +983,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; unsigned long last_iova; struct iopt_area *area; int rc; @@ -610,6 +999,13 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -640,6 +1036,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return 0; err_remove: @@ -654,6 +1051,7 @@ err_remove: iopt_area_last_iova(area)))); } up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); @@ -673,8 +1071,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t length, unsigned int flags) { - struct io_pagetable *iopt = &access->ioas->iopt; struct iopt_area_contig_iter iter; + struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; int rc; @@ -684,6 +1082,13 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, if (check_add_overflow(iova, length - 1, &last_iova)) return -EOVERFLOW; + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return -ENOENT; + } + iopt = &access->ioas->iopt; + down_read(&iopt->iova_rwsem); iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { unsigned long last = min(last_iova, iopt_area_last_iova(area)); @@ -710,6 +1115,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, rc = -ENOENT; err_out: up_read(&iopt->iova_rwsem); + mutex_unlock(&access->ioas_lock); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 6cdb6749d359..cf2c1504e20d 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -3,6 +3,7 @@ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include <linux/iommu.h> 
+#include <uapi/linux/iommufd.h> #include "iommufd_private.h" @@ -11,8 +12,6 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) struct iommufd_hw_pagetable *hwpt = container_of(obj, struct iommufd_hw_pagetable, obj); - WARN_ON(!list_empty(&hwpt->devices)); - if (!list_empty(&hwpt->hwpt_item)) { mutex_lock(&hwpt->ioas->mutex); list_del(&hwpt->hwpt_item); @@ -25,7 +24,35 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) iommu_domain_free(hwpt->domain); refcount_dec(&hwpt->ioas->obj.users); - mutex_destroy(&hwpt->devices_lock); +} + +void iommufd_hw_pagetable_abort(struct iommufd_object *obj) +{ + struct iommufd_hw_pagetable *hwpt = + container_of(obj, struct iommufd_hw_pagetable, obj); + + /* The ioas->mutex must be held until finalize is called. */ + lockdep_assert_held(&hwpt->ioas->mutex); + + if (!list_empty(&hwpt->hwpt_item)) { + list_del_init(&hwpt->hwpt_item); + iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + } + iommufd_hw_pagetable_destroy(obj); +} + +int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) +{ + if (hwpt->enforce_cache_coherency) + return 0; + + if (hwpt->domain->ops->enforce_cache_coherency) + hwpt->enforce_cache_coherency = + hwpt->domain->ops->enforce_cache_coherency( + hwpt->domain); + if (!hwpt->enforce_cache_coherency) + return -EINVAL; + return 0; } /** @@ -38,6 +65,10 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT * will be linked to the given ioas and upon return the underlying iommu_domain * is fully popoulated. + * + * The caller must hold the ioas->mutex until after + * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on + * the returned hwpt. */ struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, @@ -52,9 +83,7 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if (IS_ERR(hwpt)) return hwpt; - INIT_LIST_HEAD(&hwpt->devices); INIT_LIST_HEAD(&hwpt->hwpt_item); - mutex_init(&hwpt->devices_lock); /* Pairs with iommufd_hw_pagetable_destroy() */ refcount_inc(&ioas->obj.users); hwpt->ioas = ioas; @@ -65,7 +94,18 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } - mutex_lock(&hwpt->devices_lock); + /* + * Set the coherency mode before we do iopt_table_add_domain() as some + * iommus have a per-PTE bit that controls it and need to decide before + * doing any maps. It is an iommu driver bug to report + * IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on + * a new domain. 
+ */ + if (idev->enforce_cache_coherency) { + rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (WARN_ON(rc)) + goto out_abort; + } /* * immediate_attach exists only to accommodate iommu drivers that cannot @@ -76,30 +116,64 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if (immediate_attach) { rc = iommufd_hw_pagetable_attach(hwpt, idev); if (rc) - goto out_unlock; + goto out_abort; } rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); if (rc) goto out_detach; list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); - - if (immediate_attach) { - /* See iommufd_device_do_attach() */ - refcount_inc(&hwpt->obj.users); - idev->hwpt = hwpt; - list_add(&idev->devices_item, &hwpt->devices); - } - - mutex_unlock(&hwpt->devices_lock); return hwpt; out_detach: if (immediate_attach) - iommufd_hw_pagetable_detach(hwpt, idev); -out_unlock: - mutex_unlock(&hwpt->devices_lock); + iommufd_hw_pagetable_detach(idev); out_abort: iommufd_object_abort_and_destroy(ictx, &hwpt->obj); return ERR_PTR(rc); } + +int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_alloc *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_device *idev; + struct iommufd_ioas *ioas; + int rc; + + if (cmd->flags || cmd->__reserved) + return -EOPNOTSUPP; + + idev = iommufd_get_device(ucmd, cmd->dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id); + if (IS_ERR(ioas)) { + rc = PTR_ERR(ioas); + goto out_put_idev; + } + + mutex_lock(&ioas->mutex); + hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_unlock; + } + + cmd->out_hwpt_id = hwpt->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_hwpt; + iommufd_object_finalize(ucmd->ictx, &hwpt->obj); + goto out_unlock; + +out_hwpt: + iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj); +out_unlock: + mutex_unlock(&ioas->mutex); + iommufd_put_object(&ioas->obj); +out_put_idev: + iommufd_put_object(&idev->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 724c4c574241..3a598182b761 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1158,36 +1158,36 @@ out_unlock: } void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access) + struct iommufd_access *access, + u32 iopt_access_list_id) { down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); - WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != - access); + WARN_ON(xa_erase(&iopt->access_list, iopt_access_list_id) != access); WARN_ON(iopt_calculate_iova_alignment(iopt)); up_write(&iopt->iova_rwsem); up_write(&iopt->domains_rwsem); } -/* Narrow the valid_iova_itree to include reserved ranges from a group. */ -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, - struct device *device, - struct iommu_group *group, - phys_addr_t *sw_msi_start) +/* Narrow the valid_iova_itree to include reserved ranges from a device. 
*/ +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, + struct device *dev, + phys_addr_t *sw_msi_start) { struct iommu_resv_region *resv; - struct iommu_resv_region *tmp; - LIST_HEAD(group_resv_regions); + LIST_HEAD(resv_regions); unsigned int num_hw_msi = 0; unsigned int num_sw_msi = 0; int rc; + if (iommufd_should_fail()) + return -EINVAL; + down_write(&iopt->iova_rwsem); - rc = iommu_get_group_resv_regions(group, &group_resv_regions); - if (rc) - goto out_unlock; + /* FIXME: drivers allocate memory but there is no failure propogated */ + iommu_get_resv_regions(dev, &resv_regions); - list_for_each_entry(resv, &group_resv_regions, list) { + list_for_each_entry(resv, &resv_regions, list) { if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) continue; @@ -1199,7 +1199,7 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, } rc = iopt_reserve_iova(iopt, resv->start, - resv->length - 1 + resv->start, device); + resv->length - 1 + resv->start, dev); if (rc) goto out_reserved; } @@ -1214,11 +1214,9 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, goto out_free_resv; out_reserved: - __iopt_remove_reserved_iova(iopt, device); + __iopt_remove_reserved_iova(iopt, dev); out_free_resv: - list_for_each_entry_safe(resv, tmp, &group_resv_regions, list) - kfree(resv); -out_unlock: + iommu_put_resv_regions(dev, &resv_regions); up_write(&iopt->iova_rwsem); return rc; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f9790983699c..15596a08a057 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -17,6 +17,7 @@ struct iommufd_device; struct iommufd_ctx { struct file *file; struct xarray objects; + struct xarray groups; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -75,10 +76,9 @@ int iopt_table_add_domain(struct io_pagetable *iopt, struct iommu_domain *domain); void iopt_table_remove_domain(struct io_pagetable *iopt, struct iommu_domain *domain); -int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, - struct device *device, - struct iommu_group *group, - phys_addr_t *sw_msi_start); +int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, + struct device *dev, + phys_addr_t *sw_msi_start); int iopt_set_allow_iova(struct io_pagetable *iopt, struct rb_root_cached *allowed_iova); int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, @@ -119,6 +119,7 @@ enum iommufd_object_type { #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif + IOMMUFD_OBJ_MAX, }; /* Base struct for all objects with a userspace ID handle. 
*/ @@ -260,18 +261,39 @@ struct iommufd_hw_pagetable { bool msi_cookie : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; - struct mutex devices_lock; - struct list_head devices; }; struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct iommufd_device *idev, bool immediate_attach); +int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); -void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_detach(struct iommufd_device *idev); void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); +void iommufd_hw_pagetable_abort(struct iommufd_object *obj); +int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); + +static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt) +{ + lockdep_assert_not_held(&hwpt->ioas->mutex); + if (hwpt->auto_domain) + iommufd_object_deref_user(ictx, &hwpt->obj); + else + refcount_dec(&hwpt->obj.users); +} + +struct iommufd_group { + struct kref ref; + struct mutex lock; + struct iommufd_ctx *ictx; + struct iommu_group *group; + struct iommufd_hw_pagetable *hwpt; + struct list_head device_list; + phys_addr_t sw_msi_start; +}; /* * A iommufd_device object represents the binding relationship between a @@ -281,21 +303,29 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); struct iommufd_device { struct iommufd_object obj; struct iommufd_ctx *ictx; - struct iommufd_hw_pagetable *hwpt; - /* Head at iommufd_hw_pagetable::devices */ - struct list_head devices_item; + struct iommufd_group *igroup; + struct list_head group_item; /* always the physical device */ struct device *dev; - struct iommu_group *group; bool enforce_cache_coherency; }; +static inline struct iommufd_device * +iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_DEVICE), + struct iommufd_device, obj); +} + void iommufd_device_destroy(struct iommufd_object *obj); struct iommufd_access { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_ioas *ioas; + struct iommufd_ioas *ioas_unpin; + struct mutex ioas_lock; const struct iommufd_access_ops *ops; void *data; unsigned long iova_alignment; @@ -304,7 +334,8 @@ struct iommufd_access { int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access); + struct iommufd_access *access, + u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); #ifdef CONFIG_IOMMUFD_TEST @@ -314,7 +345,7 @@ extern size_t iommufd_test_memory_limit; void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags); bool iommufd_should_fail(void); -void __init iommufd_test_init(void); +int __init iommufd_test_init(void); void iommufd_test_exit(void); bool iommufd_selftest_is_mock_dev(struct device *dev); #else @@ -327,8 +358,9 @@ static inline bool iommufd_should_fail(void) { return false; } -static inline void __init iommufd_test_init(void) +static inline int __init iommufd_test_init(void) { + return 0; } static inline void iommufd_test_exit(void) { diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index b3d69cca7729..258de2253b61 100644 --- 
a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -17,6 +17,8 @@ enum { IOMMU_TEST_OP_ACCESS_PAGES, IOMMU_TEST_OP_ACCESS_RW, IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, + IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, + IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, }; enum { @@ -51,8 +53,13 @@ struct iommu_test_cmd { struct { __u32 out_stdev_id; __u32 out_hwpt_id; + /* out_idev_id is the standard iommufd_bind object */ + __u32 out_idev_id; } mock_domain; struct { + __u32 pt_id; + } mock_domain_replace; + struct { __aligned_u64 iova; __aligned_u64 length; __aligned_u64 uptr; @@ -85,6 +92,9 @@ struct iommu_test_cmd { struct { __u32 limit; } memory_limit; + struct { + __u32 ioas_id; + } access_replace_ioas; }; __u32 last; }; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 4cf5f73f2708..5f7e9fa45502 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -24,6 +24,7 @@ struct iommufd_object_ops { void (*destroy)(struct iommufd_object *obj); + void (*abort)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; @@ -32,6 +33,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { + static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; struct iommufd_object *obj; int rc; @@ -39,7 +41,15 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; - init_rwsem(&obj->destroy_rwsem); + /* + * In most cases the destroy_rwsem is obtained with try so it doesn't + * interact with lockdep, however on destroy we have to sleep. This + * means if we have to destroy an object while holding a get on another + * object it triggers lockdep. Using one locking class per object type + * is a simple and reasonable way to avoid this. + */ + __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", + &obj_keys[type]); refcount_set(&obj->users, 1); /* @@ -50,7 +60,7 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, * before calling iommufd_object_finalize(). 
*/ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, - xa_limit_32b, GFP_KERNEL_ACCOUNT); + xa_limit_31b, GFP_KERNEL_ACCOUNT); if (rc) goto out_free; return obj; @@ -95,7 +105,10 @@ void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj) { - iommufd_object_ops[obj->type].destroy(obj); + if (iommufd_object_ops[obj->type].abort) + iommufd_object_ops[obj->type].abort(obj); + else + iommufd_object_ops[obj->type].destroy(obj); iommufd_object_abort(ictx, obj); } @@ -223,6 +236,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) } xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); + xa_init(&ictx->groups); ictx->file = filp; filp->private_data = ictx; return 0; @@ -258,6 +272,7 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) if (WARN_ON(!destroyed)) break; } + WARN_ON(!xa_empty(&ictx->groups)); kfree(ictx); return 0; } @@ -290,6 +305,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) union ucmd_buffer { struct iommu_destroy destroy; + struct iommu_hwpt_alloc hwpt; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -321,6 +337,8 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, + __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, @@ -418,6 +436,30 @@ struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); /** + * iommufd_ctx_from_fd - Acquires a reference to the iommufd context + * @fd: File descriptor to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. On success + * the caller is responsible to call iommufd_ctx_put(). 
+ */ +struct iommufd_ctx *iommufd_ctx_from_fd(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + + if (file->f_op != &iommufd_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + /* fget is the same as iommufd_ctx_get() */ + return file->private_data; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_fd, IOMMUFD); + +/** * iommufd_ctx_put - Put back a reference * @ictx: Context to put back */ @@ -439,6 +481,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { }, [IOMMUFD_OBJ_HW_PAGETABLE] = { .destroy = iommufd_hw_pagetable_destroy, + .abort = iommufd_hw_pagetable_abort, }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { @@ -477,8 +520,14 @@ static int __init iommufd_init(void) if (ret) goto err_misc; } - iommufd_test_init(); + ret = iommufd_test_init(); + if (ret) + goto err_vfio_misc; return 0; + +err_vfio_misc: + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) + misc_deregister(&vfio_misc_dev); err_misc: misc_deregister(&iommu_misc_dev); return ret; @@ -499,5 +548,6 @@ module_exit(iommufd_exit); MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif +MODULE_IMPORT_NS(IOMMUFD_INTERNAL); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 74c2076105d4..9223ff3f23f9 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -9,14 +9,17 @@ #include <linux/file.h> #include <linux/anon_inodes.h> #include <linux/fault-inject.h> +#include <linux/platform_device.h> #include <uapi/linux/iommufd.h> +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" #include "iommufd_test.h" static DECLARE_FAULT_ATTR(fail_iommufd); static struct dentry *dbgfs_root; +static struct platform_device *selftest_iommu_dev; size_t iommufd_test_memory_limit = 65536; @@ -135,7 +138,7 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) return &mock_blocking_domain; - if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)) + if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED) return NULL; mock = kzalloc(sizeof(*mock), GFP_KERNEL); @@ -276,12 +279,22 @@ static void mock_domain_set_plaform_dma_ops(struct device *dev) */ } +static struct iommu_device mock_iommu_device = { +}; + +static struct iommu_device *mock_probe_device(struct device *dev) +{ + return &mock_iommu_device; +} + static const struct iommu_ops mock_ops = { .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .domain_alloc = mock_domain_alloc, .capable = mock_domain_capable, .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, + .device_group = generic_device_group, + .probe_device = mock_probe_device, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -292,10 +305,6 @@ static const struct iommu_ops mock_ops = { }, }; -static struct iommu_device mock_iommu_device = { - .ops = &mock_ops, -}; - static inline struct iommufd_hw_pagetable * get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, struct mock_iommu_domain **mock) @@ -316,22 +325,29 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; } -static struct bus_type iommufd_mock_bus_type = { - .name = "iommufd_mock", - .iommu_ops = &mock_ops, +struct mock_bus_type { + struct bus_type bus; + struct notifier_block nb; }; +static struct mock_bus_type iommufd_mock_bus_type = { + .bus = { + .name = "iommufd_mock", 
+ }, +}; + +static atomic_t mock_dev_num; + static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + atomic_dec(&mock_dev_num); kfree(mdev); } static struct mock_dev *mock_dev_create(void) { - struct iommu_group *iommu_group; - struct dev_iommu *dev_iommu; struct mock_dev *mdev; int rc; @@ -341,51 +357,18 @@ static struct mock_dev *mock_dev_create(void) device_initialize(&mdev->dev); mdev->dev.release = mock_dev_release; - mdev->dev.bus = &iommufd_mock_bus_type; - - iommu_group = iommu_group_alloc(); - if (IS_ERR(iommu_group)) { - rc = PTR_ERR(iommu_group); - goto err_put; - } + mdev->dev.bus = &iommufd_mock_bus_type.bus; rc = dev_set_name(&mdev->dev, "iommufd_mock%u", - iommu_group_id(iommu_group)); + atomic_inc_return(&mock_dev_num)); if (rc) - goto err_group; - - /* - * The iommu core has no way to associate a single device with an iommu - * driver (heck currently it can't even support two iommu_drivers - * registering). Hack it together with an open coded dev_iommu_get(). - * Notice that the normal notifier triggered iommu release process also - * does not work here because this bus is not in iommu_buses. - */ - mdev->dev.iommu = kzalloc(sizeof(*dev_iommu), GFP_KERNEL); - if (!mdev->dev.iommu) { - rc = -ENOMEM; - goto err_group; - } - mutex_init(&mdev->dev.iommu->lock); - mdev->dev.iommu->iommu_dev = &mock_iommu_device; + goto err_put; rc = device_add(&mdev->dev); if (rc) - goto err_dev_iommu; - - rc = iommu_group_add_device(iommu_group, &mdev->dev); - if (rc) - goto err_del; - iommu_group_put(iommu_group); + goto err_put; return mdev; -err_del: - device_del(&mdev->dev); -err_dev_iommu: - kfree(mdev->dev.iommu); - mdev->dev.iommu = NULL; -err_group: - iommu_group_put(iommu_group); err_put: put_device(&mdev->dev); return ERR_PTR(rc); @@ -393,11 +376,7 @@ err_put: static void mock_dev_destroy(struct mock_dev *mdev) { - iommu_group_remove_device(&mdev->dev); - device_del(&mdev->dev); - kfree(mdev->dev.iommu); - mdev->dev.iommu = NULL; - put_device(&mdev->dev); + device_unregister(&mdev->dev); } bool iommufd_selftest_is_mock_dev(struct device *dev) @@ -443,9 +422,15 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, /* Userspace must destroy the device_id to destroy the object */ cmd->mock_domain.out_hwpt_id = pt_id; cmd->mock_domain.out_stdev_id = sobj->obj.id; + cmd->mock_domain.out_idev_id = idev_id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_detach; iommufd_object_finalize(ucmd->ictx, &sobj->obj); - return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + return 0; +out_detach: + iommufd_device_detach(idev); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -455,6 +440,42 @@ out_sobj: return rc; } +/* Replace the mock domain with a manually allocated hw_pagetable */ +static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, + unsigned int device_id, u32 pt_id, + struct iommu_test_cmd *cmd) +{ + struct iommufd_object *dev_obj; + struct selftest_obj *sobj; + int rc; + + /* + * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure + * it doesn't race with detach, which is not allowed. 
+ */ + dev_obj = + iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); + if (IS_ERR(dev_obj)) + return PTR_ERR(dev_obj); + + sobj = container_of(dev_obj, struct selftest_obj, obj); + if (sobj->type != TYPE_IDEV) { + rc = -EINVAL; + goto out_dev_obj; + } + + rc = iommufd_device_replace(sobj->idev.idev, &pt_id); + if (rc) + goto out_dev_obj; + + cmd->mock_domain_replace.pt_id = pt_id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_dev_obj: + iommufd_put_object(dev_obj); + return rc; +} + /* Add an additional reserved IOVA to the IOAS */ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, @@ -748,6 +769,22 @@ out_free_staccess: return rc; } +static int iommufd_test_access_replace_ioas(struct iommufd_ucmd *ucmd, + unsigned int access_id, + unsigned int ioas_id) +{ + struct selftest_access *staccess; + int rc; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + rc = iommufd_access_replace(staccess->access, ioas_id); + fput(staccess->file); + return rc; +} + /* Check that the pages in a page array match the pages in the user VA */ static int iommufd_test_check_pages(void __user *uptr, struct page **pages, size_t npages) @@ -948,6 +985,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->add_reserved.length); case IOMMU_TEST_OP_MOCK_DOMAIN: return iommufd_test_mock_domain(ucmd, cmd); + case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE: + return iommufd_test_mock_domain_replace( + ucmd, cmd->id, cmd->mock_domain_replace.pt_id, cmd); case IOMMU_TEST_OP_MD_CHECK_MAP: return iommufd_test_md_check_pa( ucmd, cmd->id, cmd->check_map.iova, @@ -960,6 +1000,9 @@ int iommufd_test(struct iommufd_ucmd *ucmd) case IOMMU_TEST_OP_CREATE_ACCESS: return iommufd_test_create_access(ucmd, cmd->id, cmd->create_access.flags); + case IOMMU_TEST_OP_ACCESS_REPLACE_IOAS: + return iommufd_test_access_replace_ioas( + ucmd, cmd->id, cmd->access_replace_ioas.ioas_id); case IOMMU_TEST_OP_ACCESS_PAGES: return iommufd_test_access_pages( ucmd, cmd->id, cmd->access_pages.iova, @@ -992,15 +1035,57 @@ bool iommufd_should_fail(void) return should_fail(&fail_iommufd, 1); } -void __init iommufd_test_init(void) +int __init iommufd_test_init(void) { + struct platform_device_info pdevinfo = { + .name = "iommufd_selftest_iommu", + }; + int rc; + dbgfs_root = fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd); - WARN_ON(bus_register(&iommufd_mock_bus_type)); + + selftest_iommu_dev = platform_device_register_full(&pdevinfo); + if (IS_ERR(selftest_iommu_dev)) { + rc = PTR_ERR(selftest_iommu_dev); + goto err_dbgfs; + } + + rc = bus_register(&iommufd_mock_bus_type.bus); + if (rc) + goto err_platform; + + rc = iommu_device_sysfs_add(&mock_iommu_device, + &selftest_iommu_dev->dev, NULL, "%s", + dev_name(&selftest_iommu_dev->dev)); + if (rc) + goto err_bus; + + rc = iommu_device_register_bus(&mock_iommu_device, &mock_ops, + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); + if (rc) + goto err_sysfs; + return 0; + +err_sysfs: + iommu_device_sysfs_remove(&mock_iommu_device); +err_bus: + bus_unregister(&iommufd_mock_bus_type.bus); +err_platform: + platform_device_del(selftest_iommu_dev); +err_dbgfs: + debugfs_remove_recursive(dbgfs_root); + return rc; } void iommufd_test_exit(void) { + iommu_device_sysfs_remove(&mock_iommu_device); + iommu_device_unregister_bus(&mock_iommu_device, + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); + bus_unregister(&iommufd_mock_bus_type.bus); + platform_device_del(selftest_iommu_dev); 
debugfs_remove_recursive(dbgfs_root); - bus_unregister(&iommufd_mock_bus_type); } |
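
For context, the IOMMU_HWPT_ALLOC ioctl wired up above (iommufd_hwpt_alloc() in hw_pagetable.c and the IOCTL_OP entry in main.c) can be driven from userspace roughly as in the sketch below. This is a minimal illustration, not part of the patch: it assumes the struct iommu_hwpt_alloc layout implied by the handler (size, flags, dev_id, pt_id, out_hwpt_id, __reserved) from include/uapi/linux/iommufd.h at this point in the series, and that iommufd_fd, dev_id and ioas_id were obtained earlier through device binding and IOMMU_IOAS_ALLOC.

```c
/*
 * Sketch only: allocate a hw_pagetable for an already-bound device on an
 * existing IOAS using the new IOMMU_HWPT_ALLOC ioctl. dev_id, ioas_id and
 * iommufd_fd are placeholders for values obtained from earlier setup.
 */
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static int alloc_hwpt(int iommufd_fd, __u32 dev_id, __u32 ioas_id,
		      __u32 *out_hwpt_id)
{
	struct iommu_hwpt_alloc cmd = {
		.size = sizeof(cmd),
		.flags = 0,		/* must be 0, rejected otherwise */
		.dev_id = dev_id,	/* IOMMUFD_OBJ_DEVICE id from binding */
		.pt_id = ioas_id,	/* IOMMUFD_OBJ_IOAS backing the domain */
	};

	if (ioctl(iommufd_fd, IOMMU_HWPT_ALLOC, &cmd))
		return -1;
	/* The returned hwpt id can then be used anywhere a pt_id is accepted */
	*out_hwpt_id = cmd.out_hwpt_id;
	return 0;
}
```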