diff options
Diffstat (limited to 'drivers/iommu/iommufd/device.c')
-rw-r--r-- | drivers/iommu/iommufd/device.c | 575 |
1 files changed, 443 insertions, 132 deletions
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 873630c111c1..86244403b532 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <linux/iommu.h> #include <linux/iommufd.h> +#include <linux/pci-ats.h> #include <linux/slab.h> -#include <linux/iommu.h> #include <uapi/linux/iommufd.h> -#include "../iommu-priv.h" +#include "../iommu-priv.h" #include "io_pagetable.h" #include "iommufd_private.h" @@ -17,12 +18,17 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +struct iommufd_attach { + struct iommufd_hw_pagetable *hwpt; + struct xarray device_array; +}; + static void iommufd_group_release(struct kref *kref) { struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); - WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + WARN_ON(!xa_empty(&igroup->pasid_attach)); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); @@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); - INIT_LIST_HEAD(&new_igroup->device_list); + xa_init(&new_igroup->pasid_attach); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; @@ -232,7 +238,7 @@ out_group_put: iommufd_put_group(igroup); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD"); /** * iommufd_ctx_has_group - True if any device within the group is bound @@ -263,7 +269,7 @@ bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group) xa_unlock(&ictx->objects); return false; } -EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD"); /** * iommufd_device_unbind - Undo iommufd_device_bind() @@ -278,69 +284,97 @@ void iommufd_device_unbind(struct iommufd_device *idev) { iommufd_object_destroy_user(idev->ictx, &idev->obj); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD"); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev) { return idev->ictx; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD"); u32 iommufd_device_to_id(struct iommufd_device *idev) { return idev->obj.id; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); + +static unsigned int iommufd_group_device_num(struct iommufd_group *igroup, + ioasid_t pasid) +{ + struct iommufd_attach *attach; + struct iommufd_device *idev; + unsigned int count = 0; + unsigned long index; + lockdep_assert_held(&igroup->lock); + + attach = xa_load(&igroup->pasid_attach, pasid); + if (attach) + xa_for_each(&attach->device_array, index, idev) + count++; + return count; +} + +#ifdef CONFIG_IRQ_MSI_IOMMU static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { - phys_addr_t sw_msi_start = igroup->sw_msi_start; - int rc; + struct iommufd_ctx *ictx = igroup->ictx; + struct iommufd_sw_msi_map *cur; + + if (igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; /* - * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to - * call iommu_get_msi_cookie() on its behalf. This is necessary to setup - * the MSI window so iommu_dma_prepare_msi() can install pages into our - * domain after request_irq(). If it is not done interrupts will not - * work on this domain. - * - * FIXME: This is conceptually broken for iommufd since we want to allow - * userspace to change the domains, eg switch from an identity IOAS to a - * DMA IOAS. There is currently no way to create a MSI window that - * matches what the IRQ layer actually expects in a newly created - * domain. + * Install all the MSI pages the device has been using into the domain */ - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { - rc = iommu_get_msi_cookie(hwpt_paging->common.domain, - sw_msi_start); + guard(mutex)(&ictx->sw_msi_lock); + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + int rc; + + if (cur->sw_msi_start != igroup->sw_msi_start || + !test_bit(cur->id, igroup->required_sw_msi.bitmap)) + continue; + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur); if (rc) return rc; - - /* - * iommu_get_msi_cookie() can only be called once per domain, - * it returns -EBUSY on later calls. - */ - hwpt_paging->msi_cookie = true; } return 0; } +#else +static inline int +iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + return 0; +} +#endif -static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, - struct iommufd_device *idev) +static bool +iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) { + lockdep_assert_held(&igroup->lock); + return !xa_load(&igroup->pasid_attach, pasid); +} + +static int +iommufd_device_attach_reserved_iova(struct iommufd_device *idev, + struct iommufd_hwpt_paging *hwpt_paging) +{ + struct iommufd_group *igroup = idev->igroup; int rc; - lockdep_assert_held(&idev->igroup->lock); + lockdep_assert_held(&igroup->lock); rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, - &idev->igroup->sw_msi_start); + &igroup->sw_msi_start); if (rc) return rc; - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); + if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) { + rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); @@ -350,22 +384,217 @@ static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, return 0; } +/* The device attach/detach/replace helpers for attach_handle */ + +static bool iommufd_device_is_attached(struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach *attach; + + attach = xa_load(&idev->igroup->pasid_attach, pasid); + return xa_load(&attach->device_array, idev->obj.id); +} + +static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_group *igroup = idev->igroup; + + lockdep_assert_held(&igroup->lock); + + if (pasid == IOMMU_NO_PASID) { + unsigned long start = IOMMU_NO_PASID; + + if (!hwpt->pasid_compat && + xa_find_after(&igroup->pasid_attach, + &start, UINT_MAX, XA_PRESENT)) + return -EINVAL; + } else { + struct iommufd_attach *attach; + + if (!hwpt->pasid_compat) + return -EINVAL; + + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + if (attach && attach->hwpt && !attach->hwpt->pasid_compat) + return -EINVAL; + } + + return 0; +} + +static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct pci_dev *pdev; + + if (!hwpt->fault || !dev_is_pci(idev->dev)) + return true; + + /* + * Once we turn on PCI/PRI support for VF, the response failure code + * should not be forwarded to the hardware due to PRI being a shared + * resource between PF and VFs. There is no coordination for this + * shared capability. This waits for a vPRI reset to recover. + */ + pdev = to_pci_dev(idev->dev); + + return (!pdev->is_virtfn || !pci_pri_supported(pdev)); +} + +static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach_handle *handle; + int rc; + + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + if (pasid == IOMMU_NO_PASID) + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + else + rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid, + &handle->handle); + if (rc) + goto out_free_handle; + + return 0; + +out_free_handle: + kfree(handle); + return rc; +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid) +{ + struct iommu_attach_handle *handle; + + lockdep_assert_held(&idev->igroup->lock); + + handle = + iommu_attach_handle_get(idev->igroup->group, pasid, 0); + if (IS_ERR(handle)) + return NULL; + return to_iommufd_handle(handle); +} + +static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev, pasid); + if (pasid == IOMMU_NO_PASID) + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + else + iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid); + + iommufd_auto_response_faults(hwpt, handle); + kfree(handle); +} + +static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *old_handle; + int rc; + + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + + old_handle = iommufd_device_get_attach_handle(idev, pasid); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + if (pasid == IOMMU_NO_PASID) + rc = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + else + rc = iommu_replace_device_pasid(hwpt->domain, idev->dev, + pasid, &handle->handle); + if (rc) + goto out_free_handle; + + iommufd_auto_response_faults(hwpt, old_handle); + kfree(old_handle); + + return 0; + +out_free_handle: + kfree(handle); + return rc; +} + int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, ioasid_t pasid) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { - rc = -EINVAL; + attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(attach)) { + rc = xa_err(attach); goto err_unlock; } - if (hwpt_is_paging(hwpt)) { - rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); + if (!attach) { + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) { + rc = -ENOMEM; + goto err_release_pasid; + } + xa_init(&attach->device_array); + } + + old_hwpt = attach->hwpt; + + rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY, + GFP_KERNEL); + if (rc) { + WARN_ON(rc == -EBUSY && !old_hwpt); + goto err_free_attach; + } + + if (old_hwpt && old_hwpt != hwpt) { + rc = -EINVAL; + goto err_release_devid; + } + + if (attach_resv) { + rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) - goto err_unlock; + goto err_release_devid; } /* @@ -375,52 +604,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * reserved regions are only updated during individual device * attachment. */ - if (list_empty(&idev->igroup->device_list)) { - rc = iommu_attach_group(hwpt->domain, idev->igroup->group); + if (iommufd_group_first_attach(igroup, pasid)) { + rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; - idev->igroup->hwpt = hwpt; + attach->hwpt = hwpt; + WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach, + GFP_KERNEL))); } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &idev->igroup->device_list); - mutex_unlock(&idev->igroup->lock); + WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id, + idev, GFP_KERNEL))); + mutex_unlock(&igroup->lock); return 0; err_unresv: - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); + if (attach_resv) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); +err_release_devid: + xa_release(&attach->device_array, idev->obj.id); +err_free_attach: + if (iommufd_group_first_attach(igroup, pasid)) + kfree(attach); +err_release_pasid: + if (iommufd_group_first_attach(igroup, pasid)) + xa_release(&igroup->pasid_attach, pasid); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return rc; } struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev) +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { - struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_attach *attach; - mutex_lock(&idev->igroup->lock); - list_del(&idev->group_item); - if (list_empty(&idev->igroup->device_list)) { - iommu_detach_group(hwpt->domain, idev->igroup->group); - idev->igroup->hwpt = NULL; + mutex_lock(&igroup->lock); + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { + mutex_unlock(&igroup->lock); + return NULL; } - if (hwpt_is_paging(hwpt)) - iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, - idev->dev); - mutex_unlock(&idev->igroup->lock); + + hwpt = attach->hwpt; + hwpt_paging = find_hwpt_paging(hwpt); + + xa_erase(&attach->device_array, idev->obj.id); + if (xa_empty(&attach->device_array)) { + iommufd_hwpt_detach_device(hwpt, idev, pasid); + xa_erase(&igroup->pasid_attach, pasid); + kfree(attach); + } + if (hwpt_paging && pasid == IOMMU_NO_PASID) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); + mutex_unlock(&igroup->lock); /* Caller must destroy hwpt */ return hwpt; } static struct iommufd_hw_pagetable * -iommufd_device_do_attach(struct iommufd_device *idev, +iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { int rc; - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) return ERR_PTR(rc); return NULL; @@ -430,27 +681,33 @@ static void iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; lockdep_assert_held(&igroup->lock); - list_for_each_entry(cur, &igroup->device_list, group_item) + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + xa_for_each(&attach->device_array, index, cur) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } static int -iommufd_group_do_replace_paging(struct iommufd_group *igroup, - struct iommufd_hwpt_paging *hwpt_paging) +iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) { - struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; + struct iommufd_hwpt_paging *old_hwpt_paging; + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; int rc; lockdep_assert_held(&igroup->lock); - if (!hwpt_is_paging(old_hwpt) || - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) { + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + old_hwpt_paging = find_hwpt_paging(attach->hwpt); + if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { + xa_for_each(&attach->device_array, index, cur) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -469,70 +726,81 @@ err_unresv: } static struct iommufd_hw_pagetable * -iommufd_device_do_replace(struct iommufd_device *idev, +iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { + struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; + struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; unsigned int num_devices; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (igroup->hwpt == NULL) { + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { rc = -EINVAL; goto err_unlock; } - if (hwpt == igroup->hwpt) { - mutex_unlock(&idev->igroup->lock); + old_hwpt = attach->hwpt; + + WARN_ON(!old_hwpt || xa_empty(&attach->device_array)); + + if (!iommufd_device_is_attached(idev, pasid)) { + rc = -EINVAL; + goto err_unlock; + } + + if (hwpt == old_hwpt) { + mutex_unlock(&igroup->lock); return NULL; } - old_hwpt = igroup->hwpt; - if (hwpt_is_paging(hwpt)) { - rc = iommufd_group_do_replace_paging(igroup, - to_hwpt_paging(hwpt)); + if (attach_resv) { + rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; } - rc = iommu_group_replace_domain(igroup->group, hwpt->domain); + rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt); if (rc) goto err_unresv; - if (hwpt_is_paging(old_hwpt) && - (!hwpt_is_paging(hwpt) || - to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + old_hwpt_paging = find_hwpt_paging(old_hwpt); + if (old_hwpt_paging && pasid == IOMMU_NO_PASID && + (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) + iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); - igroup->hwpt = hwpt; + attach->hwpt = hwpt; - num_devices = list_count_nodes(&igroup->device_list); + num_devices = iommufd_group_device_num(igroup, pasid); /* - * Move the refcounts held by the device_list to the new hwpt. Retain a + * Move the refcounts held by the device_array to the new hwpt. Retain a * refcount for this thread as the caller will free it. */ refcount_add(num_devices, &hwpt->obj.users); if (num_devices > 1) WARN_ON(refcount_sub_and_test(num_devices - 1, &old_hwpt->obj.users)); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_is_paging(hwpt)) - iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + if (attach_resv) + iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return ERR_PTR(rc); } typedef struct iommufd_hw_pagetable *(*attach_fn)( - struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + struct iommufd_device *idev, ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt); /* * When automatically managing the domains we search for a compatible domain in @@ -540,7 +808,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)( * Automatic domain selection will never pick a manually created domain. */ static struct iommufd_hw_pagetable * -iommufd_device_auto_get_domain(struct iommufd_device *idev, +iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_ioas *ioas, u32 *pt_id, attach_fn do_attach) { @@ -569,7 +837,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) { iommufd_put_object(idev->ictx, &hwpt->obj); /* @@ -587,8 +855,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } - hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, - immediate_attach, NULL); + hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid, + 0, immediate_attach, NULL); if (IS_ERR(hwpt_paging)) { destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; @@ -596,7 +864,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!immediate_attach) { - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_abort; } else { @@ -617,8 +885,9 @@ out_unlock: return destroy_hwpt; } -static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, - attach_fn do_attach) +static int iommufd_device_change_pt(struct iommufd_device *idev, + ioasid_t pasid, + u32 *pt_id, attach_fn do_attach) { struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; @@ -633,7 +902,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -642,8 +911,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, - do_attach); + destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas, + pt_id, do_attach); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -665,22 +934,26 @@ out_put_pt_obj: } /** - * iommufd_device_attach - Connect a device to an iommu_domain + * iommufd_device_attach - Connect a device/pasid to an iommu_domain * @idev: device to attach + * @pasid: pasid to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. + * This connects the device/pasid to an iommu_domain, either automatically + * or manually selected. Once this completes the device could do DMA with + * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + rc = iommufd_device_change_pt(idev, pasid, pt_id, + &iommufd_device_do_attach); if (rc) return rc; @@ -691,11 +964,12 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) refcount_inc(&idev->obj.users); return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); /** - * iommufd_device_replace - Change the device's iommu_domain + * iommufd_device_replace - Change the device/pasid's iommu_domain * @idev: device to change + * @pasid: pasid to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * @@ -706,31 +980,37 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be - * called if iommufd_device_attach() has already succeeded. + * called if iommufd_device_attach() has already succeeded. @pasid is + * IOMMU_NO_PASID for no pasid usage. */ -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { - return iommufd_device_change_pt(idev, pt_id, + return iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_replace); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); /** - * iommufd_device_detach - Disconnect a device to an iommu_domain + * iommufd_device_detach - Disconnect a device/device to an iommu_domain * @idev: device to detach + * @pasid: pasid to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. + * @pasid is IOMMU_NO_PASID for no pasid usage. */ -void iommufd_device_detach(struct iommufd_device *idev) +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev); + hwpt = iommufd_hw_pagetable_detach(idev, pasid); + if (!hwpt) + return; iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } -EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD"); /* * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at @@ -852,7 +1132,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, mutex_init(&access->ioas_lock); return access; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD"); /** * iommufd_access_destroy - Destroy an iommufd_access @@ -864,7 +1144,7 @@ void iommufd_access_destroy(struct iommufd_access *access) { iommufd_object_destroy_user(access->ictx, &access->obj); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD"); void iommufd_access_detach(struct iommufd_access *access) { @@ -876,7 +1156,7 @@ void iommufd_access_detach(struct iommufd_access *access) WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD"); int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) { @@ -892,7 +1172,7 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD"); int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) { @@ -907,7 +1187,7 @@ int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD"); /** * iommufd_access_notify_unmap - Notify users of an iopt to stop using it @@ -990,7 +1270,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); } -EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD"); static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) { @@ -1105,7 +1385,7 @@ err_remove: mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD"); /** * iommufd_access_rw - Read or write data under the iova @@ -1126,7 +1406,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, struct io_pagetable *iopt; struct iopt_area *area; unsigned long last_iova; - int rc; + int rc = -EINVAL; if (!length) return -EINVAL; @@ -1169,7 +1449,7 @@ err_out: mutex_unlock(&access->ioas_lock); return rc; } -EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); +EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD"); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) { @@ -1182,7 +1462,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1239,6 +1520,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); |