summary | refs | log | tree | commit | diff
path: root/drivers/iommu/iommufd/device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/iommufd/device.c')
-rw-r--r-- drivers/iommu/iommufd/device.c 575
1 files changed, 443 insertions, 132 deletions
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 873630c111c1..86244403b532 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -1,12 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
*/
+#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci-ats.h>
#include <linux/slab.h>
-#include <linux/iommu.h>
#include <uapi/linux/iommufd.h>
-#include "../iommu-priv.h"
+#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"
@@ -17,12 +18,17 @@ MODULE_PARM_DESC(
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
"the MSI interrupt window. Enabling this is a security weakness.");
+struct iommufd_attach {
+ struct iommufd_hw_pagetable *hwpt;
+ struct xarray device_array;
+};
+
static void iommufd_group_release(struct kref *kref)
{
struct iommufd_group *igroup =
container_of(kref, struct iommufd_group, ref);
- WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
+ WARN_ON(!xa_empty(&igroup->pasid_attach));
xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
NULL, GFP_KERNEL);
@@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
kref_init(&new_igroup->ref);
mutex_init(&new_igroup->lock);
- INIT_LIST_HEAD(&new_igroup->device_list);
+ xa_init(&new_igroup->pasid_attach);
new_igroup->sw_msi_start = PHYS_ADDR_MAX;
/* group reference moves into new_igroup */
new_igroup->group = group;
@@ -232,7 +238,7 @@ out_group_put:
iommufd_put_group(igroup);
return ERR_PTR(rc);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, "IOMMUFD");
/**
* iommufd_ctx_has_group - True if any device within the group is bound
@@ -263,7 +269,7 @@ bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
xa_unlock(&ictx->objects);
return false;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
/**
* iommufd_device_unbind - Undo iommufd_device_bind()
@@ -278,69 +284,97 @@ void iommufd_device_unbind(struct iommufd_device *idev)
{
iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
return idev->ictx;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, "IOMMUFD");
u32 iommufd_device_to_id(struct iommufd_device *idev)
{
return idev->obj.id;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
+
+static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+ struct iommufd_device *idev;
+ unsigned int count = 0;
+ unsigned long index;
+ lockdep_assert_held(&igroup->lock);
+
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (attach)
+ xa_for_each(&attach->device_array, index, idev)
+ count++;
+ return count;
+}
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
- phys_addr_t sw_msi_start = igroup->sw_msi_start;
- int rc;
+ struct iommufd_ctx *ictx = igroup->ictx;
+ struct iommufd_sw_msi_map *cur;
+
+ if (igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
/*
- * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
- * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
- * the MSI window so iommu_dma_prepare_msi() can install pages into our
- * domain after request_irq(). If it is not done interrupts will not
- * work on this domain.
- *
- * FIXME: This is conceptually broken for iommufd since we want to allow
- * userspace to change the domains, eg switch from an identity IOAS to a
- * DMA IOAS. There is currently no way to create a MSI window that
- * matches what the IRQ layer actually expects in a newly created
- * domain.
+ * Install all the MSI pages the device has been using into the domain
*/
- if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
- rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
- sw_msi_start);
+ guard(mutex)(&ictx->sw_msi_lock);
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ int rc;
+
+ if (cur->sw_msi_start != igroup->sw_msi_start ||
+ !test_bit(cur->id, igroup->required_sw_msi.bitmap))
+ continue;
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
if (rc)
return rc;
-
- /*
- * iommu_get_msi_cookie() can only be called once per domain,
- * it returns -EBUSY on later calls.
- */
- hwpt_paging->msi_cookie = true;
}
return 0;
}
+#else
+static inline int
+iommufd_group_setup_msi(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ return 0;
+}
+#endif
-static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
- struct iommufd_device *idev)
+static bool
+iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
{
+ lockdep_assert_held(&igroup->lock);
+ return !xa_load(&igroup->pasid_attach, pasid);
+}
+
+static int
+iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ struct iommufd_group *igroup = idev->igroup;
int rc;
- lockdep_assert_held(&idev->igroup->lock);
+ lockdep_assert_held(&igroup->lock);
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
- &idev->igroup->sw_msi_start);
+ &igroup->sw_msi_start);
if (rc)
return rc;
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
+ if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
+ rc = iommufd_group_setup_msi(igroup, hwpt_paging);
if (rc) {
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
idev->dev);
@@ -350,22 +384,217 @@ static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
return 0;
}
+/* The device attach/detach/replace helpers for attach_handle */
+
+static bool iommufd_device_is_attached(struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+
+ attach = xa_load(&idev->igroup->pasid_attach, pasid);
+ return xa_load(&attach->device_array, idev->obj.id);
+}
+
+static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_group *igroup = idev->igroup;
+
+ lockdep_assert_held(&igroup->lock);
+
+ if (pasid == IOMMU_NO_PASID) {
+ unsigned long start = IOMMU_NO_PASID;
+
+ if (!hwpt->pasid_compat &&
+ xa_find_after(&igroup->pasid_attach,
+ &start, UINT_MAX, XA_PRESENT))
+ return -EINVAL;
+ } else {
+ struct iommufd_attach *attach;
+
+ if (!hwpt->pasid_compat)
+ return -EINVAL;
+
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev)
+{
+ struct pci_dev *pdev;
+
+ if (!hwpt->fault || !dev_is_pci(idev->dev))
+ return true;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ pdev = to_pci_dev(idev->dev);
+
+ return (!pdev->is_virtfn || !pci_pri_supported(pdev));
+}
+
+static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+ int rc;
+
+ if (!iommufd_hwpt_compatible_device(hwpt, idev))
+ return -EINVAL;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ else
+ rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
+ &handle->handle);
+ if (rc)
+ goto out_free_handle;
+
+ return 0;
+
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
+{
+ struct iommu_attach_handle *handle;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ handle =
+ iommu_attach_handle_get(idev->igroup->group, pasid, 0);
+ if (IS_ERR(handle))
+ return NULL;
+ return to_iommufd_handle(handle);
+}
+
+static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+
+ handle = iommufd_device_get_attach_handle(idev, pasid);
+ if (pasid == IOMMU_NO_PASID)
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ else
+ iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
+
+ iommufd_auto_response_faults(hwpt, handle);
+ kfree(handle);
+}
+
+static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
+ ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ struct iommufd_attach_handle *handle, *old_handle;
+ int rc;
+
+ if (!iommufd_hwpt_compatible_device(hwpt, idev))
+ return -EINVAL;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ old_handle = iommufd_device_get_attach_handle(idev, pasid);
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, &handle->handle);
+ else
+ rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
+ pasid, &handle->handle);
+ if (rc)
+ goto out_free_handle;
+
+ iommufd_auto_response_faults(hwpt, old_handle);
+ kfree(old_handle);
+
+ return 0;
+
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev, ioasid_t pasid)
{
+ struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
- rc = -EINVAL;
+ attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(attach)) {
+ rc = xa_err(attach);
goto err_unlock;
}
- if (hwpt_is_paging(hwpt)) {
- rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
+ if (!attach) {
+ attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+ if (!attach) {
+ rc = -ENOMEM;
+ goto err_release_pasid;
+ }
+ xa_init(&attach->device_array);
+ }
+
+ old_hwpt = attach->hwpt;
+
+ rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (rc) {
+ WARN_ON(rc == -EBUSY && !old_hwpt);
+ goto err_free_attach;
+ }
+
+ if (old_hwpt && old_hwpt != hwpt) {
+ rc = -EINVAL;
+ goto err_release_devid;
+ }
+
+ if (attach_resv) {
+ rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
if (rc)
- goto err_unlock;
+ goto err_release_devid;
}
/*
@@ -375,52 +604,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* reserved regions are only updated during individual device
* attachment.
*/
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+ if (iommufd_group_first_attach(igroup, pasid)) {
+ rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
if (rc)
goto err_unresv;
- idev->igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
+ WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
+ GFP_KERNEL)));
}
refcount_inc(&hwpt->obj.users);
- list_add_tail(&idev->group_item, &idev->igroup->device_list);
- mutex_unlock(&idev->igroup->lock);
+ WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
+ idev, GFP_KERNEL)));
+ mutex_unlock(&igroup->lock);
return 0;
err_unresv:
- if (hwpt_is_paging(hwpt))
- iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
- idev->dev);
+ if (attach_resv)
+ iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+err_release_devid:
+ xa_release(&attach->device_array, idev->obj.id);
+err_free_attach:
+ if (iommufd_group_first_attach(igroup, pasid))
+ kfree(attach);
+err_release_pasid:
+ if (iommufd_group_first_attach(igroup, pasid))
+ xa_release(&igroup->pasid_attach, pasid);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return rc;
}
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev)
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
- struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_attach *attach;
- mutex_lock(&idev->igroup->lock);
- list_del(&idev->group_item);
- if (list_empty(&idev->igroup->device_list)) {
- iommu_detach_group(hwpt->domain, idev->igroup->group);
- idev->igroup->hwpt = NULL;
+ mutex_lock(&igroup->lock);
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
+ mutex_unlock(&igroup->lock);
+ return NULL;
}
- if (hwpt_is_paging(hwpt))
- iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
- idev->dev);
- mutex_unlock(&idev->igroup->lock);
+
+ hwpt = attach->hwpt;
+ hwpt_paging = find_hwpt_paging(hwpt);
+
+ xa_erase(&attach->device_array, idev->obj.id);
+ if (xa_empty(&attach->device_array)) {
+ iommufd_hwpt_detach_device(hwpt, idev, pasid);
+ xa_erase(&igroup->pasid_attach, pasid);
+ kfree(attach);
+ }
+ if (hwpt_paging && pasid == IOMMU_NO_PASID)
+ iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy hwpt */
return hwpt;
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_attach(struct iommufd_device *idev,
+iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
int rc;
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
return ERR_PTR(rc);
return NULL;
@@ -430,27 +681,33 @@ static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
lockdep_assert_held(&igroup->lock);
- list_for_each_entry(cur, &igroup->device_list, group_item)
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ xa_for_each(&attach->device_array, index, cur)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}
static int
-iommufd_group_do_replace_paging(struct iommufd_group *igroup,
- struct iommufd_hwpt_paging *hwpt_paging)
+iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
{
- struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
+ struct iommufd_hwpt_paging *old_hwpt_paging;
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
int rc;
lockdep_assert_held(&igroup->lock);
- if (!hwpt_is_paging(old_hwpt) ||
- hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item) {
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ old_hwpt_paging = find_hwpt_paging(attach->hwpt);
+ if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
+ xa_for_each(&attach->device_array, index, cur) {
rc = iopt_table_enforce_dev_resv_regions(
&hwpt_paging->ioas->iopt, cur->dev, NULL);
if (rc)
@@ -469,70 +726,81 @@ err_unresv:
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_replace(struct iommufd_device *idev,
+iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
+ struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
+ struct iommufd_hwpt_paging *old_hwpt_paging;
struct iommufd_group *igroup = idev->igroup;
struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
unsigned int num_devices;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (igroup->hwpt == NULL) {
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
rc = -EINVAL;
goto err_unlock;
}
- if (hwpt == igroup->hwpt) {
- mutex_unlock(&idev->igroup->lock);
+ old_hwpt = attach->hwpt;
+
+ WARN_ON(!old_hwpt || xa_empty(&attach->device_array));
+
+ if (!iommufd_device_is_attached(idev, pasid)) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (hwpt == old_hwpt) {
+ mutex_unlock(&igroup->lock);
return NULL;
}
- old_hwpt = igroup->hwpt;
- if (hwpt_is_paging(hwpt)) {
- rc = iommufd_group_do_replace_paging(igroup,
- to_hwpt_paging(hwpt));
+ if (attach_resv) {
+ rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
if (rc)
goto err_unlock;
}
- rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+ rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
if (rc)
goto err_unresv;
- if (hwpt_is_paging(old_hwpt) &&
- (!hwpt_is_paging(hwpt) ||
- to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
- iommufd_group_remove_reserved_iova(igroup,
- to_hwpt_paging(old_hwpt));
+ old_hwpt_paging = find_hwpt_paging(old_hwpt);
+ if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
+ (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
+ iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
- igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
- num_devices = list_count_nodes(&igroup->device_list);
+ num_devices = iommufd_group_device_num(igroup, pasid);
/*
- * Move the refcounts held by the device_list to the new hwpt. Retain a
+ * Move the refcounts held by the device_array to the new hwpt. Retain a
* refcount for this thread as the caller will free it.
*/
refcount_add(num_devices, &hwpt->obj.users);
if (num_devices > 1)
WARN_ON(refcount_sub_and_test(num_devices - 1,
&old_hwpt->obj.users));
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- if (hwpt_is_paging(hwpt))
- iommufd_group_remove_reserved_iova(igroup,
- to_hwpt_paging(old_hwpt));
+ if (attach_resv)
+ iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return ERR_PTR(rc);
}
typedef struct iommufd_hw_pagetable *(*attach_fn)(
- struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+ struct iommufd_device *idev, ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt);
/*
* When automatically managing the domains we search for a compatible domain in
@@ -540,7 +808,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)(
* Automatic domain selection will never pick a manually created domain.
*/
static struct iommufd_hw_pagetable *
-iommufd_device_auto_get_domain(struct iommufd_device *idev,
+iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_ioas *ioas, u32 *pt_id,
attach_fn do_attach)
{
@@ -569,7 +837,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!iommufd_lock_obj(&hwpt->obj))
continue;
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt)) {
iommufd_put_object(idev->ictx, &hwpt->obj);
/*
@@ -587,8 +855,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
goto out_unlock;
}
- hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
- immediate_attach, NULL);
+ hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
+ 0, immediate_attach, NULL);
if (IS_ERR(hwpt_paging)) {
destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock;
@@ -596,7 +864,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!immediate_attach) {
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_abort;
} else {
@@ -617,8 +885,9 @@ out_unlock:
return destroy_hwpt;
}
-static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
- attach_fn do_attach)
+static int iommufd_device_change_pt(struct iommufd_device *idev,
+ ioasid_t pasid,
+ u32 *pt_id, attach_fn do_attach)
{
struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_object *pt_obj;
@@ -633,7 +902,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -642,8 +911,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_ioas *ioas =
container_of(pt_obj, struct iommufd_ioas, obj);
- destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
- do_attach);
+ destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas,
+ pt_id, do_attach);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -665,22 +934,26 @@ out_put_pt_obj:
}
/**
- * iommufd_device_attach - Connect a device to an iommu_domain
+ * iommufd_device_attach - Connect a device/pasid to an iommu_domain
* @idev: device to attach
+ * @pasid: pasid to attach
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
+ * This connects the device/pasid to an iommu_domain, either automatically
+ * or manually selected. Once this completes the device could do DMA with
+ * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
*
* The caller should return the resulting pt_id back to userspace.
* This function is undone by calling iommufd_device_detach().
*/
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
int rc;
- rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+ rc = iommufd_device_change_pt(idev, pasid, pt_id,
+ &iommufd_device_do_attach);
if (rc)
return rc;
@@ -691,11 +964,12 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
refcount_inc(&idev->obj.users);
return 0;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
/**
- * iommufd_device_replace - Change the device's iommu_domain
+ * iommufd_device_replace - Change the device/pasid's iommu_domain
* @idev: device to change
+ * @pasid: pasid to change
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
@@ -706,31 +980,37 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
*
* If it fails then no change is made to the attachment. The iommu driver may
* implement this so there is no disruption in translation. This can only be
- * called if iommufd_device_attach() has already succeeded.
+ * called if iommufd_device_attach() has already succeeded. @pasid is
+ * IOMMU_NO_PASID for no pasid usage.
*/
-int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
- return iommufd_device_change_pt(idev, pt_id,
+ return iommufd_device_change_pt(idev, pasid, pt_id,
&iommufd_device_do_replace);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
/**
- * iommufd_device_detach - Disconnect a device to an iommu_domain
+ * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
* @idev: device to detach
+ * @pasid: pasid to detach
*
* Undo iommufd_device_attach(). This disconnects the idev from the previously
* attached pt_id. The device returns back to a blocked DMA translation.
+ * @pasid is IOMMU_NO_PASID for no pasid usage.
*/
-void iommufd_device_detach(struct iommufd_device *idev)
+void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hw_pagetable *hwpt;
- hwpt = iommufd_hw_pagetable_detach(idev);
+ hwpt = iommufd_hw_pagetable_detach(idev, pasid);
+ if (!hwpt)
+ return;
iommufd_hw_pagetable_put(idev->ictx, hwpt);
refcount_dec(&idev->obj.users);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, "IOMMUFD");
/*
* On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
@@ -852,7 +1132,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
mutex_init(&access->ioas_lock);
return access;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
/**
* iommufd_access_destroy - Destroy an iommufd_access
@@ -864,7 +1144,7 @@ void iommufd_access_destroy(struct iommufd_access *access)
{
iommufd_object_destroy_user(access->ictx, &access->obj);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, "IOMMUFD");
void iommufd_access_detach(struct iommufd_access *access)
{
@@ -876,7 +1156,7 @@ void iommufd_access_detach(struct iommufd_access *access)
WARN_ON(iommufd_access_change_ioas(access, NULL));
mutex_unlock(&access->ioas_lock);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, "IOMMUFD");
int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
@@ -892,7 +1172,7 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
mutex_unlock(&access->ioas_lock);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
@@ -907,7 +1187,7 @@ int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
mutex_unlock(&access->ioas_lock);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, "IOMMUFD");
/**
* iommufd_access_notify_unmap - Notify users of an iopt to stop using it
@@ -990,7 +1270,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
up_read(&iopt->iova_rwsem);
mutex_unlock(&access->ioas_lock);
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");
static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
@@ -1105,7 +1385,7 @@ err_remove:
mutex_unlock(&access->ioas_lock);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
/**
* iommufd_access_rw - Read or write data under the iova
@@ -1126,7 +1406,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
- int rc;
+ int rc = -EINVAL;
if (!length)
return -EINVAL;
@@ -1169,7 +1449,7 @@ err_out:
mutex_unlock(&access->ioas_lock);
return rc;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
+EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
@@ -1182,7 +1462,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
+ cmd->__reserved[2])
return -EOPNOTSUPP;
idev = iommufd_get_device(ucmd, cmd->dev_id);
@@ -1239,6 +1520,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+ cmd->out_max_pasid_log2 = 0;
+ /*
+ * Currently, all iommu drivers enable PASID in the probe_device()
+ * op if both the iommu and the device support it. So the max_pasids stored in
+ * dev->iommu indicates both PASID support and enable status. A
+ * non-zero dev->iommu->max_pasids means PASID is supported and
+ * enabled. The iommufd only reports PASID capability to userspace
+ * if it's enabled.
+ */
+ if (idev->dev->iommu->max_pasids) {
+ cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
+
+ if (dev_is_pci(idev->dev)) {
+ struct pci_dev *pdev = to_pci_dev(idev->dev);
+ int ctrl;
+
+ ctrl = pci_pasid_status(pdev);
+
+ WARN_ON_ONCE(ctrl < 0 ||
+ !(ctrl & PCI_PASID_CTRL_ENABLE));
+
+ if (ctrl & PCI_PASID_CTRL_EXEC)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_EXEC;
+ if (ctrl & PCI_PASID_CTRL_PRIV)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_PRIV;
+ }
+ }
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);