Diffstat (limited to 'drivers/iommu/iommu.c')
-rw-r--r--  drivers/iommu/iommu.c  1016
1 file changed, 688 insertions(+), 328 deletions(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a95a483def2d..a4b606c591da 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
@@ -32,6 +33,7 @@
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
#include <linux/msi.h>
+#include <uapi/linux/iommufd.h>
#include "dma-iommu.h"
#include "iommu-priv.h"
@@ -44,6 +46,9 @@ static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;
+/* Tags used with xa_tag_pointer() in group->pasid_array */
+enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };
+
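
A minimal sketch of the tagging round-trip these values enable, using the xa_tag_pointer()/xa_untag_pointer()/xa_pointer_tag() helpers from <linux/xarray.h> (the function names here are illustrative, not part of this patch):

	/* The tag travels in the low bits of the pointer, so tagged entries
	 * must be at least 4-byte aligned; tag values 0-3 are available.
	 */
	static void *pack_domain_entry(struct iommu_domain *domain)
	{
		return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
	}

	static bool entry_is_handle(void *entry)
	{
		return xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_HANDLE;
	}

	static struct iommu_domain *unpack_domain_entry(void *entry)
	{
		/* strip the tag bits before dereferencing */
		return xa_untag_pointer(entry);
	}
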
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -90,15 +95,17 @@ static const char * const iommu_group_resv_type_string[] = {
#define IOMMU_CMD_LINE_DMA_API BIT(0)
#define IOMMU_CMD_LINE_STRICT BIT(1)
+static int bus_iommu_probe(const struct bus_type *bus);
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
-static struct iommu_domain *
-__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
+static struct iommu_domain *__iommu_paging_domain_alloc_flags(struct device *dev,
+ unsigned int type,
+ unsigned int flags);
enum {
IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
@@ -133,6 +140,8 @@ static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
struct device *dev);
static void __iommu_group_free_device(struct iommu_group *group,
struct group_device *grp_dev);
+static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
+ const struct iommu_ops *ops);
#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \
struct iommu_group_attribute iommu_group_attr_##_name = \
@@ -268,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu,
err = bus_iommu_probe(iommu_buses[i]);
if (err)
iommu_device_unregister(iommu);
+ else
+ WRITE_ONCE(iommu->ready, true);
return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);
@@ -347,7 +358,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
return param;
}
-static void dev_iommu_free(struct device *dev)
+void dev_iommu_free(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
@@ -399,14 +410,42 @@ EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
* Init the dev->iommu and dev->iommu_group in the struct device and get the
* driver probed
*/
-static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
+static int iommu_init_device(struct device *dev)
{
+ const struct iommu_ops *ops;
struct iommu_device *iommu_dev;
struct iommu_group *group;
int ret;
if (!dev_iommu_get(dev))
return -ENOMEM;
+ /*
+ * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
+ * is buried in the bus dma_configure path. Properly unpicking that is
+ * still a big job, so for now just invoke the whole thing. The device
+ * already having a driver bound means dma_configure has already run and
+ * found no IOMMU to wait for, so there's no point calling it again.
+ */
+ if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
+ mutex_unlock(&iommu_probe_device_lock);
+ dev->bus->dma_configure(dev);
+ mutex_lock(&iommu_probe_device_lock);
+ /* If another instance finished the job for us, skip it */
+ if (!dev->iommu || dev->iommu_group)
+ return -ENODEV;
+ }
+ /*
+ * At this point, relevant devices either now have a fwspec which will
+ * match ops registered with a non-NULL fwnode, or we can reasonably
+ * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
+ * be present, and that any of their registered instances has suitable
+ * ops for probing, and thus cheekily co-opt the same mechanism.
+ */
+ ops = iommu_fwspec_ops(dev->iommu->fwspec);
+ if (!ops) {
+ ret = -ENODEV;
+ goto err_free;
+ }
if (!try_module_get(ops->owner)) {
ret = -EINVAL;
@@ -503,35 +542,27 @@ static void iommu_deinit_device(struct device *dev)
dev->iommu_group = NULL;
module_put(ops->owner);
dev_iommu_free(dev);
+#ifdef CONFIG_IOMMU_DMA
+ dev->dma_iommu = false;
+#endif
+}
+
+static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
+{
+ if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
+ return xa_untag_pointer(entry);
+ return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}
DEFINE_MUTEX(iommu_probe_device_lock);
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
- const struct iommu_ops *ops;
- struct iommu_fwspec *fwspec;
struct iommu_group *group;
struct group_device *gdev;
int ret;
/*
- * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU
- * instances with non-NULL fwnodes, and client devices should have been
- * identified with a fwspec by this point. Otherwise, we can currently
- * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
- * be present, and that any of their registered instances has suitable
- * ops for probing, and thus cheekily co-opt the same mechanism.
- */
- fwspec = dev_iommu_fwspec_get(dev);
- if (fwspec && fwspec->ops)
- ops = fwspec->ops;
- else
- ops = iommu_ops_from_fwnode(NULL);
-
- if (!ops)
- return -ENODEV;
- /*
* Serialise to avoid races between IOMMU drivers registering in
* parallel and/or the "replay" calls from ACPI/OF code via client
* driver probe. Once the latter have been cleaned up we should
@@ -544,9 +575,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (dev->iommu_group)
return 0;
- ret = iommu_init_device(dev, ops);
+ ret = iommu_init_device(dev);
if (ret)
return ret;
+ /*
+ * And if we do now see any replay calls, they would indicate someone
+ * misusing the dma_configure path outside bus code.
+ */
+ if (dev->driver)
+ dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@@ -581,10 +618,11 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (list_empty(&group->entry))
list_add_tail(&group->entry, group_list);
}
- mutex_unlock(&group->mutex);
- if (dev_is_pci(dev))
- iommu_dma_set_pci_32bit_workaround(dev);
+ if (group->default_domain)
+ iommu_setup_dma_ops(dev);
+
+ mutex_unlock(&group->mutex);
return 0;
@@ -1146,10 +1184,6 @@ map_end:
}
}
-
- if (!list_empty(&mappings) && iommu_is_dma_domain(domain))
- iommu_flush_iotlb_all(domain);
-
out:
iommu_put_resv_regions(dev, &mappings);
@@ -1591,12 +1625,57 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
+static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *domain;
+
+ if (ops->identity_domain)
+ return ops->identity_domain;
+
+ if (ops->domain_alloc_identity) {
+ domain = ops->domain_alloc_identity(dev);
+ if (IS_ERR(domain))
+ return domain;
+ } else {
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops);
+ return domain;
+}
+
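
For reference, a hedged sketch of the static-domain pattern the fast path above prefers: a driver exposes a singleton identity domain through its ops, so no per-group allocation is needed (all my_* names are illustrative):

	static int my_identity_attach_dev(struct iommu_domain *domain,
					  struct device *dev)
	{
		/* program the hardware to pass DMA through untranslated */
		return 0;
	}

	static const struct iommu_domain_ops my_identity_domain_ops = {
		.attach_dev = my_identity_attach_dev,
	};

	static struct iommu_domain my_identity_domain = {
		.type = IOMMU_DOMAIN_IDENTITY,
		.ops = &my_identity_domain_ops,
	};

	static const struct iommu_ops my_iommu_ops = {
		.identity_domain = &my_identity_domain,
		/* ... */
	};
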
static struct iommu_domain *
__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
+ struct device *dev = iommu_group_first_dev(group);
+ struct iommu_domain *dom;
+
if (group->default_domain && group->default_domain->type == req_type)
return group->default_domain;
- return __iommu_group_domain_alloc(group, req_type);
+
+ /*
+ * When allocating the DMA API domain, assume that the driver is going
+ * to use PASID and make sure the RID's domain is PASID-compatible.
+ */
+ if (req_type & __IOMMU_DOMAIN_PAGING) {
+ dom = __iommu_paging_domain_alloc_flags(dev, req_type,
+ dev->iommu->max_pasids ? IOMMU_HWPT_ALLOC_PASID : 0);
+
+ /*
+ * If the driver does not support the PASID feature, try
+ * to allocate a non-PASID domain
+ */
+ if (PTR_ERR(dom) == -EOPNOTSUPP)
+ dom = __iommu_paging_domain_alloc_flags(dev, req_type, 0);
+
+ return dom;
+ }
+
+ if (req_type == IOMMU_DOMAIN_IDENTITY)
+ return __iommu_alloc_identity_domain(dev);
+
+ return ERR_PTR(-EINVAL);
}
/*
@@ -1713,7 +1792,7 @@ static int iommu_get_def_domain_type(struct iommu_group *group,
group->id);
/*
- * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY
+ * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY
* takes precedence.
*/
if (type == IOMMU_DOMAIN_IDENTITY)
@@ -1800,7 +1879,7 @@ static void iommu_group_do_probe_finalize(struct device *dev)
ops->probe_finalize(dev);
}
-int bus_iommu_probe(const struct bus_type *bus)
+static int bus_iommu_probe(const struct bus_type *bus)
{
struct iommu_group *group, *next;
LIST_HEAD(group_list);
@@ -1828,6 +1907,8 @@ int bus_iommu_probe(const struct bus_type *bus)
mutex_unlock(&group->mutex);
return ret;
}
+ for_each_group_device(group, gdev)
+ iommu_setup_dma_ops(gdev->dev);
mutex_unlock(&group->mutex);
/*
@@ -1844,31 +1925,6 @@ int bus_iommu_probe(const struct bus_type *bus)
}
/**
- * iommu_present() - make platform-specific assumptions about an IOMMU
- * @bus: bus to check
- *
- * Do not use this function. You want device_iommu_mapped() instead.
- *
- * Return: true if some IOMMU is present and aware of devices on the given bus;
- * in general it may not be the only IOMMU, and it may not have anything to do
- * with whatever device you are ultimately interested in.
- */
-bool iommu_present(const struct bus_type *bus)
-{
- bool ret = false;
-
- for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
- if (iommu_buses[i] == bus) {
- spin_lock(&iommu_device_lock);
- ret = !list_empty(&iommu_device_list);
- spin_unlock(&iommu_device_lock);
- }
- }
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_present);
-
-/**
* device_iommu_capable() - check for a general IOMMU capability
* @dev: device to which the capability would be relevant, if available
* @cap: IOMMU capability
@@ -1930,110 +1986,94 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler,
void *token)
{
- BUG_ON(!domain);
+ if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE))
+ return;
+ domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
domain->handler = handler;
domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
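
With the cookie_type check, a handler can only be installed on a domain that has no cookie attached yet, e.g. right after allocating an unmanaged domain. A hedged caller sketch, assuming the usual iommu_fault_handler_t signature:

	static int my_fault_handler(struct iommu_domain *domain, struct device *dev,
				    unsigned long iova, int flags, void *token)
	{
		dev_err(dev, "unhandled fault at IOVA %#lx (flags %#x)\n", iova, flags);
		return -ENOSYS;	/* not handled; the core will report it */
	}

	/* ... after allocating the domain, before any cookie is attached: */
	iommu_set_fault_handler(domain, my_fault_handler, NULL);
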
-static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
- struct device *dev,
- unsigned int type)
+static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
+ const struct iommu_ops *ops)
{
- struct iommu_domain *domain;
- unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
-
- if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
- return ops->identity_domain;
- else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
- return ops->blocked_domain;
- else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging)
- domain = ops->domain_alloc_paging(dev);
- else if (ops->domain_alloc)
- domain = ops->domain_alloc(alloc_type);
- else
- return ERR_PTR(-EOPNOTSUPP);
-
- /*
- * Many domain_alloc ops now return ERR_PTR, make things easier for the
- * driver by accepting ERR_PTR from all domain_alloc ops instead of
- * having two rules.
- */
- if (IS_ERR(domain))
- return domain;
- if (!domain)
- return ERR_PTR(-ENOMEM);
-
domain->type = type;
domain->owner = ops;
+ if (!domain->ops)
+ domain->ops = ops->default_domain_ops;
+
/*
* If not already set, assume all sizes by default; the driver
* may override this later
*/
if (!domain->pgsize_bitmap)
domain->pgsize_bitmap = ops->pgsize_bitmap;
-
- if (!domain->ops)
- domain->ops = ops->default_domain_ops;
-
- if (iommu_is_dma_domain(domain)) {
- int rc;
-
- rc = iommu_get_dma_cookie(domain);
- if (rc) {
- iommu_domain_free(domain);
- return ERR_PTR(rc);
- }
- }
- return domain;
}
static struct iommu_domain *
-__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
+__iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type,
+ unsigned int flags)
{
- struct device *dev = iommu_group_first_dev(group);
+ const struct iommu_ops *ops;
+ struct iommu_domain *domain;
- return __iommu_domain_alloc(dev_iommu_ops(dev), dev, type);
-}
+ if (!dev_has_iommu(dev))
+ return ERR_PTR(-ENODEV);
-static int __iommu_domain_alloc_dev(struct device *dev, void *data)
-{
- const struct iommu_ops **ops = data;
+ ops = dev_iommu_ops(dev);
- if (!dev_has_iommu(dev))
- return 0;
+ if (ops->domain_alloc_paging && !flags)
+ domain = ops->domain_alloc_paging(dev);
+ else if (ops->domain_alloc_paging_flags)
+ domain = ops->domain_alloc_paging_flags(dev, flags, NULL);
+#if IS_ENABLED(CONFIG_FSL_PAMU)
+ else if (ops->domain_alloc && !flags)
+ domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
+#endif
+ else
+ return ERR_PTR(-EOPNOTSUPP);
- if (WARN_ONCE(*ops && *ops != dev_iommu_ops(dev),
- "Multiple IOMMU drivers present for bus %s, which the public IOMMU API can't fully support yet. You will still need to disable one or more for this to work, sorry!\n",
- dev_bus_name(dev)))
- return -EBUSY;
+ if (IS_ERR(domain))
+ return domain;
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
- *ops = dev_iommu_ops(dev);
- return 0;
+ iommu_domain_init(domain, type, ops);
+ return domain;
}
-struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
+/**
+ * iommu_paging_domain_alloc_flags() - Allocate a paging domain
+ * @dev: device for which the domain is allocated
+ * @flags: Bitmap of iommufd_hwpt_alloc_flags
+ *
+ * Allocate a paging domain which will be managed by a kernel driver. Return
+ * the allocated domain on success, or an ERR_PTR on failure.
+ */
+struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev,
+ unsigned int flags)
{
- const struct iommu_ops *ops = NULL;
- int err = bus_for_each_dev(bus, NULL, &ops, __iommu_domain_alloc_dev);
- struct iommu_domain *domain;
-
- if (err || !ops)
- return NULL;
-
- domain = __iommu_domain_alloc(ops, NULL, IOMMU_DOMAIN_UNMANAGED);
- if (IS_ERR(domain))
- return NULL;
- return domain;
+ return __iommu_paging_domain_alloc_flags(dev,
+ IOMMU_DOMAIN_UNMANAGED, flags);
}
-EXPORT_SYMBOL_GPL(iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
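
A hedged usage sketch mirroring the fallback the core itself performs in __iommu_group_alloc_default_domain() above (dev is assumed to be a probed, IOMMU-backed device):

	struct iommu_domain *domain;

	/* Prefer a PASID-capable paging domain, fall back if unsupported */
	domain = iommu_paging_domain_alloc_flags(dev, IOMMU_HWPT_ALLOC_PASID);
	if (PTR_ERR(domain) == -EOPNOTSUPP)
		domain = iommu_paging_domain_alloc_flags(dev, 0);
	if (IS_ERR(domain))
		return PTR_ERR(domain);
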
void iommu_domain_free(struct iommu_domain *domain)
{
- if (domain->type == IOMMU_DOMAIN_SVA)
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ iommu_put_dma_cookie(domain);
+ break;
+ case IOMMU_COOKIE_DMA_MSI:
+ iommu_put_msi_cookie(domain);
+ break;
+ case IOMMU_COOKIE_SVA:
mmdrop(domain->mm);
- iommu_put_dma_cookie(domain);
+ break;
+ default:
+ break;
+ }
if (domain->ops->free)
domain->ops->free(domain);
}
@@ -2157,6 +2197,30 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
return dev->iommu_group->default_domain;
}
+static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
+ struct iommu_attach_handle *handle)
+{
+ if (handle) {
+ handle->domain = domain;
+ return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE);
+ }
+
+ return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
+}
+
+static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
+ struct iommu_domain *domain)
+{
+ if (domain->owner == ops)
+ return true;
+
+ /* For static domains, owner isn't set. */
+ if (domain == ops->blocked_domain || domain == ops->identity_domain)
+ return true;
+
+ return false;
+}
+
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
@@ -2167,7 +2231,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
return -EBUSY;
dev = iommu_group_first_dev(group);
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) ||
+ !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
return -EINVAL;
return __iommu_group_set_domain(group, domain);
@@ -2197,32 +2262,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
-/**
- * iommu_group_replace_domain - replace the domain that a group is attached to
- * @new_domain: new IOMMU domain to replace with
- * @group: IOMMU group that will be attached to the new domain
- *
- * This API allows the group to switch domains without being forced to go to
- * the blocking domain in-between.
- *
- * If the currently attached domain is a core domain (e.g. a default_domain),
- * it will act just like the iommu_attach_group().
- */
-int iommu_group_replace_domain(struct iommu_group *group,
- struct iommu_domain *new_domain)
-{
- int ret;
-
- if (!new_domain)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
- ret = __iommu_group_set_domain(group, new_domain);
- mutex_unlock(&group->mutex);
- return ret;
-}
-EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, IOMMUFD_INTERNAL);
-
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
@@ -2374,6 +2413,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
size_t offset, pgsize, pgsize_next;
+ size_t offset_end;
unsigned long addr_merge = paddr | iova;
/* Page sizes supported by the hardware and small enough for @size */
@@ -2414,7 +2454,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
* If size is big enough to accommodate the larger page, reduce
* the number of smaller pages.
*/
- if (offset + pgsize_next <= size)
+ if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
+ offset_end <= size)
size = offset;
out_set_count:
@@ -2422,8 +2463,8 @@ out_set_count:
return pgsize;
}
-static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
unsigned long orig_iova = iova;
@@ -2432,12 +2473,19 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
return -ENODEV;
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2485,31 +2533,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size)
{
const struct iommu_domain_ops *ops = domain->ops;
- int ret;
-
- might_sleep_if(gfpflags_allow_blocking(gfp));
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
+ if (!ops->iotlb_sync_map)
+ return 0;
+ return ops->iotlb_sync_map(domain, iova, size);
+}
- ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, size);
- if (ret)
- goto out_err;
- }
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ int ret;
- return ret;
+ ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp);
+ if (ret)
+ return ret;
-out_err:
- /* undo mappings already done */
- iommu_unmap(domain, iova, size);
+ ret = iommu_sync_map(domain, iova, size);
+ if (ret)
+ iommu_unmap(domain, iova, size);
return ret;
}
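
Splitting the IOTLB sync out of the map path lets callers batch several mappings under a single flush, which is what iommu_map_sg() below does. A hedged sketch of the pattern (phys[], len[] and nents are illustrative):

	size_t mapped = 0;
	int i, ret;

	for (i = 0; i < nents; i++) {
		ret = iommu_map_nosync(domain, iova + mapped, phys[i], len[i],
				       IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
		if (ret)
			goto out_unmap;
		mapped += len[i];
	}
	ret = iommu_sync_map(domain, iova, mapped);
	if (ret)
		goto out_unmap;
	return 0;

out_unmap:
	/* undo everything mapped so far, as iommu_map() itself does */
	iommu_unmap(domain, iova, mapped);
	return ret;
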
@@ -2569,6 +2613,20 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
return unmapped;
}
+/**
+ * iommu_unmap() - Remove mappings from a range of IOVA
+ * @domain: Domain to manipulate
+ * @iova: IO virtual address to start
+ * @size: Length of the range starting from @iova
+ *
+ * iommu_unmap() will remove a translation created by iommu_map(). It cannot
+ * subdivide a mapping created by iommu_map(), so it should be called with IOVA
+ * ranges that match what was passed to iommu_map(). The range can aggregate
+ * contiguous iommu_map() calls so long as no individual range is split.
+ *
+ * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
+ * where unmapping stopped.
+ */
size_t iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
@@ -2583,6 +2641,25 @@ size_t iommu_unmap(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap);
+/**
+ * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync
+ * @domain: Domain to manipulate
+ * @iova: IO virtual address to start
+ * @size: Length of the range starting from @iova
+ * @iotlb_gather: range information for a pending IOTLB flush
+ *
+ * iommu_unmap_fast() will remove a translation created by iommu_map().
+ * It can't subdivide a mapping created by iommu_map(), so it should be
+ * called with IOVA ranges that match what was passed to iommu_map(). The
+ * range can aggregate contiguous iommu_map() calls so long as no individual
+ * range is split.
+ *
+ * Basically, iommu_unmap_fast() is the same as iommu_unmap(), but for callers
+ * that manage IOTLB flushing externally in order to perform a batched sync.
+ *
+ * Returns: Number of bytes of IOVA unmapped. iova + res will be the point
+ * where unmapping stopped.
+ */
size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
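
A hedged sketch of the batched-flush pattern this is meant for, using the gather helpers from <linux/iommu.h>:

	struct iommu_iotlb_gather gather;
	size_t unmapped;

	iommu_iotlb_gather_init(&gather);
	unmapped = iommu_unmap_fast(domain, iova, size, &gather);
	/* ... possibly more iommu_unmap_fast() calls on the same domain ... */
	iommu_iotlb_sync(domain, &gather);	/* one flush for the whole batch */
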
@@ -2595,26 +2672,17 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
- const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
- might_sleep_if(gfpflags_allow_blocking(gfp));
-
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
-
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
if (len && s_phys != start + len) {
- ret = __iommu_map(domain, iova + mapped, start,
+ ret = iommu_map_nosync(domain, iova + mapped, start,
len, prot, gfp);
-
if (ret)
goto out_err;
@@ -2637,11 +2705,10 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map) {
- ret = ops->iotlb_sync_map(domain, iova, mapped);
- if (ret)
- goto out_err;
- }
+ ret = iommu_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+
return mapped;
out_err:
@@ -2685,7 +2752,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
* if upper layers showed interest and installed a fault handler,
* invoke it.
*/
- if (domain->handler)
+ if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER &&
+ domain->handler)
ret = domain->handler(domain, dev, iova, flags,
domain->handler_token);
@@ -2706,16 +2774,6 @@ static int __init iommu_init(void)
}
core_initcall(iommu_init);
-int iommu_enable_nesting(struct iommu_domain *domain)
-{
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
- if (!domain->ops->enable_nesting)
- return -EINVAL;
- return domain->ops->enable_nesting(domain);
-}
-EXPORT_SYMBOL_GPL(iommu_enable_nesting);
-
int iommu_set_pgtable_quirks(struct iommu_domain *domain,
unsigned long quirk)
{
@@ -2804,28 +2862,39 @@ bool iommu_default_passthrough(void)
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);
-const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode)
{
- const struct iommu_ops *ops = NULL;
- struct iommu_device *iommu;
+ const struct iommu_device *iommu, *ret = NULL;
spin_lock(&iommu_device_lock);
list_for_each_entry(iommu, &iommu_device_list, list)
if (iommu->fwnode == fwnode) {
- ops = iommu->ops;
+ ret = iommu;
break;
}
spin_unlock(&iommu_device_lock);
- return ops;
+ return ret;
+}
+
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+{
+ const struct iommu_device *iommu = iommu_from_fwnode(fwnode);
+
+ return iommu ? iommu->ops : NULL;
}
-int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
- const struct iommu_ops *ops)
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode)
{
+ const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ if (!iommu)
+ return driver_deferred_probe_check_state(dev);
+ if (!dev->iommu && !READ_ONCE(iommu->ready))
+ return -EPROBE_DEFER;
+
if (fwspec)
- return ops == fwspec->ops ? 0 : -EINVAL;
+ return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
if (!dev_iommu_get(dev))
return -ENOMEM;
@@ -2835,9 +2904,8 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
if (!fwspec)
return -ENOMEM;
- of_node_get(to_of_node(iommu_fwnode));
+ fwnode_handle_get(iommu_fwnode);
fwspec->iommu_fwnode = iommu_fwnode;
- fwspec->ops = ops;
dev_iommu_fwspec_set(dev, fwspec);
return 0;
}
@@ -2853,7 +2921,6 @@ void iommu_fwspec_free(struct device *dev)
dev_iommu_fwspec_set(dev, NULL);
}
}
-EXPORT_SYMBOL_GPL(iommu_fwspec_free);
int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
{
@@ -2881,38 +2948,6 @@ int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
-/*
- * Per device IOMMU features.
- */
-int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
-{
- if (dev_has_iommu(dev)) {
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->dev_enable_feat)
- return ops->dev_enable_feat(dev, feat);
- }
-
- return -ENODEV;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_enable_feature);
-
-/*
- * The device drivers should do the necessary cleanups before calling this.
- */
-int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
-{
- if (dev_has_iommu(dev)) {
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->dev_disable_feat)
- return ops->dev_disable_feat(dev, feat);
- }
-
- return -EBUSY;
-}
-EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
-
/**
* iommu_setup_default_domain - Set the default_domain for the group
* @group: Group to change
@@ -2946,6 +2981,14 @@ static int iommu_setup_default_domain(struct iommu_group *group,
if (group->default_domain == dom)
return 0;
+ if (iommu_is_dma_domain(dom)) {
+ ret = iommu_get_dma_cookie(dom);
+ if (ret) {
+ iommu_domain_free(dom);
+ return ret;
+ }
+ }
+
/*
* IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
* mapped before their device is attached, in order to guarantee
@@ -3066,18 +3109,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
if (ret)
goto out_unlock;
- /*
- * Release the mutex here because ops->probe_finalize() call-back of
- * some vendor IOMMU drivers calls arm_iommu_attach_device() which
- * in-turn might call back into IOMMU core code, where it tries to take
- * group->mutex, resulting in a deadlock.
- */
- mutex_unlock(&group->mutex);
-
/* Make sure dma_ops is appropriately set */
for_each_group_device(group, gdev)
- iommu_group_do_probe_finalize(gdev->dev);
- return count;
+ iommu_setup_dma_ops(gdev->dev);
out_unlock:
mutex_unlock(&group->mutex);
@@ -3102,6 +3136,11 @@ int iommu_device_use_default_domain(struct device *dev)
return 0;
mutex_lock(&group->mutex);
+ /* We may race against bus_iommu_probe() finalising groups here */
+ if (!group->default_domain) {
+ ret = -EPROBE_DEFER;
+ goto unlock_out;
+ }
if (group->owner_cnt) {
if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
@@ -3142,22 +3181,25 @@ void iommu_device_unuse_default_domain(struct device *dev)
static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
{
+ struct device *dev = iommu_group_first_dev(group);
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
struct iommu_domain *domain;
if (group->blocking_domain)
return 0;
- domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
- if (IS_ERR(domain)) {
- /*
- * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
- * create an empty domain instead.
- */
- domain = __iommu_group_domain_alloc(group,
- IOMMU_DOMAIN_UNMANAGED);
- if (IS_ERR(domain))
- return PTR_ERR(domain);
+ if (ops->blocked_domain) {
+ group->blocking_domain = ops->blocked_domain;
+ return 0;
}
+
+ /*
+ * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED create an
+ * empty PAGING domain instead.
+ */
+ domain = iommu_paging_domain_alloc(dev);
+ if (IS_ERR(domain))
+ return PTR_ERR(domain);
group->blocking_domain = domain;
return 0;
}
@@ -3314,30 +3356,64 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
+ struct iommu_domain *domain)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *blocked_domain = ops->blocked_domain;
+
+ WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
+ dev, pasid, domain));
+}
+
static int __iommu_set_group_pasid(struct iommu_domain *domain,
- struct iommu_group *group, ioasid_t pasid)
+ struct iommu_group *group, ioasid_t pasid,
+ struct iommu_domain *old)
{
- struct group_device *device;
- int ret = 0;
+ struct group_device *device, *last_gdev;
+ int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
- if (ret)
- break;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
+ return 0;
+
+err_revert:
+ last_gdev = device;
+ for_each_group_device(group, device) {
+ if (device == last_gdev)
+ break;
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If there is no old domain, undo the devices/pasid that
+ * succeeded. Otherwise, roll the devices/pasid that succeeded
+ * back to the old domain; it is a driver bug to fail attaching
+ * with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ pasid, domain)))
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
+ }
return ret;
}
static void __iommu_remove_group_pasid(struct iommu_group *group,
- ioasid_t pasid)
+ ioasid_t pasid,
+ struct iommu_domain *domain)
{
struct group_device *device;
- const struct iommu_ops *ops;
for_each_group_device(group, device) {
- ops = dev_iommu_ops(device->dev);
- ops->remove_dev_pasid(device->dev, pasid);
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
}
@@ -3346,109 +3422,195 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @domain: the iommu domain.
* @dev: the attached device.
* @pasid: the pasid of the device.
+ * @handle: the attach handle.
+ *
+ * If the caller intends to pass a valid handle, it should always provide a
+ * new one, to avoid races with paths that hold a lockless reference to the
+ * handle.
*
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
- void *curr;
+ const struct iommu_ops *ops;
+ void *entry;
int ret;
- if (!domain->ops->set_dev_pasid)
- return -EOPNOTSUPP;
-
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+ ops = dev_iommu_ops(dev);
+
+ if (!domain->ops->set_dev_pasid ||
+ !ops->blocked_domain ||
+ !ops->blocked_domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (!domain_iommu_ops_compatible(ops, domain) ||
pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
}
- curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
- if (curr) {
- ret = xa_err(curr) ? : -EBUSY;
+ entry = iommu_make_pasid_array_entry(domain, handle);
+
+ /*
+ * An already-present entry is a failure case, so use xa_insert()
+ * instead of xa_reserve().
+ */
+ ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (ret)
goto out_unlock;
- }
- ret = __iommu_set_group_pasid(domain, group, pasid);
+ ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
if (ret) {
- __iommu_remove_group_pasid(group, pasid);
- xa_erase(&group->pasid_array, pasid);
+ xa_release(&group->pasid_array, pasid);
+ goto out_unlock;
}
+
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, so this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
out_unlock:
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
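
A hedged caller sketch; since the core now stores the caller-provided handle in the pasid_array, the handle must be embedded in a structure that outlives the attachment (struct my_bond is illustrative, not a kernel type):

	struct my_bond {
		/* must stay alive for as long as the pasid is attached */
		struct iommu_attach_handle handle;
		void *private;
	};

	static int my_bind(struct iommu_domain *domain, struct device *dev,
			   ioasid_t pasid)
	{
		struct my_bond *bond = kzalloc(sizeof(*bond), GFP_KERNEL);
		int ret;

		if (!bond)
			return -ENOMEM;
		/* a fresh handle per attach, as the kernel-doc above requires */
		ret = iommu_attach_device_pasid(domain, dev, pasid, &bond->handle);
		if (ret)
			kfree(bond);
		return ret;
	}
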
-/*
- * iommu_detach_device_pasid() - Detach the domain from pasid of device
- * @domain: the iommu domain.
+/**
+ * iommu_replace_device_pasid - Replace the domain that a specific pasid
+ * of the device is attached to
+ * @domain: the new iommu domain
* @dev: the attached device.
* @pasid: the pasid of the device.
+ * @handle: the attach handle.
*
- * The @domain must have been attached to @pasid of the @dev with
- * iommu_attach_device_pasid().
+ * This API allows the pasid to switch domains. The @pasid should have been
+ * attached; otherwise, this fails. The pasid keeps its old configuration
+ * if the replacement fails.
+ *
+ * The caller should always provide a new handle, to avoid races with paths
+ * that hold a lockless reference to the handle.
+ *
+ * Return 0 on success, or an error.
*/
-void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
- ioasid_t pasid)
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ struct iommu_attach_handle *entry;
+ struct iommu_domain *curr_domain;
+ void *curr;
+ int ret;
+
+ if (!group)
+ return -ENODEV;
+
+ if (!domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
+ pasid == IOMMU_NO_PASID || !handle)
+ return -EINVAL;
mutex_lock(&group->mutex);
- __iommu_remove_group_pasid(group, pasid);
- WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(curr)) {
+ ret = xa_err(curr);
+ goto out_unlock;
+ }
+
+ /*
+ * No domain (with or without handle) attached, hence not
+ * a replace case.
+ */
+ if (!curr) {
+ xa_release(&group->pasid_array, pasid);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Reusing the handle is problematic as there are paths that refer to
+ * the handle without holding a lock. To avoid races, reject callers
+ * that attempt it.
+ */
+ if (curr == entry) {
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ curr_domain = pasid_array_entry_to_domain(curr);
+ ret = 0;
+
+ if (curr_domain != domain) {
+ ret = __iommu_set_group_pasid(domain, group,
+ pasid, curr_domain);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /*
+ * The above xa_cmpxchg() reserved the memory, and the
+ * group->mutex is held, so this cannot fail.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
+out_unlock:
mutex_unlock(&group->mutex);
+ return ret;
}
-EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
+EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL");
/*
- * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
- * @dev: the queried device
- * @pasid: the pasid of the device
- * @type: matched domain type, 0 for any match
- *
- * This is a variant of iommu_get_domain_for_dev(). It returns the existing
- * domain attached to pasid of a device. Callers must hold a lock around this
- * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
- * type is being manipulated. This API does not internally resolve races with
- * attach/detach.
+ * iommu_detach_device_pasid() - Detach the domain from pasid of device
+ * @domain: the iommu domain.
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
*
- * Return: attached domain on success, NULL otherwise.
+ * The @domain must have been attached to @pasid of the @dev with
+ * iommu_attach_device_pasid().
*/
-struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
- ioasid_t pasid,
- unsigned int type)
+void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
+ ioasid_t pasid)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
- struct iommu_domain *domain;
-
- if (!group)
- return NULL;
-
- xa_lock(&group->pasid_array);
- domain = xa_load(&group->pasid_array, pasid);
- if (type && domain && domain->type != type)
- domain = ERR_PTR(-EBUSY);
- xa_unlock(&group->pasid_array);
- return domain;
+ mutex_lock(&group->mutex);
+ __iommu_remove_group_pasid(group, pasid, domain);
+ xa_erase(&group->pasid_array, pasid);
+ mutex_unlock(&group->mutex);
}
-EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
+EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
ioasid_t iommu_alloc_global_pasid(struct device *dev)
{
@@ -3476,3 +3638,201 @@ void iommu_free_global_pasid(ioasid_t pasid)
ida_free(&iommu_global_pasid_ida, pasid);
}
EXPORT_SYMBOL_GPL(iommu_free_global_pasid);
+
+/**
+ * iommu_attach_handle_get - Return the attach handle
+ * @group: the iommu group that domain was attached to
+ * @pasid: the pasid within the group
+ * @type: matched domain type, 0 for any match
+ *
+ * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch.
+ *
+ * Return the attach handle to the caller. The life cycle of an iommu attach
+ * handle is from the time when the domain is attached to the time when the
+ * domain is detached. Callers are required to synchronize the call of
+ * iommu_attach_handle_get() with domain attachment and detachment. The attach
+ * handle can only be used during its life cycle.
+ */
+struct iommu_attach_handle *
+iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
+{
+ struct iommu_attach_handle *handle;
+ void *entry;
+
+ xa_lock(&group->pasid_array);
+ entry = xa_load(&group->pasid_array, pasid);
+ if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) {
+ handle = ERR_PTR(-ENOENT);
+ } else {
+ handle = xa_untag_pointer(entry);
+ if (type && handle->domain->type != type)
+ handle = ERR_PTR(-EBUSY);
+ }
+ xa_unlock(&group->pasid_array);
+
+ return handle;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
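
A hedged lookup sketch, e.g. from a fault-delivery path; the wrapper structure is recovered with container_of() (struct my_bond as in the earlier sketch):

	struct iommu_attach_handle *handle;
	struct my_bond *bond;

	handle = iommu_attach_handle_get(group, pasid, IOMMU_DOMAIN_SVA);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	/* valid only while the domain stays attached to this pasid */
	bond = container_of(handle, struct my_bond, handle);
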
+
+/**
+ * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group
+ * @domain: IOMMU domain to attach
+ * @group: IOMMU group that will be attached
+ * @handle: attach handle
+ *
+ * Returns 0 on success and error code on failure.
+ *
+ * This is a variant of iommu_attach_group(). It allows the caller to provide
+ * an attach handle and use it when the domain is attached. This is currently
+ * used by IOMMUFD to deliver the I/O page faults.
+ *
+ * The caller should always provide a new handle, to avoid races with paths
+ * that hold a lockless reference to the handle.
+ */
+int iommu_attach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group,
+ struct iommu_attach_handle *handle)
+{
+ void *entry;
+ int ret;
+
+ if (!handle)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ ret = xa_insert(&group->pasid_array,
+ IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (ret)
+ goto out_unlock;
+
+ ret = __iommu_attach_group(domain, group);
+ if (ret) {
+ xa_release(&group->pasid_array, IOMMU_NO_PASID);
+ goto out_unlock;
+ }
+
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, so this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ IOMMU_NO_PASID, entry, GFP_KERNEL)));
+
+out_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, "IOMMUFD_INTERNAL");
+
+/**
+ * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group
+ * @domain: IOMMU domain to detach
+ * @group: IOMMU group that will be detached
+ *
+ * Detach the specified IOMMU domain from the specified IOMMU group.
+ * It must be used in conjunction with iommu_attach_group_handle().
+ */
+void iommu_detach_group_handle(struct iommu_domain *domain,
+ struct iommu_group *group)
+{
+ mutex_lock(&group->mutex);
+ __iommu_group_set_core_domain(group);
+ xa_erase(&group->pasid_array, IOMMU_NO_PASID);
+ mutex_unlock(&group->mutex);
+}
+EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
+
+/**
+ * iommu_replace_group_handle - replace the domain that a group is attached to
+ * @group: IOMMU group that will be attached to the new domain
+ * @new_domain: new IOMMU domain to replace with
+ * @handle: attach handle
+ *
+ * This API allows the group to switch domains without being forced to go to
+ * the blocking domain in-between. It allows the caller to provide an attach
+ * handle for the new domain and use it when the domain is attached.
+ *
+ * If the currently attached domain is a core domain (e.g. a default_domain),
+ * it will act just like the iommu_attach_group_handle().
+ *
+ * The caller should always provide a new handle, to avoid races with paths
+ * that hold a lockless reference to the handle.
+ */
+int iommu_replace_group_handle(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ struct iommu_attach_handle *handle)
+{
+ void *curr, *entry;
+ int ret;
+
+ if (!new_domain || !handle)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ entry = iommu_make_pasid_array_entry(new_domain, handle);
+ ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
+ if (ret)
+ goto err_unlock;
+
+ ret = __iommu_group_set_domain(group, new_domain);
+ if (ret)
+ goto err_release;
+
+ curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL);
+ WARN_ON(xa_is_err(curr));
+
+ mutex_unlock(&group->mutex);
+
+ return 0;
+err_release:
+ xa_release(&group->pasid_array, IOMMU_NO_PASID);
+err_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
+
+#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+/**
+ * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
+ * @desc: MSI descriptor, will store the MSI page
+ * @msi_addr: MSI target address to be mapped
+ *
+ * The implementation of sw_msi() should take msi_addr, map it to an
+ * IOVA in the domain, and call msi_desc_set_iommu_msi_iova() with the
+ * mapping information.
+ *
+ * Return: 0 on success or negative error code if the mapping failed.
+ */
+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommu_group *group = dev->iommu_group;
+ int ret = 0;
+
+ if (!group)
+ return 0;
+
+ mutex_lock(&group->mutex);
+ /* An IDENTITY domain must pass through */
+ if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) {
+ switch (group->domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_MSI:
+ case IOMMU_COOKIE_DMA_IOVA:
+ ret = iommu_dma_sw_msi(group->domain, desc, msi_addr);
+ break;
+ case IOMMU_COOKIE_IOMMUFD:
+ ret = iommufd_sw_msi(group->domain, desc, msi_addr);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+#endif /* CONFIG_IRQ_MSI_IOMMU */
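
For reference, a hedged sketch of the sw_msi() contract described in the kernel-doc above; msi_cookie_get_iova() is a hypothetical helper that finds or creates the IOVA mapping for msi_addr in the domain:

	static int my_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
			     phys_addr_t msi_addr)
	{
		dma_addr_t iova;
		int ret;

		/* hypothetical: map the page containing msi_addr into the domain */
		ret = msi_cookie_get_iova(domain, msi_addr & PAGE_MASK, &iova);
		if (ret)
			return ret;
		/* record the mapping so the MSI message can be composed later */
		msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
		return 0;
	}
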