summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/mm/dma-mapping.c11
-rw-r--r--arch/s390/include/asm/msi.h17
-rw-r--r--arch/s390/include/asm/pci_dma.h5
-rw-r--r--arch/s390/pci/pci_dma.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c3
-rw-r--r--drivers/gpu/drm/tegra/drm.c2
-rw-r--r--drivers/gpu/host1x/cdma.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c8
-rw-r--r--drivers/iommu/amd/iommu.c5
-rw-r--r--drivers/iommu/dma-iommu.c18
-rw-r--r--drivers/iommu/intel/iommu.c38
-rw-r--r--drivers/iommu/intel/iommu.h2
-rw-r--r--drivers/iommu/intel/irq_remapping.c3
-rw-r--r--drivers/iommu/intel/pasid.c2
-rw-r--r--drivers/iommu/iommu.c77
-rw-r--r--drivers/iommu/iommufd/Kconfig2
-rw-r--r--drivers/iommu/iommufd/device.c8
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h2
-rw-r--r--drivers/iommu/iommufd/main.c3
-rw-r--r--drivers/iommu/iommufd/pages.c6
-rw-r--r--drivers/iommu/iommufd/vfio_compat.c107
-rw-r--r--drivers/iommu/s390-iommu.c17
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c4
-rw-r--r--drivers/media/platform/qcom/venus/firmware.c2
-rw-r--r--drivers/net/ipa/ipa_mem.c6
-rw-r--r--drivers/net/wireless/ath/ath10k/snoc.c2
-rw-r--r--drivers/net/wireless/ath/ath11k/ahb.c4
-rw-r--r--drivers/remoteproc/remoteproc_core.c5
-rw-r--r--drivers/vfio/Kconfig2
-rw-r--r--drivers/vfio/container.c7
-rw-r--r--drivers/vfio/group.c7
-rw-r--r--drivers/vfio/iommufd.c19
-rw-r--r--drivers/vfio/vfio.h8
-rw-r--r--drivers/vfio/vfio_iommu_type1.c25
-rw-r--r--drivers/vfio/vfio_main.c7
-rw-r--r--drivers/vhost/vdpa.c2
-rw-r--r--include/linux/iommu.h33
-rw-r--r--include/linux/iommufd.h12
-rw-r--r--include/linux/irqdomain.h29
-rw-r--r--include/linux/msi.h17
-rw-r--r--kernel/irq/irqdomain.c39
-rw-r--r--kernel/irq/msi.c27
-rw-r--r--tools/testing/selftests/iommu/iommufd.c2
43 files changed, 369 insertions, 259 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c135f6e37a00..8bc01071474a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
len = (j - i) << PAGE_SHIFT;
ret = iommu_map(mapping->domain, iova, phys, len,
- __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
+ __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs),
+ GFP_KERNEL);
if (ret < 0)
goto fail;
iova += len;
@@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
prot = __dma_info_to_prot(dir, attrs);
- ret = iommu_map(mapping->domain, iova, phys, len, prot);
+ ret = iommu_map(mapping->domain, iova, phys, len, prot,
+ GFP_KERNEL);
if (ret < 0)
goto fail;
count += len >> PAGE_SHIFT;
@@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
prot = __dma_info_to_prot(dir, attrs);
- ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
+ ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
+ prot, GFP_KERNEL);
if (ret < 0)
goto fail;
@@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev,
prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
- ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
+ ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL);
if (ret < 0)
goto fail;
diff --git a/arch/s390/include/asm/msi.h b/arch/s390/include/asm/msi.h
new file mode 100644
index 000000000000..399343ed9ffb
--- /dev/null
+++ b/arch/s390/include/asm/msi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MSI_H
+#define _ASM_S390_MSI_H
+#include <asm-generic/msi.h>
+
+/*
+ * Work around S390 not using irq_domain at all so we can't set
+ * IRQ_DOMAIN_FLAG_ISOLATED_MSI. See for an explanation how it works:
+ *
+ * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/
+ *
+ * Note this is less isolated than the ARM/x86 versions as userspace can trigger
+ * MSI belonging to kernel devices within the same gisa.
+ */
+#define arch_is_isolated_msi() true
+
+#endif
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..7119c04c51c5 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry)
/* Prototypes */
void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(void);
+unsigned long *dma_alloc_cpu_table(gfp_t gfp);
void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+ gfp_t gfp);
void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
extern const struct dma_map_ops s390_pci_dma_ops;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index ea478d11fbd1..2d9b01d7ca4c 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
zdev->iommu_pages * PAGE_SIZE);
}
-unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(gfp_t gfp)
{
unsigned long *table, *entry;
- table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+ table = kmem_cache_alloc(dma_region_table_cache, gfp);
if (!table)
return NULL;
@@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table)
kmem_cache_free(dma_region_table_cache, table);
}
-static unsigned long *dma_alloc_page_table(void)
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
{
unsigned long *table, *entry;
- table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+ table = kmem_cache_alloc(dma_page_table_cache, gfp);
if (!table)
return NULL;
@@ -63,7 +63,7 @@ static void dma_free_page_table(void *table)
kmem_cache_free(dma_page_table_cache, table);
}
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
{
unsigned long old_rte, rte;
unsigned long *sto;
@@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
if (reg_entry_isvalid(rte)) {
sto = get_rt_sto(rte);
} else {
- sto = dma_alloc_cpu_table();
+ sto = dma_alloc_cpu_table(gfp);
if (!sto)
return NULL;
@@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
return sto;
}
-static unsigned long *dma_get_page_table_origin(unsigned long *step)
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
{
unsigned long old_ste, ste;
unsigned long *pto;
@@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
if (reg_entry_isvalid(ste)) {
pto = get_st_pto(ste);
} else {
- pto = dma_alloc_page_table();
+ pto = dma_alloc_page_table(gfp);
if (!pto)
return NULL;
set_st_pto(&ste, virt_to_phys(pto));
@@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
return pto;
}
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
+ gfp_t gfp)
{
unsigned long *sto, *pto;
unsigned int rtx, sx, px;
rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx]);
+ sto = dma_get_seg_table_origin(&rto[rtx], gfp);
if (!sto)
return NULL;
sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx]);
+ pto = dma_get_page_table_origin(&sto[sx], gfp);
if (!pto)
return NULL;
@@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
return -EINVAL;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+ GFP_ATOMIC);
if (!entry) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -186,7 +188,8 @@ undo_cpu_trans:
while (i-- > 0) {
page_addr -= PAGE_SIZE;
dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
+ GFP_ATOMIC);
if (!entry)
break;
dma_update_cpu_trans(entry, page_addr, flags);
@@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
spin_lock_init(&zdev->iommu_bitmap_lock);
- zdev->dma_table = dma_alloc_cpu_table();
+ zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
if (!zdev->dma_table) {
rc = -ENOMEM;
goto out;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 648ecf5a8fbc..a4ac94a2ab57 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
u32 offset = (r->offset + i) << imem->iommu_pgshift;
ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
- PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+ PAGE_SIZE, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret < 0) {
nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7bd2e65c2a16..6ca9f396e55b 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
*dma = iova_dma_addr(&tegra->carveout.domain, alloc);
err = iommu_map(tegra->domain, *dma, virt_to_phys(virt),
- size, IOMMU_READ | IOMMU_WRITE);
+ size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (err < 0)
goto free_iova;
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 103fda055394..4ddfcd2138c9 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
pb->dma = iova_dma_addr(&host1x->iova, alloc);
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
- IOMMU_READ);
+ IOMMU_READ, GFP_KERNEL);
if (err)
goto iommu_free_iova;
} else {
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index a2857accc427..2a5cac2658ec 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -276,8 +276,8 @@ iter_chunk:
size = pa_end - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
va_start, &pa_start, size, flags);
- err = iommu_map_atomic(pd->domain, va_start,
- pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags, GFP_ATOMIC);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
@@ -293,8 +293,8 @@ iter_chunk:
size = pa - pa_start + PAGE_SIZE;
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
va_start, &pa_start, size, flags);
- err = iommu_map_atomic(pd->domain, va_start,
- pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags, GFP_ATOMIC);
if (err) {
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
va_start, &pa_start, size, err);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cbeaab55c0db..321d50e9df5b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2271,8 +2271,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
@@ -3671,7 +3669,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (amd_iommu_np_cache)
iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f798c44e0903..c99e4bc55d8c 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -713,7 +713,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
if (!iova)
return DMA_MAPPING_ERROR;
- if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
+ if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
iommu_dma_free_iova(cookie, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
@@ -822,7 +822,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
if (!iova)
goto out_free_pages;
- if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
+ /*
+ * Remove the zone/policy flags from the GFP - these are applied to the
+ * __iommu_dma_alloc_pages() but are not used for the supporting
+ * internal allocations that follow.
+ */
+ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);
+
+ if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
goto out_free_iova;
if (!(ioprot & IOMMU_CACHE)) {
@@ -833,7 +840,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
+ ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
+ gfp);
if (ret < 0 || ret < size)
goto out_free_sg;
@@ -1281,7 +1289,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
* We'll leave any physical concatenation to the IOMMU driver's
* implementation - it knows better than we do.
*/
- ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
+ ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
if (ret < 0 || ret < iova_len)
goto out_free_iova;
@@ -1615,7 +1623,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
if (!iova)
goto out_free_page;
- if (iommu_map(domain, iova, msi_addr, size, prot))
+ if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
goto out_free_iova;
INIT_LIST_HEAD(&msi_page->list);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..6fd223cf639f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node, gfp_t gfp)
{
struct page *page;
void *vaddr = NULL;
- page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+ page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (page)
vaddr = page_address(page);
return vaddr;
@@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node);
+ context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!context)
return NULL;
@@ -908,7 +908,8 @@ pgtable_walk:
#endif
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn, int *target_level)
+ unsigned long pfn, int *target_level,
+ gfp_t gfp)
{
struct dma_pte *parent, *pte;
int level = agaw_to_level(domain->agaw);
@@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page(domain->nid);
+ tmp_page = alloc_pgtable_page(domain->nid, gfp);
if (!tmp_page)
return NULL;
@@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+ root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
while (start_pfn <= end_pfn) {
if (!pte)
- pte = pfn_to_dma_pte(domain, start_pfn, &level);
+ pte = pfn_to_dma_pte(domain, start_pfn, &level,
+ GFP_ATOMIC);
if (dma_pte_present(pte)) {
dma_pte_free_pagetable(domain, start_pfn,
@@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
- unsigned long phys_pfn, unsigned long nr_pages, int prot)
+ unsigned long phys_pfn, unsigned long nr_pages, int prot,
+ gfp_t gfp)
{
struct dma_pte *first_pte = NULL, *pte = NULL;
unsigned int largepage_lvl = 0;
@@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
phys_pfn, nr_pages);
- pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
+ pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
+ gfp);
if (!pte)
return -ENOMEM;
first_pte = pte;
@@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
return __domain_mapping(domain, first_vpfn,
first_vpfn, last_vpfn - first_vpfn + 1,
- DMA_PTE_READ|DMA_PTE_WRITE);
+ DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
}
static int md_domain_init(struct dmar_domain *domain, int guest_width);
@@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node);
+ new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
if (!new_ce)
goto out_unmap;
@@ -4136,7 +4140,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid);
+ domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4298,7 +4302,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
the low bits of hpa would take us onto the next page */
size = aligned_nrpages(hpa, size);
return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
- hpa >> VTD_PAGE_SHIFT, size, prot);
+ hpa >> VTD_PAGE_SHIFT, size, prot, gfp);
}
static int intel_iommu_map_pages(struct iommu_domain *domain,
@@ -4333,7 +4337,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
- BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
+ BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC));
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4392,7 +4397,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int level = 0;
u64 phys = 0;
- pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
+ pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
+ GFP_ATOMIC);
if (pte && dma_pte_present(pte))
phys = dma_pte_addr(pte) +
(iova & (BIT_MASK(level_to_offset_bits(level) +
@@ -4464,8 +4470,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return irq_remapping_enabled == 1;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 06e61e474856..ca9a035e0110 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -737,7 +737,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
extern int dmar_ir_support(void);
-void *alloc_pgtable_page(int node);
+void *alloc_pgtable_page(int node, gfp_t gfp);
void free_pgtable_page(void *vaddr);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f58f5f57af78..6d01fa078c36 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (cap_caching_mode(iommu->cap))
iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb3c7020028d..c5bf74e9372d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -200,7 +200,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
retry:
entries = get_pasid_table_from_pde(&dir[dir_index]);
if (!entries) {
- entries = alloc_pgtable_page(info->iommu->node);
+ entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC);
if (!entries)
return NULL;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5f6a85aea501..501e8bcf1aaa 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -30,6 +30,7 @@
#include <linux/cc_platform.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
+#include <linux/msi.h>
#include "dma-iommu.h"
@@ -930,7 +931,7 @@ map_end:
if (map_size) {
ret = iommu_map(domain, addr - map_size,
addr - map_size, map_size,
- entry->prot);
+ entry->prot, GFP_KERNEL);
if (ret)
goto out;
map_size = 0;
@@ -1898,6 +1899,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
EXPORT_SYMBOL_GPL(device_iommu_capable);
/**
+ * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
+ * for a group
+ * @group: Group to query
+ *
+ * IOMMU groups should not have differing values of
+ * msi_device_has_isolated_msi() for devices in a group. However nothing
+ * directly prevents this, so ensure mistakes don't result in isolation failures
+ * by checking that all the devices are the same.
+ */
+bool iommu_group_has_isolated_msi(struct iommu_group *group)
+{
+ struct group_device *group_dev;
+ bool ret = true;
+
+ mutex_lock(&group->mutex);
+ list_for_each_entry(group_dev, &group->devices, list)
+ ret &= msi_device_has_isolated_msi(group_dev->dev);
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
+
+/**
* iommu_set_fault_handler() - set a fault handler for an iommu domain
* @domain: iommu domain
* @handler: fault handler
@@ -2360,34 +2384,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static int _iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
if (ret == 0 && ops->iotlb_sync_map)
ops->iotlb_sync_map(domain, iova, size);
return ret;
}
-
-int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- might_sleep();
- return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map);
-int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(iommu_map_atomic);
-
static size_t __iommu_unmap_pages(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather)
@@ -2477,9 +2494,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_unmap_fast);
-static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot,
- gfp_t gfp)
+ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp)
{
const struct iommu_domain_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
@@ -2487,6 +2504,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
unsigned int i = 0;
int ret;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+
+ /* Discourage passing strange GFP flags */
+ if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM)))
+ return -EINVAL;
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
@@ -2526,21 +2550,8 @@ out_err:
return ret;
}
-
-ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- might_sleep();
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL);
-}
EXPORT_SYMBOL_GPL(iommu_map_sg);
-ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot)
-{
- return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
-}
-
/**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
* @domain: the iommu domain where the fault has happened
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 8306616b6d81..ada693ea51a7 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -23,7 +23,7 @@ config IOMMUFD_VFIO_CONTAINER
removed.
IOMMUFD VFIO container emulation is known to lack certain features
- of the native VFIO container, such as no-IOMMU support, peer-to-peer
+ of the native VFIO container, such as peer-to-peer
DMA mapping, PPC IOMMU support, as well as other potentially
undiscovered gaps. This option is currently intended for the
purpose of testing IOMMUFD with unmodified userspace supporting VFIO
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321af..a0c66f47a65a 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,7 +4,6 @@
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
-#include <linux/irqdomain.h>
#include "io_pagetable.h"
#include "iommufd_private.h"
@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
* operation from the device (eg a simple DMA) cannot trigger an
* interrupt outside this iommufd context.
*/
- if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
- !irq_domain_check_msi_remap()) {
+ if (!iommu_group_has_isolated_msi(idev->group)) {
if (!allow_unsafe_interrupts)
return -EPERM;
@@ -346,10 +344,6 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
rc = iommufd_device_do_attach(idev, hwpt);
if (rc)
goto out_put_pt_obj;
-
- mutex_lock(&hwpt->ioas->mutex);
- list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
- mutex_unlock(&hwpt->ioas->mutex);
break;
}
case IOMMUFD_OBJ_IOAS: {
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 222e86591f8a..9d7f71510ca1 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -18,6 +18,8 @@ struct iommufd_ctx {
struct xarray objects;
u8 account_mode;
+ /* Compatibility with VFIO no iommu */
+ u8 no_iommu_mode;
struct iommufd_ioas *vfio_ioas;
};
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 083e6fcbe10a..3fbe636c3d8a 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -252,9 +252,12 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas;
+ struct iommu_ioas_copy ioas_copy;
struct iommu_ioas_iova_ranges iova_ranges;
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
+ struct iommu_option option;
+ struct iommu_vfio_ioas vfio_ioas;
#ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test;
#endif
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 1e1d3509efae..f8d92c9bb65b 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain,
size % PAGE_SIZE);
while (size) {
- rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
+ rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova += PAGE_SIZE;
@@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
else
rc = iommu_map(domain, iova,
PFN_PHYS(batch->pfns[cur]) + page_offset,
- next_iova - iova, area->iommu_prot);
+ next_iova - iova, area->iommu_prot,
+ GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
iova = next_iova;
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
index 3ceca0e8311c..514494a0025b 100644
--- a/drivers/iommu/iommufd/vfio_compat.c
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -26,39 +26,84 @@ out_unlock:
}
/**
- * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
+ * iommufd_vfio_compat_ioas_get_id - Ensure a compat IOAS exists
+ * @ictx: Context to operate on
+ * @out_ioas_id: The IOAS ID of the compatibility IOAS
+ *
+ * Return the ID of the current compatibility IOAS. The ID can be passed into
+ * other functions that take an ioas_id.
+ */
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+{
+ struct iommufd_ioas *ioas;
+
+ ioas = get_compat_ioas(ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+ *out_ioas_id = ioas->obj.id;
+ iommufd_put_object(&ioas->obj);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_set_no_iommu - Called when a no-iommu device is attached
+ * @ictx: Context to operate on
+ *
+ * This allows selecting the VFIO_NOIOMMU_IOMMU and blocks normal types.
+ */
+int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
+{
+ int ret;
+
+ xa_lock(&ictx->objects);
+ if (!ictx->vfio_ioas) {
+ ictx->no_iommu_mode = 1;
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ xa_unlock(&ictx->objects);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_set_no_iommu, IOMMUFD_VFIO);
+
+/**
+ * iommufd_vfio_compat_ioas_create - Ensure the compat IOAS is created
* @ictx: Context to operate on
- * @out_ioas_id: The ioas_id the caller should use
*
* The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
* on since they do not have an IOAS ID input in their ABI. Only attaching a
- * group should cause a default creation of the internal ioas, this returns the
- * existing ioas if it has already been assigned somehow.
+ * group should cause a default creation of the internal ioas, this does nothing
+ * if an existing ioas has already been assigned somehow.
*/
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
{
struct iommufd_ioas *ioas = NULL;
- struct iommufd_ioas *out_ioas;
+ int ret;
ioas = iommufd_ioas_alloc(ictx);
if (IS_ERR(ioas))
return PTR_ERR(ioas);
xa_lock(&ictx->objects);
- if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj))
- out_ioas = ictx->vfio_ioas;
- else {
- out_ioas = ioas;
- ictx->vfio_ioas = ioas;
+ /*
+ * VFIO won't allow attaching a container to both iommu and no iommu
+ * operation
+ */
+ if (ictx->no_iommu_mode) {
+ ret = -EINVAL;
+ goto out_abort;
}
- xa_unlock(&ictx->objects);
- *out_ioas_id = out_ioas->obj.id;
- if (out_ioas != ioas) {
- iommufd_put_object(&out_ioas->obj);
- iommufd_object_abort(ictx, &ioas->obj);
- return 0;
+ if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) {
+ ret = 0;
+ iommufd_put_object(&ictx->vfio_ioas->obj);
+ goto out_abort;
}
+ ictx->vfio_ioas = ioas;
+ xa_unlock(&ictx->objects);
+
/*
* An automatically created compat IOAS is treated as a userspace
* created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
@@ -67,8 +112,13 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
*/
iommufd_object_finalize(ictx, &ioas->obj);
return 0;
+
+out_abort:
+ xa_unlock(&ictx->objects);
+ iommufd_object_abort(ictx, &ioas->obj);
+ return ret;
}
-EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create, IOMMUFD_VFIO);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
{
@@ -235,6 +285,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
case VFIO_UNMAP_ALL:
return 1;
+ case VFIO_NOIOMMU_IOMMU:
+ return IS_ENABLED(CONFIG_VFIO_NOIOMMU);
+
case VFIO_DMA_CC_IOMMU:
return iommufd_vfio_cc_iommu(ictx);
@@ -261,10 +314,24 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
{
+ bool no_iommu_mode = READ_ONCE(ictx->no_iommu_mode);
struct iommufd_ioas *ioas = NULL;
int rc = 0;
- if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
+ /*
+ * Emulation for NOIOMMU is imperfect in that VFIO blocks almost all
+ * other ioctls. We let them keep working but they mostly fail since no
+ * IOAS should exist.
+ */
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU &&
+ no_iommu_mode) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ return 0;
+ }
+
+ if ((type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) ||
+ no_iommu_mode)
return -EINVAL;
/* VFIO fails the set_iommu if there is no group */
@@ -381,7 +448,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
};
size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
struct vfio_info_cap_header __user *last_cap = NULL;
- struct vfio_iommu_type1_info info;
+ struct vfio_iommu_type1_info info = {};
struct iommufd_ioas *ioas;
size_t total_cap_size;
int rc;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index ed33c6cce083..c489714ddc4d 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return true;
default:
return false;
}
@@ -52,7 +50,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
if (!s390_domain)
return NULL;
- s390_domain->dma_table = dma_alloc_cpu_table();
+ s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
if (!s390_domain->dma_table) {
kfree(s390_domain);
return NULL;
@@ -260,7 +258,8 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
phys_addr_t pa, dma_addr_t dma_addr,
- unsigned long nr_pages, int flags)
+ unsigned long nr_pages, int flags,
+ gfp_t gfp)
{
phys_addr_t page_addr = pa & PAGE_MASK;
unsigned long *entry;
@@ -268,7 +267,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
int rc;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ gfp);
if (unlikely(!entry)) {
rc = -ENOMEM;
goto undo_cpu_trans;
@@ -284,7 +284,7 @@ undo_cpu_trans:
while (i-- > 0) {
dma_addr -= PAGE_SIZE;
entry = dma_walk_cpu_trans(s390_domain->dma_table,
- dma_addr);
+ dma_addr, gfp);
if (!entry)
break;
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -301,7 +301,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
int rc = 0;
for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
+ GFP_ATOMIC);
if (unlikely(!entry)) {
rc = -EINVAL;
break;
@@ -339,7 +340,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags);
+ pgcount, flags, gfp);
if (!rc)
*mapped = size;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 973ede0197e3..b4069f825a9b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4692,7 +4692,7 @@ static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data)
}
/* the pre-ITS breaks isolation, so disable MSI remapping */
- its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP;
+ its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_ISOLATED_MSI;
return true;
}
return false;
@@ -5074,7 +5074,7 @@ static int __init its_probe_one(struct resource *res,
its->cmd_write = its->cmd_base;
its->fwnode_handle = handle;
its->get_msi_base = its_irq_get_msi_base;
- its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP;
+ its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI;
its_enable_quirks(its);
diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 142d4c74017c..07d4dceb5e72 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -158,7 +158,7 @@ static int venus_boot_no_tz(struct venus_core *core, phys_addr_t mem_phys,
core->fw.mapped_mem_size = mem_size;
ret = iommu_map(iommu, VENUS_FW_START_ADDR, mem_phys, mem_size,
- IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);
+ IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV, GFP_KERNEL);
if (ret) {
dev_err(dev, "could not map video firmware region\n");
return ret;
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 9ec5af323f73..991a7d39f066 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -466,7 +466,8 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size)
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
- ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+ ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret)
return ret;
@@ -574,7 +575,8 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
- ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+ ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE,
+ GFP_KERNEL);
if (ret)
return ret;
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index cfcb759a87de..9a82f0336d95 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -1639,7 +1639,7 @@ static int ath10k_fw_init(struct ath10k *ar)
ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr,
ar->msa.paddr, ar->msa.mem_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath10k_err(ar, "failed to map firmware region: %d\n", ret);
goto err_iommu_detach;
diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index d34a4d6325b2..df8fdc7067f9 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -1021,7 +1021,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab)
ret = iommu_map(iommu_dom, ab_ahb->fw.msa_paddr,
ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath11k_err(ab, "failed to map firmware region: %d\n", ret);
goto err_iommu_detach;
@@ -1029,7 +1029,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab)
ret = iommu_map(iommu_dom, ab_ahb->fw.ce_paddr,
ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_size,
- IOMMU_READ | IOMMU_WRITE);
+ IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
if (ret) {
ath11k_err(ab, "failed to map firmware CE region: %d\n", ret);
goto err_iommu_unmap;
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 1cd4815a6dd1..80072b6b6283 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -643,7 +643,8 @@ static int rproc_handle_devmem(struct rproc *rproc, void *ptr,
if (!mapping)
return -ENOMEM;
- ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags);
+ ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags,
+ GFP_KERNEL);
if (ret) {
dev_err(dev, "failed to map devmem: %d\n", ret);
goto out;
@@ -737,7 +738,7 @@ static int rproc_alloc_carveout(struct rproc *rproc,
}
ret = iommu_map(rproc->domain, mem->da, dma, mem->len,
- mem->flags);
+ mem->flags, GFP_KERNEL);
if (ret) {
dev_err(dev, "iommu_map failed: %d\n", ret);
goto free_mapping;
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index a8f544629467..89e06c981e43 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -32,6 +32,7 @@ config VFIO_IOMMU_SPAPR_TCE
tristate
depends on SPAPR_TCE_IOMMU
default VFIO
+endif
config VFIO_NOIOMMU
bool "VFIO No-IOMMU support"
@@ -46,7 +47,6 @@ config VFIO_NOIOMMU
this mode since there is no IOMMU to provide DMA translation.
If you don't know what to do here, say N.
-endif
config VFIO_VIRQFD
bool
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index b7a9560ab25e..89f10becf962 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -29,13 +29,6 @@ static struct vfio {
struct mutex iommu_drivers_lock;
} vfio;
-#ifdef CONFIG_VFIO_NOIOMMU
-bool vfio_noiommu __read_mostly;
-module_param_named(enable_unsafe_noiommu_mode,
- vfio_noiommu, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
-#endif
-
static void *vfio_noiommu_open(unsigned long arg)
{
if (arg != VFIO_NOIOMMU_IOMMU)
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
index bb24b2f0271e..e166ad7ce6e7 100644
--- a/drivers/vfio/group.c
+++ b/drivers/vfio/group.c
@@ -133,9 +133,12 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
iommufd = iommufd_ctx_from_file(f.file);
if (!IS_ERR(iommufd)) {
- u32 ioas_id;
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
+ group->type == VFIO_NO_IOMMU)
+ ret = iommufd_vfio_compat_set_no_iommu(iommufd);
+ else
+ ret = iommufd_vfio_compat_ioas_create(iommufd);
- ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
if (ret) {
iommufd_ctx_put(group->iommufd);
goto out_unlock;
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 4f82a6fa7c6c..db4efbd56042 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -18,6 +18,20 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
lockdep_assert_held(&vdev->dev_set->lock);
+ if (vfio_device_is_noiommu(vdev)) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ /*
+ * Require no compat ioas to be assigned to proceed. The basic
+ * statement is that the user cannot have done something that
+ * implies they expected translation to exist
+ */
+ if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id))
+ return -EPERM;
+ return 0;
+ }
+
/*
* If the driver doesn't provide this op then it means the device does
* not do DMA at all. So nothing to do.
@@ -29,7 +43,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
if (ret)
return ret;
- ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+ ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id);
if (ret)
goto err_unbind;
ret = vdev->ops->attach_ioas(vdev, &ioas_id);
@@ -52,6 +66,9 @@ void vfio_iommufd_unbind(struct vfio_device *vdev)
{
lockdep_assert_held(&vdev->dev_set->lock);
+ if (vfio_device_is_noiommu(vdev))
+ return;
+
if (vdev->ops->unbind_iommufd)
vdev->ops->unbind_iommufd(vdev);
}
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index f8219a438bfb..9e94abcf8ee1 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -10,10 +10,10 @@
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/module.h>
+#include <linux/vfio.h>
struct iommufd_ctx;
struct iommu_group;
-struct vfio_device;
struct vfio_container;
void vfio_device_put_registration(struct vfio_device *device);
@@ -88,6 +88,12 @@ bool vfio_device_has_container(struct vfio_device *device);
int __init vfio_group_init(void);
void vfio_group_cleanup(void);
+static inline bool vfio_device_is_noiommu(struct vfio_device *vdev)
+{
+ return IS_ENABLED(CONFIG_VFIO_NOIOMMU) &&
+ vdev->group->type == VFIO_NO_IOMMU;
+}
+
#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2209372f236d..a44ac3fe657c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -37,7 +37,6 @@
#include <linux/vfio.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
-#include <linux/irqdomain.h>
#include "vfio.h"
#define DRIVER_VERSION "0.2"
@@ -1480,7 +1479,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
list_for_each_entry(d, &iommu->domain_list, next) {
ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
- npage << PAGE_SHIFT, prot | IOMMU_CACHE);
+ npage << PAGE_SHIFT, prot | IOMMU_CACHE,
+ GFP_KERNEL);
if (ret)
goto unwind;
@@ -1777,8 +1777,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
size = npage << PAGE_SHIFT;
}
- ret = iommu_map(domain->domain, iova, phys,
- size, dma->prot | IOMMU_CACHE);
+ ret = iommu_map(domain->domain, iova, phys, size,
+ dma->prot | IOMMU_CACHE, GFP_KERNEL);
if (ret) {
if (!dma->iommu_mapped) {
vfio_unpin_pages_remote(dma, iova,
@@ -1873,7 +1873,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *
continue;
ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2,
- IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
+ IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL);
if (!ret) {
size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE);
@@ -2169,12 +2169,6 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
}
-/* Redundantly walks non-present capabilities to simplify caller */
-static int vfio_iommu_device_capable(struct device *dev, void *data)
-{
- return device_iommu_capable(dev, (enum iommu_cap)data);
-}
-
static int vfio_iommu_domain_alloc(struct device *dev, void *data)
{
struct iommu_domain **domain = data;
@@ -2189,7 +2183,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
struct vfio_domain *domain, *d;
- bool resv_msi, msi_remap;
+ bool resv_msi;
phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy);
@@ -2287,11 +2281,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list);
- msi_remap = irq_domain_check_msi_remap() ||
- iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP,
- vfio_iommu_device_capable);
-
- if (!allow_unsafe_interrupts && !msi_remap) {
+ if (!allow_unsafe_interrupts &&
+ !iommu_group_has_isolated_msi(iommu_group)) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
__func__);
ret = -EPERM;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 5177bb061b17..90541fc94988 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -45,6 +45,13 @@ static struct vfio {
struct ida device_ida;
} vfio;
+#ifdef CONFIG_VFIO_NOIOMMU
+bool vfio_noiommu __read_mostly;
+module_param_named(enable_unsafe_noiommu_mode,
+ vfio_noiommu, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
+#endif
+
static DEFINE_XARRAY(vfio_device_set_xa);
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ec32f785dfde..fd1536de5b1d 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -792,7 +792,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
r = ops->set_map(vdpa, asid, iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
- perm_to_iommu_flags(perm));
+ perm_to_iommu_flags(perm), GFP_KERNEL);
}
if (r) {
vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 46e1347bfa22..35d5d50e6d12 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
enum iommu_cap {
IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */
- IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */
IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */
IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for
DMA protection and we should too */
@@ -455,6 +454,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
+extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
extern struct iommu_group *iommu_group_get_by_id(int id);
extern void iommu_domain_free(struct iommu_domain *domain);
@@ -467,19 +467,15 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
-extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot);
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size);
extern size_t iommu_unmap_fast(struct iommu_domain *domain,
unsigned long iova, size_t size,
struct iommu_iotlb_gather *iotlb_gather);
extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
- struct scatterlist *sg, unsigned int nents, int prot);
-extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
- unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot);
+ struct scatterlist *sg, unsigned int nents,
+ int prot, gfp_t gfp);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
extern void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler, void *token);
@@ -773,14 +769,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
}
static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot)
-{
- return -ENODEV;
-}
-
-static inline int iommu_map_atomic(struct iommu_domain *domain,
- unsigned long iova, phys_addr_t paddr,
- size_t size, int prot)
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
return -ENODEV;
}
@@ -800,14 +789,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
static inline ssize_t iommu_map_sg(struct iommu_domain *domain,
unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot)
-{
- return -ENODEV;
-}
-
-static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain,
- unsigned long iova, struct scatterlist *sg,
- unsigned int nents, int prot)
+ unsigned int nents, int prot, gfp_t gfp)
{
return -ENODEV;
}
@@ -1118,7 +1100,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
static inline size_t iommu_map_sgtable(struct iommu_domain *domain,
unsigned long iova, struct sg_table *sgt, int prot)
{
- return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot);
+ return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot,
+ GFP_KERNEL);
}
#ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 650d45629647..c0b5b3ac34f1 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
unsigned long iova, unsigned long length);
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
void *data, size_t len, unsigned int flags);
-int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
+int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx);
+int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx);
#else /* !CONFIG_IOMMUFD */
static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{
@@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long
return -EOPNOTSUPP;
}
-static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
- u32 *out_ioas_id)
+static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx)
{
return -EOPNOTSUPP;
}
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a372086750ca..0a3e974b7288 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -192,8 +192,10 @@ enum {
/* Irq domain implements MSIs */
IRQ_DOMAIN_FLAG_MSI = (1 << 4),
- /* Irq domain implements MSI remapping */
- IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),
+ /*
+ * Irq domain implements isolated MSI, see msi_device_has_isolated_msi()
+ */
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5),
/* Irq domain doesn't translate anything */
IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6),
@@ -276,7 +278,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
void *host_data);
extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
-extern bool irq_domain_check_msi_remap(void);
extern void irq_set_default_host(struct irq_domain *host);
extern struct irq_domain *irq_get_default_host(void);
extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
@@ -559,13 +560,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return domain->flags & IRQ_DOMAIN_FLAG_MSI;
}
-static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
-{
- return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
-}
-
-extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
-
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{
return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT;
@@ -611,17 +605,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return false;
}
-static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
-{
- return false;
-}
-
-static inline bool
-irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
-{
- return false;
-}
-
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{
return false;
@@ -641,10 +624,6 @@ static inline struct irq_domain *irq_find_matching_fwnode(
{
return NULL;
}
-static inline bool irq_domain_check_msi_remap(void)
-{
- return false;
-}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a112b913fff9..13c9b74a4575 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data {
} __attribute__ ((packed)) arch_msi_msg_data_t;
#endif
+#ifndef arch_is_isolated_msi
+#define arch_is_isolated_msi() false
+#endif
+
/**
* msi_msg - Representation of a MSI message
* @address_lo: Low 32 bits of msi message address
@@ -649,6 +653,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain);
+
+bool msi_device_has_isolated_msi(struct device *dev);
+#else /* CONFIG_GENERIC_MSI_IRQ */
+static inline bool msi_device_has_isolated_msi(struct device *dev)
+{
+ /*
+ * Arguably if the platform does not enable MSI support then it has
+ * "isolated MSI", as an interrupt controller that cannot receive MSIs
+ * is inherently isolated by our definition. The default definition for
+ * arch_is_isolated_msi() is conservative and returns false anyhow.
+ */
+ return arch_is_isolated_msi();
+}
#endif /* CONFIG_GENERIC_MSI_IRQ */
/* PCI specific interfaces */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 798a9042421f..9d1b3ab07a16 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -437,31 +437,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
- * irq_domain_check_msi_remap - Check whether all MSI irq domains implement
- * IRQ remapping
- *
- * Return: false if any MSI irq domain does not support IRQ remapping,
- * true otherwise (including if there is no MSI irq domain)
- */
-bool irq_domain_check_msi_remap(void)
-{
- struct irq_domain *h;
- bool ret = true;
-
- mutex_lock(&irq_domain_mutex);
- list_for_each_entry(h, &irq_domain_list, link) {
- if (irq_domain_is_msi(h) &&
- !irq_domain_hierarchical_is_msi_remap(h)) {
- ret = false;
- break;
- }
- }
- mutex_unlock(&irq_domain_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap);
-
-/**
* irq_set_default_host() - Set a "default" irq domain
* @domain: default domain pointer
*
@@ -1815,20 +1790,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
if (domain->ops->alloc)
domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
}
-
-/**
- * irq_domain_hierarchical_is_msi_remap - Check if the domain or any
- * parent has MSI remapping support
- * @domain: domain pointer
- */
-bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
-{
- for (; domain; domain = domain->parent) {
- if (irq_domain_is_msi_remap(domain))
- return true;
- }
- return false;
-}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
/**
* irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 783a3e6a0b10..d0f0389920c1 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1627,3 +1627,30 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
{
return (struct msi_domain_info *)domain->host_data;
}
+
+/**
+ * msi_device_has_isolated_msi - True if the device has isolated MSI
+ * @dev: The device to check
+ *
+ * Isolated MSI means that HW modeled by an irq_domain on the path from the
+ * initiating device to the CPU will validate that the MSI message specifies an
+ * interrupt number that the device is authorized to trigger. This must block
+ * devices from triggering interrupts they are not authorized to trigger.
+ * Currently authorization means the MSI vector is one assigned to the device.
+ *
+ * This is interesting for securing VFIO use cases where a rouge MSI (eg created
+ * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
+ * impact outside its security domain, eg userspace triggering interrupts on
+ * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
+ * triggering interrupts on another VM.
+ */
+bool msi_device_has_isolated_msi(struct device *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(dev);
+
+ for (; domain; domain = domain->parent)
+ if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
+ return true;
+ return arch_is_isolated_msi();
+}
+EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 8aa8a346cf22..fa08209268c4 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -1259,7 +1259,7 @@ TEST_F(iommufd_mock_domain, user_copy)
test_cmd_destroy_access_pages(
access_cmd.id, access_cmd.access_pages.out_access_pages_id);
- test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id);
+ test_cmd_destroy_access(access_cmd.id);
test_ioctl_destroy(ioas_id);
}