summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel/iommu.c')
-rw-r--r--arch/powerpc/kernel/iommu.c391
1 files changed, 264 insertions, 127 deletions
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 2af89a5e379f..0ce71310b7d9 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
@@ -26,8 +27,8 @@
#include <linux/iommu.h>
#include <linux/sched.h>
#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
@@ -36,6 +37,7 @@
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
#define DBG(...)
@@ -68,11 +70,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl)
static void iommu_debugfs_del(struct iommu_table *tbl)
{
char name[10];
- struct dentry *liobn_entry;
sprintf(name, "%08lx", tbl->it_index);
- liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
- debugfs_remove(liobn_entry);
+ debugfs_lookup_and_remove(name, iommu_debugfs_dir);
}
#else
static void iommu_debugfs_add(struct iommu_table *tbl){}
@@ -174,17 +174,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
return 0;
}
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes. Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
.notifier_call = fail_iommu_bus_notify
};
+#endif
static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
- bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
- bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
#endif
return 0;
@@ -473,7 +484,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
BUG_ON(direction == DMA_NONE);
if ((nelems == 0) || !tbl)
- return 0;
+ return -EINVAL;
outs = s = segstart = &sglist[0];
outcount = 1;
@@ -520,7 +531,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
dma_addr = entry << tbl->it_page_shift;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
+ dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
@@ -575,7 +586,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = DMA_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -593,13 +603,12 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = DMA_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
break;
}
- return 0;
+ return -EIO;
}
@@ -635,7 +644,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
tbl->it_ops->flush(tbl);
}
-static void iommu_table_clear(struct iommu_table *tbl)
+void iommu_table_clear(struct iommu_table *tbl)
{
/*
* In case of firmware assisted dump system goes through clean
@@ -676,10 +685,10 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
-static void iommu_table_reserve_pages(struct iommu_table *tbl,
+void iommu_table_reserve_pages(struct iommu_table *tbl,
unsigned long res_start, unsigned long res_end)
{
- int i;
+ unsigned long i;
WARN_ON_ONCE(res_end < res_start);
/*
@@ -690,32 +699,24 @@ static void iommu_table_reserve_pages(struct iommu_table *tbl,
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- tbl->it_reserved_start = res_start;
- tbl->it_reserved_end = res_end;
+ if (res_start < tbl->it_offset)
+ res_start = tbl->it_offset;
- /* Check if res_start..res_end isn't empty and overlaps the table */
- if (res_start && res_end &&
- (tbl->it_offset + tbl->it_size < res_start ||
- res_end < tbl->it_offset))
- return;
-
- for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
- set_bit(i - tbl->it_offset, tbl->it_map);
-}
+ if (res_end > (tbl->it_offset + tbl->it_size))
+ res_end = tbl->it_offset + tbl->it_size;
-static void iommu_table_release_pages(struct iommu_table *tbl)
-{
- int i;
+ /* Check if res_start..res_end is a valid range in the table */
+ if (res_start >= res_end) {
+ tbl->it_reserved_start = tbl->it_offset;
+ tbl->it_reserved_end = tbl->it_offset;
+ return;
+ }
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
- clear_bit(i - tbl->it_offset, tbl->it_map);
+ set_bit(i - tbl->it_offset, tbl->it_map);
}
/*
@@ -769,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
iommu_table_clear(tbl);
if (!welcomed) {
- printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
- novmerge ? "disabled" : "enabled");
+ pr_info("IOMMU table initialized, virtual merging %s\n",
+ str_disabled_enabled(novmerge));
welcomed = 1;
}
@@ -779,6 +780,27 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
return tbl;
}
+bool iommu_table_in_use(struct iommu_table *tbl)
+{
+ unsigned long start = 0, end;
+
+ /* ignore reserved bit0 */
+ if (tbl->it_offset == 0)
+ start = 1;
+
+ /* Simple case with no reserved MMIO32 region */
+ if (!tbl->it_reserved_start && !tbl->it_reserved_end)
+ return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;
+
+ end = tbl->it_reserved_start - tbl->it_offset;
+ if (find_next_bit(tbl->it_map, end, start) != end)
+ return true;
+
+ start = tbl->it_reserved_end - tbl->it_offset;
+ end = tbl->it_size;
+ return find_next_bit(tbl->it_map, end, start) != end;
+}
+
static void iommu_table_free(struct kref *kref)
{
struct iommu_table *tbl;
@@ -795,10 +817,8 @@ static void iommu_table_free(struct kref *kref)
iommu_debugfs_del(tbl);
- iommu_table_release_pages(tbl);
-
/* verify that table contains no entries */
- if (!bitmap_empty(tbl->it_map, tbl->it_size))
+ if (iommu_table_in_use(tbl))
pr_warn("%s: Unexpected TCEs\n", __func__);
/* free bitmap */
@@ -828,12 +848,12 @@ EXPORT_SYMBOL_GPL(iommu_tce_table_put);
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address passed here
- * comprises a page address and offset into that page. The dma_addr_t
- * returned will point to the same byte within the page as was passed in.
+ * is physical address into that page. The dma_addr_t returned will point
+ * to the same byte within the page as was passed in.
*/
-dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
- struct page *page, unsigned long offset, size_t size,
- unsigned long mask, enum dma_data_direction direction,
+dma_addr_t iommu_map_phys(struct device *dev, struct iommu_table *tbl,
+ phys_addr_t phys, size_t size, unsigned long mask,
+ enum dma_data_direction direction,
unsigned long attrs)
{
dma_addr_t dma_handle = DMA_MAPPING_ERROR;
@@ -843,7 +863,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
BUG_ON(direction == DMA_NONE);
- vaddr = page_address(page) + offset;
+ vaddr = phys_to_virt(phys);
uaddr = (unsigned long)vaddr;
if (tbl) {
@@ -870,7 +890,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
return dma_handle;
}
-void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
+void iommu_unmap_phys(struct iommu_table *tbl, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
unsigned long attrs)
{
@@ -898,6 +918,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
+ int tcesize = (1 << tbl->it_page_shift);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -924,7 +945,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
@@ -932,7 +954,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
free_pages((unsigned long)ret, order);
return NULL;
}
- *dma_handle = mapping;
+
+ *dma_handle = mapping | ((u64)ret & (tcesize - 1));
return ret;
}
@@ -943,7 +966,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
@@ -966,6 +989,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
#ifdef CONFIG_IOMMU_API
+
+int dev_has_iommu_table(struct device *dev, void *data)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_dev **ppdev = data;
+
+ if (!dev)
+ return 0;
+
+ if (device_iommu_mapped(dev)) {
+ *ppdev = pdev;
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* SPAPR TCE API
*/
@@ -1053,15 +1093,15 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
- struct iommu_table *tbl,
- unsigned long entry, unsigned long *hpa,
- enum dma_data_direction *direction)
+long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
@@ -1076,68 +1116,10 @@ void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
if (tbl->it_ops->tce_kill)
- tbl->it_ops->tce_kill(tbl, entry, pages, false);
+ tbl->it_ops->tce_kill(tbl, entry, pages);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
-int iommu_take_ownership(struct iommu_table *tbl)
-{
- unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
- int ret = 0;
-
- /*
- * VFIO does not control TCE entries allocation and the guest
- * can write new TCEs on top of existing ones so iommu_tce_build()
- * must be able to release old pages. This functionality
- * requires exchange() callback defined so if it is not
- * implemented, we disallow taking ownership over the table.
- */
- if (!tbl->it_ops->xchg_no_kill)
- return -EINVAL;
-
- spin_lock_irqsave(&tbl->large_pool.lock, flags);
- for (i = 0; i < tbl->nr_pools; i++)
- spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
-
- iommu_table_release_pages(tbl);
-
- if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
- pr_err("iommu_tce: it_map is not empty");
- ret = -EBUSY;
- /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
- iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
- tbl->it_reserved_end);
- } else {
- memset(tbl->it_map, 0xff, sz);
- }
-
- for (i = 0; i < tbl->nr_pools; i++)
- spin_unlock(&tbl->pools[i].lock);
- spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_take_ownership);
-
-void iommu_release_ownership(struct iommu_table *tbl)
-{
- unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
-
- spin_lock_irqsave(&tbl->large_pool.lock, flags);
- for (i = 0; i < tbl->nr_pools; i++)
- spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
-
- memset(tbl->it_map, 0, sz);
-
- iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
- tbl->it_reserved_end);
-
- for (i = 0; i < tbl->nr_pools; i++)
- spin_unlock(&tbl->pools[i].lock);
- spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
-}
-EXPORT_SYMBOL_GPL(iommu_release_ownership);
-
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
/*
@@ -1157,25 +1139,180 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev), iommu_group_id(table_group->group));
-
- return iommu_group_add_device(table_group->group, dev);
+ /*
+ * This is still not adding devices via the IOMMU bus notifier because
+ * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+ * pcibios_scan_phb() first (and this guy adds devices and triggers
+ * the notifier) and only then it calls pci_bus_add_devices() which
+ * configures DMA for buses which also creates PEs and IOMMU groups.
+ */
+ return iommu_probe_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
-void iommu_del_device(struct device *dev)
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+static int
+spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev,
+ struct iommu_domain *old)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_table_group *table_group;
+ struct iommu_group *grp;
+
+ /* At first attach the ownership is already set */
+ if (!domain)
+ return 0;
+
+ grp = iommu_group_get(dev);
+ table_group = iommu_group_get_iommudata(grp);
/*
- * Some devices might not have IOMMU table and group
- * and we needn't detach them from the associated
- * IOMMU groups
+ * The domain being set to PLATFORM from earlier
+ * BLOCKED. The table_group ownership has to be released.
*/
- if (!device_iommu_mapped(dev)) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
- return;
+ table_group->ops->release_ownership(table_group, dev);
+ iommu_group_put(grp);
+
+ return 0;
+}
+
+static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
+ .attach_dev = spapr_tce_platform_iommu_attach_dev,
+};
+
+static struct iommu_domain spapr_tce_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &spapr_tce_platform_domain_ops,
+};
+
+static int
+spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev, struct iommu_domain *old)
+{
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+ int ret = -EINVAL;
+
+ /*
+ * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
+ * also sets the dma_api ops
+ */
+ table_group = iommu_group_get_iommudata(grp);
+ ret = table_group->ops->take_ownership(table_group, dev);
+ iommu_group_put(grp);
+
+ return ret;
+}
+
+static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = {
+ .attach_dev = spapr_tce_blocked_iommu_attach_dev,
+};
+
+static struct iommu_domain spapr_tce_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &spapr_tce_blocked_domain_ops,
+};
+
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ default:
+ break;
}
- iommu_group_remove_device(dev);
+ return false;
+}
+
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+ struct pci_dev *pdev;
+ struct pci_controller *hose;
+
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-ENODEV);
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ return &hose->iommu;
+}
+
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+ struct pci_controller *hose;
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
+
+ if (!hose->controller_ops.device_group)
+ return ERR_PTR(-ENOENT);
+
+ return hose->controller_ops.device_group(hose, pdev);
+}
+
+static const struct iommu_ops spapr_tce_iommu_ops = {
+ .default_domain = &spapr_tce_platform_domain,
+ .blocked_domain = &spapr_tce_blocked_domain,
+ .capable = spapr_tce_iommu_capable,
+ .probe_device = spapr_tce_iommu_probe_device,
+ .release_device = spapr_tce_iommu_release_device,
+ .device_group = spapr_tce_iommu_device_group,
+};
+
+static struct attribute *spapr_tce_iommu_attrs[] = {
+ NULL,
+};
+
+static struct attribute_group spapr_tce_iommu_group = {
+ .name = "spapr-tce-iommu",
+ .attrs = spapr_tce_iommu_attrs,
+};
+
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+ &spapr_tce_iommu_group,
+ NULL,
+};
+
+void ppc_iommu_register_device(struct pci_controller *phb)
+{
+ iommu_device_sysfs_add(&phb->iommu, phb->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ phb->global_number);
+ iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops,
+ phb->parent);
+}
+
+void ppc_iommu_unregister_device(struct pci_controller *phb)
+{
+ iommu_device_unregister(&phb->iommu);
+ iommu_device_sysfs_remove(&phb->iommu);
+}
+
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ ppc_iommu_register_device(hose);
+ }
+ return 0;
}
-EXPORT_SYMBOL_GPL(iommu_del_device);
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+#endif
+
#endif /* CONFIG_IOMMU_API */