Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/iommu/Kconfig | 4
-rw-r--r-- | drivers/iommu/amd_iommu.c | 464
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 44
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 3
-rw-r--r-- | drivers/iommu/dma-iommu.c | 2
-rw-r--r-- | drivers/iommu/intel-iommu.c | 26
-rw-r--r-- | drivers/iommu/intel-svm.c | 30
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 4
-rw-r--r-- | drivers/iommu/iommu.c | 17
-rw-r--r-- | drivers/iommu/iova.c | 30
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 353
-rw-r--r-- | drivers/iommu/omap-iommu.c | 2
-rw-r--r-- | drivers/iommu/s390-iommu.c | 15
13 files changed, 691 insertions, 303 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c88cfa7522b2..f73ff28f77e2 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG config ROCKCHIP_IOMMU bool "Rockchip IOMMU Support" - depends on ARM + depends on ARM || ARM64 depends on ARCH_ROCKCHIP || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARM_LPAE + depends on ARM || IOMMU_DMA depends on ARCH_RENESAS || COMPILE_TEST select IOMMU_API select IOMMU_IO_PGTABLE_LPAE diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 63cacf5d6cf2..83a55df17dfe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,25 +89,6 @@ LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - struct dma_ops_domain *dma_dom; -}; - -struct flush_queue { - spinlock_t lock; - unsigned next; - struct flush_queue_entry *entries; -}; - -static DEFINE_PER_CPU(struct flush_queue, flush_queue); - -static atomic_t queue_timer_on; -static struct timer_list queue_timer; - /* * Domain for untranslated devices - only allocated * if iommu=pt passed on kernel cmd line. @@ -138,6 +119,8 @@ struct iommu_dev_data { PPR completions */ u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ }; /* @@ -153,6 +136,20 @@ static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; + u64 counter; /* Flush counter when this entry was added to the queue */ +}; + +struct flush_queue { + struct flush_queue_entry *entries; + unsigned head, tail; + spinlock_t lock; +}; + /* * Data container for a dma_ops specific protection domain */ @@ -162,6 +159,36 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; + + struct flush_queue __percpu *flush_queue; + + /* + * We need two counter here to be race-free wrt. IOTLB flushing and + * adding entries to the flush queue. + * + * The flush_start_cnt is incremented _before_ the IOTLB flush starts. + * New entries added to the flush ring-buffer get their 'counter' value + * from here. This way we can make sure that entries added to the queue + * (or other per-cpu queues of the same domain) while the TLB is about + * to be flushed are not considered to be flushed already. + */ + atomic64_t flush_start_cnt; + + /* + * The flush_finish_cnt is incremented when an IOTLB flush is complete. + * This value is always smaller than flush_start_cnt. The queue_add + * function frees all IOVAs that have a counter value smaller than + * flush_finish_cnt. This makes sure that we only free IOVAs that are + * flushed out of the IOTLB of the domain. 
+ */ + atomic64_t flush_finish_cnt; + + /* + * Timer to make sure we don't keep IOVAs around unflushed + * for too long + */ + struct timer_list flush_timer; + atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -253,6 +280,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) list_add_tail(&dev_data->dev_data_list, &dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); + ratelimit_default_init(&dev_data->rs); + return dev_data; } @@ -551,6 +580,29 @@ static void dump_command(unsigned long phys_addr) pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); } +static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, + u64 address, int flags) +{ + struct iommu_dev_data *dev_data = NULL; + struct pci_dev *pdev; + + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff); + if (pdev) + dev_data = get_dev_data(&pdev->dev); + + if (dev_data && __ratelimit(&dev_data->rs)) { + dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n", + domain_id, address, flags); + } else if (printk_ratelimit()) { + pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domain_id, address, flags); + } + + if (pdev) + pci_dev_put(pdev); +} + static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { int type, devid, domid, flags; @@ -575,7 +627,12 @@ retry: goto retry; } - printk(KERN_ERR "AMD-Vi: Event logged ["); + if (type == EVENT_TYPE_IO_FAULT) { + amd_iommu_report_page_fault(devid, domid, address, flags); + return; + } else { + printk(KERN_ERR "AMD-Vi: Event logged ["); + } switch (type) { case EVENT_TYPE_ILL_DEV: @@ -585,12 +642,6 @@ retry: address, flags); dump_dte_entry(devid); break; - case EVENT_TYPE_IO_FAULT: - printk("IO_PAGE_FAULT device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; case EVENT_TYPE_DEV_TAB_ERR: printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " "address=0x%016llx flags=0x%04x]\n", @@ -848,19 +899,20 @@ static int wait_on_sem(volatile u64 *sem) } static void copy_cmd_to_buffer(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - u32 tail) + struct iommu_cmd *cmd) { u8 *target; - target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + target = iommu->cmd_buf + iommu->cmd_buf_tail; + + iommu->cmd_buf_tail += sizeof(*cmd); + iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); /* Tell the IOMMU about it */ - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } static void build_completion_wait(struct iommu_cmd *cmd, u64 address) @@ -1018,33 +1070,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu, struct iommu_cmd *cmd, bool sync) { - u32 left, tail, head, next_tail; + unsigned int count = 0; + u32 left, next_tail; + next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; again: - - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; - left = (head - next_tail) % CMD_BUFFER_SIZE; + left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE; if (left <= 0x20) { - struct iommu_cmd sync_cmd; - int ret; - - iommu->cmd_sem = 0; + /* Skip udelay() the first 
time around */ + if (count++) { + if (count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: Command buffer timeout\n"); + return -EIO; + } - build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem); - copy_cmd_to_buffer(iommu, &sync_cmd, tail); + udelay(1); + } - if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0) - return ret; + /* Update head and recheck remaining space */ + iommu->cmd_buf_head = readl(iommu->mmio_base + + MMIO_CMD_HEAD_OFFSET); goto again; } - copy_cmd_to_buffer(iommu, cmd, tail); + copy_cmd_to_buffer(iommu, cmd); - /* We need to sync now to make sure all commands are processed */ + /* Do we need to make sure all commands are processed? */ iommu->need_sync = sync; return 0; @@ -1733,6 +1786,180 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + kfree(queue->entries); + } + + free_percpu(dom->flush_queue); + + dom->flush_queue = NULL; +} + +static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) +{ + int cpu; + + atomic64_set(&dom->flush_start_cnt, 0); + atomic64_set(&dom->flush_finish_cnt, 0); + + dom->flush_queue = alloc_percpu(struct flush_queue); + if (!dom->flush_queue) + return -ENOMEM; + + /* First make sure everything is cleared */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->head = 0; + queue->tail = 0; + queue->entries = NULL; + } + + /* Now start doing the allocation */ + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), + GFP_KERNEL); + if (!queue->entries) { + dma_ops_domain_free_flush_queue(dom); + return -ENOMEM; + } + + spin_lock_init(&queue->lock); + } + + return 0; +} + +static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) +{ + atomic64_inc(&dom->flush_start_cnt); + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + atomic64_inc(&dom->flush_finish_cnt); +} + +static inline bool queue_ring_full(struct flush_queue *queue) +{ + assert_spin_locked(&queue->lock); + + return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); +} + +#define queue_ring_for_each(i, q) \ + for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) + +static inline unsigned queue_ring_add(struct flush_queue *queue) +{ + unsigned idx = queue->tail; + + assert_spin_locked(&queue->lock); + queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; + + return idx; +} + +static inline void queue_ring_remove_head(struct flush_queue *queue) +{ + assert_spin_locked(&queue->lock); + queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; +} + +static void queue_ring_free_flushed(struct dma_ops_domain *dom, + struct flush_queue *queue) +{ + u64 counter = atomic64_read(&dom->flush_finish_cnt); + int idx; + + queue_ring_for_each(idx, queue) { + /* + * This assumes that counter values in the ring-buffer are + * monotonously rising. 
+ */ + if (queue->entries[idx].counter >= counter) + break; + + free_iova_fast(&dom->iovad, + queue->entries[idx].iova_pfn, + queue->entries[idx].pages); + + queue_ring_remove_head(queue); + } +} + +static void queue_add(struct dma_ops_domain *dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue *queue; + unsigned long flags; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(dom->flush_queue); + spin_lock_irqsave(&queue->lock, flags); + + /* + * First remove the enries from the ring-buffer that are already + * flushed to make the below queue_ring_full() check less likely + */ + queue_ring_free_flushed(dom, queue); + + /* + * When ring-queue is full, flush the entries from the IOTLB so + * that we can free all entries with queue_ring_free_flushed() + * below. + */ + if (queue_ring_full(queue)) { + dma_ops_domain_flush_tlb(dom); + queue_ring_free_flushed(dom, queue); + } + + idx = queue_ring_add(queue); + + queue->entries[idx].iova_pfn = address; + queue->entries[idx].pages = pages; + queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); + + spin_unlock_irqrestore(&queue->lock, flags); + + if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) + mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); + + put_cpu_ptr(dom->flush_queue); +} + +static void queue_flush_timeout(unsigned long data) +{ + struct dma_ops_domain *dom = (struct dma_ops_domain *)data; + int cpu; + + atomic_set(&dom->flush_timer_on, 0); + + dma_ops_domain_flush_tlb(dom); + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + unsigned long flags; + + queue = per_cpu_ptr(dom->flush_queue, cpu); + spin_lock_irqsave(&queue->lock, flags); + queue_ring_free_flushed(dom, queue); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table @@ -1744,6 +1971,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); + if (timer_pending(&dom->flush_timer)) + del_timer(&dom->flush_timer); + + dma_ops_domain_free_flush_queue(dom); + put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -1782,6 +2014,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) /* Initialize reserved ranges */ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + if (dma_ops_domain_alloc_flush_queue(dma_dom)) + goto free_dma_dom; + + setup_timer(&dma_dom->flush_timer, queue_flush_timeout, + (unsigned long)dma_dom); + + atomic_set(&dma_dom->flush_timer_on, 0); + add_domain_to_list(&dma_dom->domain); return dma_dom; @@ -1844,7 +2084,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + + flags &= ~(DTE_FLAG_SA | 0xffffULL); flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; @@ -2225,92 +2466,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -static void __queue_flush(struct flush_queue *queue) -{ - struct protection_domain *domain; - unsigned long flags; - int idx; - - /* First flush TLB of all known domains */ - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_for_each_entry(domain, &amd_iommu_pd_list, list) - domain_flush_tlb(domain); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); - - /* Wait until flushes have completed */ - domain_flush_complete(NULL); - - for (idx = 
0; idx < queue->next; ++idx) { - struct flush_queue_entry *entry; - - entry = queue->entries + idx; - - free_iova_fast(&entry->dma_dom->iovad, - entry->iova_pfn, - entry->pages); - - /* Not really necessary, just to make sure we catch any bugs */ - entry->dma_dom = NULL; - } - - queue->next = 0; -} - -static void queue_flush_all(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(&flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - if (queue->next > 0) - __queue_flush(queue); - spin_unlock_irqrestore(&queue->lock, flags); - } -} - -static void queue_flush_timeout(unsigned long unsused) -{ - atomic_set(&queue_timer_on, 0); - queue_flush_all(); -} - -static void queue_add(struct dma_ops_domain *dma_dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue_entry *entry; - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(&flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - if (queue->next == FLUSH_QUEUE_SIZE) - __queue_flush(queue); - - idx = queue->next++; - entry = queue->entries + idx; - - entry->iova_pfn = address; - entry->pages = pages; - entry->dma_dom = dma_dom; - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) - mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(&flush_queue); -} - - /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -2797,7 +2952,7 @@ static int init_reserved_iova_ranges(void) int __init amd_iommu_init_api(void) { - int ret, cpu, err = 0; + int ret, err = 0; ret = iova_cache_get(); if (ret) @@ -2807,18 +2962,6 @@ int __init amd_iommu_init_api(void) if (ret) return ret; - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * - sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) - goto out_put_iova; - - spin_lock_init(&queue->lock); - } - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2830,23 +2973,12 @@ int __init amd_iommu_init_api(void) err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); if (err) return err; - return 0; - -out_put_iova: - for_each_possible_cpu(cpu) { - struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); - - kfree(queue->entries); - } - return -ENOMEM; + return 0; } int __init amd_iommu_init_dma_ops(void) { - setup_timer(&queue_timer, queue_flush_timeout, 0); - atomic_set(&queue_timer_on, 0); - swiotlb = iommu_pass_through ? 1 : 0; iommu_detected = 1; @@ -3002,12 +3134,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: - /* - * First make sure the domain is no longer referenced from the - * flush queue - */ - queue_flush_all(); - /* Now release the domain */ dma_dom = to_dma_ops_domain(domain); dma_ops_domain_free(dma_dom); @@ -3879,11 +4005,9 @@ static void irte_ga_prepare(void *entry, u8 vector, u32 dest_apicid, int devid) { struct irte_ga *irte = (struct irte_ga *) entry; - struct iommu_dev_data *dev_data = search_dev_data(devid); irte->lo.val = 0; irte->hi.val = 0; - irte->lo.fields_remap.guest_mode = dev_data ? 
dev_data->use_vapic : 0; irte->lo.fields_remap.int_type = delivery_mode; irte->lo.fields_remap.dm = dest_mode; irte->hi.fields.vector = vector; @@ -3939,10 +4063,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, struct irte_ga *irte = (struct irte_ga *) entry; struct iommu_dev_data *dev_data = search_dev_data(devid); - if (!dev_data || !dev_data->use_vapic) { + if (!dev_data || !dev_data->use_vapic || + !irte->lo.fields_remap.guest_mode) { irte->hi.fields.vector = vector; irte->lo.fields_remap.destination = dest_apicid; - irte->lo.fields_remap.guest_mode = 0; modify_irte_ga(devid, index, irte, NULL); } } @@ -4273,7 +4397,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } -static struct irq_domain_ops amd_ir_domain_ops = { +static const struct irq_domain_ops amd_ir_domain_ops = { .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 5a11328f4d98..5cc597b383c7 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -29,6 +29,7 @@ #include <linux/export.h> #include <linux/iommu.h> #include <linux/kmemleak.h> +#include <linux/crash_dump.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -236,6 +237,7 @@ enum iommu_init_state { IOMMU_INITIALIZED, IOMMU_NOT_FOUND, IOMMU_INIT_ERROR, + IOMMU_CMDLINE_DISABLED, }; /* Early ioapic and hpet maps from kernel command line */ @@ -588,6 +590,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + iommu->cmd_buf_head = 0; + iommu->cmd_buf_tail = 0; iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); } @@ -1898,6 +1902,14 @@ static void init_device_table_dma(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + /* + * In kdump kernels in-flight DMA from the old kernel might + * cause IO_PAGE_FAULTs. 
There are no reports that a kdump + * actually failed because of that, so just disable fault + * reporting in the hardware to get rid of the messages + */ + if (is_kdump_kernel()) + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } @@ -2097,23 +2109,27 @@ static struct syscore_ops amd_iommu_syscore_ops = { .resume = amd_iommu_resume, }; -static void __init free_on_init_error(void) +static void __init free_iommu_resources(void) { kmemleak_free(irq_lookup_table); free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); + irq_lookup_table = NULL; kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); + amd_iommu_rlookup_table = NULL; free_pages((unsigned long)amd_iommu_alias_table, get_order(alias_table_size)); + amd_iommu_alias_table = NULL; free_pages((unsigned long)amd_iommu_dev_table, get_order(dev_table_size)); + amd_iommu_dev_table = NULL; free_iommu_all(); @@ -2183,6 +2199,7 @@ static void __init free_dma_resources(void) { free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); + amd_iommu_pd_alloc_bitmap = NULL; free_unity_maps(); } @@ -2307,6 +2324,9 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* Disable any previously enabled IOMMUs */ + disable_iommus(); + if (amd_iommu_irq_remap) amd_iommu_irq_remap = check_ioapic_information(); @@ -2410,6 +2430,13 @@ static int __init state_next(void) case IOMMU_IVRS_DETECTED: ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; + if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { + pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n"); + free_dma_resources(); + free_iommu_resources(); + init_state = IOMMU_CMDLINE_DISABLED; + ret = -EINVAL; + } break; case IOMMU_ACPI_FINISHED: early_enable_iommus(); @@ -2438,6 +2465,7 @@ static int __init state_next(void) break; case IOMMU_NOT_FOUND: case IOMMU_INIT_ERROR: + case IOMMU_CMDLINE_DISABLED: /* Error states => do nothing */ ret = -EINVAL; break; @@ -2451,13 +2479,14 @@ static int __init state_next(void) static int __init iommu_go_to_state(enum iommu_init_state state) { - int ret = 0; + int ret = -EINVAL; while (init_state != state) { - ret = state_next(); - if (init_state == IOMMU_NOT_FOUND || - init_state == IOMMU_INIT_ERROR) + if (init_state == IOMMU_NOT_FOUND || + init_state == IOMMU_INIT_ERROR || + init_state == IOMMU_CMDLINE_DISABLED) break; + ret = state_next(); } return ret; @@ -2522,7 +2551,7 @@ static int __init amd_iommu_init(void) free_dma_resources(); if (!irq_remapping_enabled) { disable_iommus(); - free_on_init_error(); + free_iommu_resources(); } else { struct amd_iommu *iommu; @@ -2549,9 +2578,6 @@ int __init amd_iommu_detect(void) if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return -ENODEV; - if (amd_iommu_disabled) - return -ENODEV; - ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) return ret; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 4de8f4160bb8..294a409e283b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -322,6 +322,7 @@ #define IOMMU_PTE_IW (1ULL << 62) #define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_SA (1ULL << 34) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) @@ -516,6 +517,8 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; + u32 cmd_buf_head; + u32 cmd_buf_tail; /* event 
buffer virtual address */ u8 *evt_buf; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 62618e77bedc..f1db86939031 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -314,7 +314,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * If we have devices with different DMA masks, move the free * area cache limit down for the benefit of the smaller one. */ - iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); + iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fc2765ccdb57..c9bd5c8b162e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -481,7 +481,7 @@ struct deferred_flush_data { struct deferred_flush_table *tables; }; -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; @@ -2390,7 +2390,7 @@ static struct dmar_domain *find_domain(struct device *dev) /* No lock here, assumes no domain exit in normal case */ info = dev->archdata.iommu; - if (info) + if (likely(info)) return info->domain; return NULL; } @@ -3478,7 +3478,7 @@ static unsigned long intel_alloc_iova(struct device *dev, return iova_pfn; } -static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) +static struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain, *tmp; struct dmar_rmrr_unit *rmrr; @@ -3525,18 +3525,6 @@ out: return domain; } -static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) -{ - struct device_domain_info *info; - - /* No lock here, assumes no domain exit in normal case */ - info = dev->archdata.iommu; - if (likely(info)) - return info->domain; - - return __get_valid_domain_for_dev(dev); -} - /* Check if the dev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { @@ -3725,10 +3713,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, struct intel_iommu *iommu; struct deferred_flush_entry *entry; struct deferred_flush_data *flush_data; - unsigned int cpuid; - cpuid = get_cpu(); - flush_data = per_cpu_ptr(&deferred_flush, cpuid); + flush_data = raw_cpu_ptr(&deferred_flush); /* Flush all CPUs' entries to avoid deferring too much. 
If * this becomes a bottleneck, can just flush us, and rely on @@ -3761,8 +3747,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, } flush_data->size++; spin_unlock_irqrestore(&flush_data->lock, flags); - - put_cpu(); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3973,7 +3957,7 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) return !dma_addr; } -struct dma_map_ops intel_dma_ops = { +const struct dma_map_ops intel_dma_ops = { .alloc = intel_alloc_coherent, .free = intel_free_coherent, .map_sg = intel_map_sg, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 23c427602c55..f167c0d84ebf 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) } EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); +int intel_svm_is_pasid_valid(struct device *dev, int pasid) +{ + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + /* init_mm is used in this case */ + if (!svm->mm) + ret = 1; + else if (atomic_read(&svm->mm->mm_users) > 0) + ret = 1; + else + ret = 0; + + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); + /* Page request queue descriptor */ struct page_req_dsc { u64 srr:1; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index a190cbd76ef7..f7ef4a5d4785 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS]; * the dmar_global_lock. 
*/ static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); -static struct irq_domain_ops intel_ir_domain_ops; +static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); static int __init parse_ioapics_under_ir(void); @@ -1396,7 +1396,7 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, modify_irte(&data->irq_2_iommu, &entry); } -static struct irq_domain_ops intel_ir_domain_ops = { +static const struct irq_domain_ops intel_ir_domain_ops = { .alloc = intel_irq_remapping_alloc, .free = intel_irq_remapping_free, .activate = intel_irq_remapping_activate, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index cf7ca7e70777..3f6ea160afed 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -915,13 +915,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) */ struct iommu_group *generic_device_group(struct device *dev) { - struct iommu_group *group; - - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /* @@ -988,11 +982,7 @@ struct iommu_group *pci_device_group(struct device *dev) return group; /* No shared group found, allocate new */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return NULL; - - return group; + return iommu_group_alloc(); } /** @@ -1020,6 +1010,9 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (ops && ops->device_group) group = ops->device_group(dev); + if (WARN_ON_ONCE(group == NULL)) + return ERR_PTR(-EINVAL); + if (IS_ERR(group)) return group; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 5c88ba70e4e0..246f14c83944 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/bitops.h> +#include <linux/cpu.h> static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, @@ -48,7 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit; + iovad->dma_32bit_pfn = pfn_32bit + 1; init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -63,7 +64,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) struct rb_node *prev_node = rb_prev(iovad->cached32_node); struct iova *curr_iova = rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo - 1; + *limit_pfn = curr_iova->pfn_lo; return prev_node; } } @@ -135,7 +136,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, static unsigned int iova_get_pad_size(unsigned int size, unsigned int limit_pfn) { - return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); + return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); } static int __alloc_and_insert_iova_range(struct iova_domain *iovad, @@ -155,18 +156,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn < curr_iova->pfn_lo) + if (limit_pfn <= curr_iova->pfn_lo) { goto move_left; - else if (limit_pfn < curr_iova->pfn_hi) - goto adjust_limit_pfn; - else { + } else if (limit_pfn > curr_iova->pfn_hi) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn) + if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) break; /* found a free slot */ } 
-adjust_limit_pfn: - limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0; + limit_pfn = curr_iova->pfn_lo; move_left: prev = curr; curr = rb_prev(curr); @@ -182,7 +180,7 @@ move_left: } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size) + 1; + new->pfn_lo = limit_pfn - (size + pad_size); new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ @@ -269,7 +267,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, new_iova, size_aligned); if (ret) { @@ -398,10 +396,8 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; - preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); - preempt_enable(); goto retry; } @@ -729,7 +725,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -759,7 +755,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -793,7 +788,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -815,7 +810,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index b7e14ee863f9..2a38aa15be17 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -8,7 +8,9 @@ * the Free Software Foundation; version 2 of the License. 
*/ +#include <linux/bitmap.h> #include <linux/delay.h> +#include <linux/dma-iommu.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/export.h> @@ -21,17 +23,24 @@ #include <linux/sizes.h> #include <linux/slab.h> +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) #include <asm/dma-iommu.h> #include <asm/pgalloc.h> +#endif #include "io-pgtable.h" +#define IPMMU_CTX_MAX 1 + struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; struct list_head list; unsigned int num_utlbs; + spinlock_t lock; /* Protects ctx and domains[] */ + DECLARE_BITMAP(ctx, IPMMU_CTX_MAX); + struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX]; struct dma_iommu_mapping *mapping; }; @@ -47,10 +56,12 @@ struct ipmmu_vmsa_domain { spinlock_t lock; /* Protects mappings */ }; -struct ipmmu_vmsa_archdata { +struct ipmmu_vmsa_iommu_priv { struct ipmmu_vmsa_device *mmu; unsigned int *utlbs; unsigned int num_utlbs; + struct device *dev; + struct list_head list; }; static DEFINE_SPINLOCK(ipmmu_devices_lock); @@ -61,6 +72,24 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom) return container_of(dom, struct ipmmu_vmsa_domain, io_domain); } + +static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev) +{ +#if defined(CONFIG_ARM) + return dev->archdata.iommu; +#else + return dev->iommu_fwspec->iommu_priv; +#endif +} +static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p) +{ +#if defined(CONFIG_ARM) + dev->archdata.iommu = p; +#else + dev->iommu_fwspec->iommu_priv = p; +#endif +} + #define TLB_LOOP_TIMEOUT 100 /* 100us */ /* ----------------------------------------------------------------------------- @@ -293,9 +322,29 @@ static struct iommu_gather_ops ipmmu_gather_ops = { * Domain/Context Management */ +static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu, + struct ipmmu_vmsa_domain *domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&mmu->lock, flags); + + ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX); + if (ret != IPMMU_CTX_MAX) { + mmu->domains[ret] = domain; + set_bit(ret, mmu->ctx); + } + + spin_unlock_irqrestore(&mmu->lock, flags); + + return ret; +} + static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { u64 ttbr; + int ret; /* * Allocate the page table operations. @@ -309,7 +358,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * non-secure mode. */ domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; domain->cfg.ias = 32; domain->cfg.oas = 40; domain->cfg.tlb = &ipmmu_gather_ops; @@ -327,10 +376,15 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return -EINVAL; /* - * TODO: When adding support for multiple contexts, find an unused - * context. + * Find an unused context. 
*/ - domain->context_id = 0; + ret = ipmmu_domain_allocate_context(domain->mmu, domain); + if (ret == IPMMU_CTX_MAX) { + free_io_pgtable_ops(domain->iop); + return -EBUSY; + } + + domain->context_id = ret; /* TTBR0 */ ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; @@ -372,6 +426,19 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) return 0; } +static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu, + unsigned int context_id) +{ + unsigned long flags; + + spin_lock_irqsave(&mmu->lock, flags); + + clear_bit(context_id, mmu->ctx); + mmu->domains[context_id] = NULL; + + spin_unlock_irqrestore(&mmu->lock, flags); +} + static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) { /* @@ -382,6 +449,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain) */ ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH); ipmmu_tlb_sync(domain); + ipmmu_domain_free_context(domain->mmu, domain->context_id); } /* ----------------------------------------------------------------------------- @@ -439,29 +507,35 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain) static irqreturn_t ipmmu_irq(int irq, void *dev) { struct ipmmu_vmsa_device *mmu = dev; - struct iommu_domain *io_domain; - struct ipmmu_vmsa_domain *domain; + irqreturn_t status = IRQ_NONE; + unsigned int i; + unsigned long flags; - if (!mmu->mapping) - return IRQ_NONE; + spin_lock_irqsave(&mmu->lock, flags); + + /* + * Check interrupts for all active contexts. + */ + for (i = 0; i < IPMMU_CTX_MAX; i++) { + if (!mmu->domains[i]) + continue; + if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED) + status = IRQ_HANDLED; + } - io_domain = mmu->mapping->domain; - domain = to_vmsa_domain(io_domain); + spin_unlock_irqrestore(&mmu->lock, flags); - return ipmmu_domain_irq(domain); + return status; } /* ----------------------------------------------------------------------------- * IOMMU Operations */ -static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +static struct iommu_domain *__ipmmu_domain_alloc(unsigned type) { struct ipmmu_vmsa_domain *domain; - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; @@ -487,8 +561,8 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain) static int ipmmu_attach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; - struct ipmmu_vmsa_device *mmu = archdata->mmu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_device *mmu = priv->mmu; struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned long flags; unsigned int i; @@ -513,15 +587,16 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; - } + } else + dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); spin_unlock_irqrestore(&domain->lock, flags); if (ret < 0) return ret; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_enable(domain, archdata->utlbs[i]); + for (i = 0; i < priv->num_utlbs; ++i) + ipmmu_utlb_enable(domain, priv->utlbs[i]); return 0; } @@ -529,12 +604,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, static void ipmmu_detach_device(struct iommu_domain *io_domain, struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); 
struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); unsigned int i; - for (i = 0; i < archdata->num_utlbs; ++i) - ipmmu_utlb_disable(domain, archdata->utlbs[i]); + for (i = 0; i < priv->num_utlbs; ++i) + ipmmu_utlb_disable(domain, priv->utlbs[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -595,22 +670,15 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, return 0; } -static int ipmmu_add_device(struct device *dev) +static int ipmmu_init_platform_device(struct device *dev) { - struct ipmmu_vmsa_archdata *archdata; + struct ipmmu_vmsa_iommu_priv *priv; struct ipmmu_vmsa_device *mmu; - struct iommu_group *group = NULL; unsigned int *utlbs; unsigned int i; int num_utlbs; int ret = -ENODEV; - if (dev->archdata.iommu) { - dev_warn(dev, "IOMMU driver already assigned to device %s\n", - dev_name(dev)); - return -EINVAL; - } - /* Find the master corresponding to the device. */ num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", @@ -647,6 +715,46 @@ static int ipmmu_add_device(struct device *dev) } } + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto error; + } + + priv->mmu = mmu; + priv->utlbs = utlbs; + priv->num_utlbs = num_utlbs; + priv->dev = dev; + set_priv(dev, priv); + return 0; + +error: + kfree(utlbs); + return ret; +} + +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) + +static struct iommu_domain *ipmmu_domain_alloc(unsigned type) +{ + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + return __ipmmu_domain_alloc(type); +} + +static int ipmmu_add_device(struct device *dev) +{ + struct ipmmu_vmsa_device *mmu = NULL; + struct iommu_group *group; + int ret; + + if (to_priv(dev)) { + dev_warn(dev, "IOMMU driver already assigned to device %s\n", + dev_name(dev)); + return -EINVAL; + } + /* Create a device group and add the device to it. */ group = iommu_group_alloc(); if (IS_ERR(group)) { @@ -664,16 +772,9 @@ static int ipmmu_add_device(struct device *dev) goto error; } - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); - if (!archdata) { - ret = -ENOMEM; + ret = ipmmu_init_platform_device(dev); + if (ret < 0) goto error; - } - - archdata->mmu = mmu; - archdata->utlbs = utlbs; - archdata->num_utlbs = num_utlbs; - dev->archdata.iommu = archdata; /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate @@ -684,6 +785,7 @@ static int ipmmu_add_device(struct device *dev) * - Make the mapping size configurable ? We currently use a 2GB mapping * at a 1GB offset to ensure that NULL VAs will fault. 
*/ + mmu = to_priv(dev)->mmu; if (!mmu->mapping) { struct dma_iommu_mapping *mapping; @@ -708,30 +810,30 @@ static int ipmmu_add_device(struct device *dev) return 0; error: - arm_iommu_release_mapping(mmu->mapping); - - kfree(dev->archdata.iommu); - kfree(utlbs); - - dev->archdata.iommu = NULL; + if (mmu) + arm_iommu_release_mapping(mmu->mapping); if (!IS_ERR_OR_NULL(group)) iommu_group_remove_device(dev); + kfree(to_priv(dev)->utlbs); + kfree(to_priv(dev)); + set_priv(dev, NULL); + return ret; } static void ipmmu_remove_device(struct device *dev) { - struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - kfree(archdata->utlbs); - kfree(archdata); + kfree(priv->utlbs); + kfree(priv); - dev->archdata.iommu = NULL; + set_priv(dev, NULL); } static const struct iommu_ops ipmmu_ops = { @@ -748,6 +850,144 @@ static const struct iommu_ops ipmmu_ops = { .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, }; +#endif /* !CONFIG_ARM && CONFIG_IOMMU_DMA */ + +#ifdef CONFIG_IOMMU_DMA + +static DEFINE_SPINLOCK(ipmmu_slave_devices_lock); +static LIST_HEAD(ipmmu_slave_devices); + +static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type) +{ + struct iommu_domain *io_domain = NULL; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + io_domain = __ipmmu_domain_alloc(type); + break; + + case IOMMU_DOMAIN_DMA: + io_domain = __ipmmu_domain_alloc(type); + if (io_domain) + iommu_get_dma_cookie(io_domain); + break; + } + + return io_domain; +} + +static void ipmmu_domain_free_dma(struct iommu_domain *io_domain) +{ + switch (io_domain->type) { + case IOMMU_DOMAIN_DMA: + iommu_put_dma_cookie(io_domain); + /* fall-through */ + default: + ipmmu_domain_free(io_domain); + break; + } +} + +static int ipmmu_add_device_dma(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + struct iommu_group *group; + + /* + * Only let through devices that have been verified in xlate() + * We may get called with dev->iommu_fwspec set to NULL. + */ + if (!fwspec || !fwspec->iommu_priv) + return -ENODEV; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + spin_lock(&ipmmu_slave_devices_lock); + list_add(&to_priv(dev)->list, &ipmmu_slave_devices); + spin_unlock(&ipmmu_slave_devices_lock); + return 0; +} + +static void ipmmu_remove_device_dma(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + + spin_lock(&ipmmu_slave_devices_lock); + list_del(&priv->list); + spin_unlock(&ipmmu_slave_devices_lock); + + iommu_group_remove_device(dev); +} + +static struct device *ipmmu_find_sibling_device(struct device *dev) +{ + struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev); + struct ipmmu_vmsa_iommu_priv *sibling_priv = NULL; + bool found = false; + + spin_lock(&ipmmu_slave_devices_lock); + + list_for_each_entry(sibling_priv, &ipmmu_slave_devices, list) { + if (priv == sibling_priv) + continue; + if (sibling_priv->mmu == priv->mmu) { + found = true; + break; + } + } + + spin_unlock(&ipmmu_slave_devices_lock); + + return found ? 
sibling_priv->dev : NULL; +} + +static struct iommu_group *ipmmu_find_group_dma(struct device *dev) +{ + struct iommu_group *group; + struct device *sibling; + + sibling = ipmmu_find_sibling_device(dev); + if (sibling) + group = iommu_group_get(sibling); + if (!sibling || IS_ERR(group)) + group = generic_device_group(dev); + + return group; +} + +static int ipmmu_of_xlate_dma(struct device *dev, + struct of_phandle_args *spec) +{ + /* If the IPMMU device is disabled in DT then return error + * to make sure the of_iommu code does not install ops + * even though the iommu device is disabled + */ + if (!of_device_is_available(spec->np)) + return -ENODEV; + + return ipmmu_init_platform_device(dev); +} + +static const struct iommu_ops ipmmu_ops = { + .domain_alloc = ipmmu_domain_alloc_dma, + .domain_free = ipmmu_domain_free_dma, + .attach_dev = ipmmu_attach_device, + .detach_dev = ipmmu_detach_device, + .map = ipmmu_map, + .unmap = ipmmu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = ipmmu_iova_to_phys, + .add_device = ipmmu_add_device_dma, + .remove_device = ipmmu_remove_device_dma, + .device_group = ipmmu_find_group_dma, + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + .of_xlate = ipmmu_of_xlate_dma, +}; + +#endif /* CONFIG_IOMMU_DMA */ + /* ----------------------------------------------------------------------------- * Probe/remove and init */ @@ -768,11 +1008,6 @@ static int ipmmu_probe(struct platform_device *pdev) int irq; int ret; - if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { - dev_err(&pdev->dev, "missing platform data\n"); - return -EINVAL; - } - mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL); if (!mmu) { dev_err(&pdev->dev, "cannot allocate device data\n"); @@ -781,6 +1016,8 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->dev = &pdev->dev; mmu->num_utlbs = 32; + spin_lock_init(&mmu->lock); + bitmap_zero(mmu->ctx, IPMMU_CTX_MAX); /* Map I/O memory and request IRQ. 
*/ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -840,7 +1077,9 @@ static int ipmmu_remove(struct platform_device *pdev) list_del(&mmu->list); spin_unlock(&ipmmu_devices_lock); +#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA) arm_iommu_release_mapping(mmu->mapping); +#endif ipmmu_device_reset(mmu); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 95dfca36ccb9..641e035cf866 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1309,7 +1309,7 @@ static void omap_iommu_remove_device(struct device *dev) static struct iommu_group *omap_iommu_device_group(struct device *dev) { struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-EINVAL); if (arch_data->iommu_dev) group = arch_data->iommu_dev->group; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 179e636a4d91..8788640756a7 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -165,20 +165,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain, static int s390_iommu_add_device(struct device *dev) { - struct iommu_group *group; - int rc; + struct iommu_group *group = iommu_group_get_for_dev(dev); - group = iommu_group_get(dev); - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - } + if (IS_ERR(group)) + return PTR_ERR(group); - rc = iommu_group_add_device(group, dev); iommu_group_put(group); - return rc; + return 0; } static void s390_iommu_remove_device(struct device *dev) @@ -344,6 +338,7 @@ static struct iommu_ops s390_iommu_ops = { .iova_to_phys = s390_iommu_iova_to_phys, .add_device = s390_iommu_add_device, .remove_device = s390_iommu_remove_device, + .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, }; |
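The largest part of this diff replaces the AMD IOMMU's single global per-CPU flush queue with a per-domain ring buffer gated by two counters (flush_start_cnt/flush_finish_cnt), as described in the comments added to amd_iommu.c above. The following is a minimal, single-threaded user-space sketch of just that counter ordering: the atomics, spinlocks, per-CPU queues and the 10ms timer are deliberately replaced by plain variables and a direct function call, so this is an illustration of the idea, not the kernel implementation. Names such as queue_add and queue_ring_free_flushed mirror the patch; everything else (array sizes, the printf standing in for free_iova_fast) is assumed for the example.

```c
/*
 * Sketch of the per-domain flush-queue counters from the AMD IOMMU
 * changes above.  Entries are stamped with flush_start_cnt when queued
 * and may only be freed once flush_finish_cnt has moved past that
 * stamp, i.e. once an IOTLB flush that started after the entry was
 * added has completed.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define FLUSH_QUEUE_SIZE 8	/* small so the wrap-around is visible */

struct flush_queue_entry {
	unsigned long iova_pfn;
	unsigned long pages;
	uint64_t counter;	/* flush_start_cnt at the time of queueing */
};

struct dma_ops_domain {
	struct flush_queue_entry entries[FLUSH_QUEUE_SIZE];
	unsigned head, tail;
	uint64_t flush_start_cnt;	/* bumped before an IOTLB flush starts */
	uint64_t flush_finish_cnt;	/* bumped after the flush completed */
};

static bool queue_ring_full(struct dma_ops_domain *dom)
{
	return ((dom->tail + 1) % FLUSH_QUEUE_SIZE) == dom->head;
}

static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{
	dom->flush_start_cnt++;
	/* real code: domain_flush_tlb() + domain_flush_complete() */
	dom->flush_finish_cnt++;
}

/* Release every entry that was queued before the last finished flush. */
static void queue_ring_free_flushed(struct dma_ops_domain *dom)
{
	while (dom->head != dom->tail &&
	       dom->entries[dom->head].counter < dom->flush_finish_cnt) {
		/* stand-in for free_iova_fast() */
		printf("free iova pfn %lu (%lu pages)\n",
		       dom->entries[dom->head].iova_pfn,
		       dom->entries[dom->head].pages);
		dom->head = (dom->head + 1) % FLUSH_QUEUE_SIZE;
	}
}

static void queue_add(struct dma_ops_domain *dom,
		      unsigned long iova_pfn, unsigned long pages)
{
	/* First drop already-flushed entries to make room. */
	queue_ring_free_flushed(dom);

	/* If the ring is still full, flush now so the entries become freeable. */
	if (queue_ring_full(dom)) {
		dma_ops_domain_flush_tlb(dom);
		queue_ring_free_flushed(dom);
	}

	dom->entries[dom->tail].iova_pfn = iova_pfn;
	dom->entries[dom->tail].pages    = pages;
	dom->entries[dom->tail].counter  = dom->flush_start_cnt;
	dom->tail = (dom->tail + 1) % FLUSH_QUEUE_SIZE;
}

int main(void)
{
	struct dma_ops_domain dom = { 0 };
	unsigned long pfn;

	for (pfn = 0; pfn < 20; pfn++)
		queue_add(&dom, pfn, 1);

	/* What the 10ms flush timer would otherwise do eventually. */
	dma_ops_domain_flush_tlb(&dom);
	queue_ring_free_flushed(&dom);
	return 0;
}
```

In this simplified model the two counters are always equal between flushes; in the kernel they can differ while a flush is in progress, which is exactly why entries stamped with the current flush_start_cnt must not be freed until flush_finish_cnt catches up.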