From d87beb749281404b4b4919930b1cc6352e3746f2 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 31 May 2017 18:52:29 +0100
Subject: iommu/of: Handle PCI aliases properly

When a PCI device has DMA quirks, we need to ensure that an upstream
IOMMU knows about all possible aliases, since the presence of a DMA
quirk does not preclude the device still also emitting transactions
(e.g. MSIs) on its 'real' RID. Similarly, the rules for bridge aliasing
are relatively complex, and some bridges may only take ownership of
transactions under particular transient circumstances, leading again to
multiple RIDs potentially being seen at the IOMMU for the given device.

Take all this into account in the OF code by translating every RID
produced by the alias walk, not just whichever one comes out last.
Happily, this also makes things tidy enough that we can reduce the
number of both total lines of code, and confusing levels of indirection,
by pulling the "iommus"/"iommu-map" parsing helpers back in-line again.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++--------------------------
 1 file changed, 47 insertions(+), 57 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 8cb60829a7a1..be8ac1ddec06 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -140,75 +140,39 @@ static const struct iommu_ops
 	return ops;
 }
 
-static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
-{
-	struct of_phandle_args *iommu_spec = data;
-
-	iommu_spec->args[0] = alias;
-	return iommu_spec->np == pdev->bus->dev.of_node;
-}
+struct of_pci_iommu_alias_info {
+	struct device *dev;
+	struct device_node *np;
+};
 
-static const struct iommu_ops
-*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np)
+static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 {
+	struct of_pci_iommu_alias_info *info = data;
 	const struct iommu_ops *ops;
-	struct of_phandle_args iommu_spec;
+	struct of_phandle_args iommu_spec = { .args_count = 1 };
 	int err;
 
-	/*
-	 * Start by tracing the RID alias down the PCI topology as
-	 * far as the host bridge whose OF node we have...
-	 * (we're not even attempting to handle multi-alias devices yet)
-	 */
-	iommu_spec.args_count = 1;
-	iommu_spec.np = bridge_np;
-	pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec);
-	/*
-	 * ...then find out what that becomes once it escapes the PCI
-	 * bus into the system beyond, and which IOMMU it ends up at.
-	 */
-	iommu_spec.np = NULL;
-	err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
+	err = of_pci_map_rid(info->np, alias, "iommu-map",
 			     "iommu-map-mask", &iommu_spec.np,
 			     iommu_spec.args);
 	if (err)
-		return err == -ENODEV ? NULL : ERR_PTR(err);
-
-	ops = of_iommu_xlate(&pdev->dev, &iommu_spec);
+		return err == -ENODEV ? 1 : err;
 
+	ops = of_iommu_xlate(info->dev, &iommu_spec);
 	of_node_put(iommu_spec.np);
-	return ops;
-}
-
-static const struct iommu_ops
-*of_platform_iommu_init(struct device *dev, struct device_node *np)
-{
-	struct of_phandle_args iommu_spec;
-	const struct iommu_ops *ops = NULL;
-	int idx = 0;
 
-	/*
-	 * We don't currently walk up the tree looking for a parent IOMMU.
-	 * See the `Notes:' section of
-	 * Documentation/devicetree/bindings/iommu/iommu.txt
-	 */
-	while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
-					   idx, &iommu_spec)) {
-		ops = of_iommu_xlate(dev, &iommu_spec);
-		of_node_put(iommu_spec.np);
-		idx++;
-		if (IS_ERR_OR_NULL(ops))
-			break;
-	}
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
 
-	return ops;
+	return info->np == pdev->bus->dev.of_node;
 }
 
 const struct iommu_ops *of_iommu_configure(struct device *dev,
 					   struct device_node *master_np)
 {
-	const struct iommu_ops *ops;
+	const struct iommu_ops *ops = NULL;
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	int err;
 
 	if (!master_np)
 		return NULL;
@@ -221,18 +185,44 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 		iommu_fwspec_free(dev);
 	}
 
-	if (dev_is_pci(dev))
-		ops = of_pci_iommu_init(to_pci_dev(dev), master_np);
-	else
-		ops = of_platform_iommu_init(dev, master_np);
+	/*
+	 * We don't currently walk up the tree looking for a parent IOMMU.
+	 * See the `Notes:' section of
+	 * Documentation/devicetree/bindings/iommu/iommu.txt
+	 */
+	if (dev_is_pci(dev)) {
+		struct of_pci_iommu_alias_info info = {
+			.dev = dev,
+			.np = master_np,
+		};
+
+		err = pci_for_each_dma_alias(to_pci_dev(dev),
+					     of_pci_iommu_init, &info);
+		if (err) /* err > 0 means the walk stopped, but non-fatally */
+			ops = ERR_PTR(min(err, 0));
+		else /* success implies both fwspec and ops are now valid */
+			ops = dev->iommu_fwspec->ops;
+	} else {
+		struct of_phandle_args iommu_spec;
+		int idx = 0;
+
+		while (!of_parse_phandle_with_args(master_np, "iommus",
+						   "#iommu-cells",
+						   idx, &iommu_spec)) {
+			ops = of_iommu_xlate(dev, &iommu_spec);
+			of_node_put(iommu_spec.np);
+			idx++;
+			if (IS_ERR_OR_NULL(ops))
+				break;
+		}
+	}
 	/*
 	 * If we have reason to believe the IOMMU driver missed the initial
 	 * add_device callback for dev, replay it to get things in order.
 	 */
 	if (!IS_ERR_OR_NULL(ops) && ops->add_device &&
 	    dev->bus && !dev->iommu_group) {
-		int err = ops->add_device(dev);
-
+		err = ops->add_device(dev);
 		if (err)
 			ops = ERR_PTR(err);
 	}
-- 
cgit 


From 6bd4f1c754b2fafac403073b0d8469bed1d37e2d Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2017 16:43:09 -0500
Subject: iommu: Convert to using %pOF instead of full_name

Now that we have a custom printf format specifier, convert users of
full_name to use %pOF instead. This is preparation to remove storing
of the full path string for each node.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: iommu@lists.linux-foundation.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-rockchip@lists.infradead.org
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/fsl_pamu.c        | 20 ++++++++------------
 drivers/iommu/fsl_pamu_domain.c | 10 ++++------
 drivers/iommu/of_iommu.c        |  3 +--
 drivers/iommu/rockchip-iommu.c  | 10 +++++-----
 4 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index a34355fca37a..919ad9045ac4 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -530,8 +530,8 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
 		if (node) {
 			prop = of_get_property(node, "cache-stash-id", NULL);
 			if (!prop) {
-				pr_debug("missing cache-stash-id at %s\n",
-					 node->full_name);
+				pr_debug("missing cache-stash-id at %pOF\n",
+					 node);
 				of_node_put(node);
 				return ~(u32)0;
 			}
@@ -557,8 +557,8 @@ found_cpu_node:
 		if (stash_dest_hint == cache_level) {
 			prop = of_get_property(node, "cache-stash-id", NULL);
 			if (!prop) {
-				pr_debug("missing cache-stash-id at %s\n",
-					 node->full_name);
+				pr_debug("missing cache-stash-id at %pOF\n",
+					 node);
 				of_node_put(node);
 				return ~(u32)0;
 			}
@@ -568,8 +568,7 @@ found_cpu_node:
 
 		prop = of_get_property(node, "next-level-cache", NULL);
 		if (!prop) {
-			pr_debug("can't find next-level-cache at %s\n",
-				 node->full_name);
+			pr_debug("can't find next-level-cache at %pOF\n", node);
 			of_node_put(node);
 			return ~(u32)0;  /* can't traverse any further */
 		}
@@ -1063,8 +1062,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 
 	guts_node = of_find_matching_node(NULL, guts_device_ids);
 	if (!guts_node) {
-		dev_err(dev, "could not find GUTS node %s\n",
-			dev->of_node->full_name);
+		dev_err(dev, "could not find GUTS node %pOF\n", dev->of_node);
 		ret = -ENODEV;
 		goto error;
 	}
@@ -1246,8 +1244,7 @@ static __init int fsl_pamu_init(void)
 
 	pdev = platform_device_alloc("fsl-of-pamu", 0);
 	if (!pdev) {
-		pr_err("could not allocate device %s\n",
-		       np->full_name);
+		pr_err("could not allocate device %pOF\n", np);
 		ret = -ENOMEM;
 		goto error_device_alloc;
 	}
@@ -1259,8 +1256,7 @@ static __init int fsl_pamu_init(void)
 
 	ret = platform_device_add(pdev);
 	if (ret) {
-		pr_err("could not add device %s (err=%i)\n",
-		       np->full_name, ret);
+		pr_err("could not add device %pOF (err=%i)\n", np, ret);
 		goto error_device_add;
 	}
 
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index da0e1e30ef37..01c73479345d 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -619,8 +619,8 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain,
 	for (i = 0; i < num; i++) {
 		/* Ensure that LIODN value is valid */
 		if (liodn[i] >= PAACE_NUMBER_ENTRIES) {
-			pr_debug("Invalid liodn %d, attach device failed for %s\n",
-				 liodn[i], dev->of_node->full_name);
+			pr_debug("Invalid liodn %d, attach device failed for %pOF\n",
+				 liodn[i], dev->of_node);
 			ret = -EINVAL;
 			break;
 		}
@@ -684,8 +684,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
 		liodn_cnt = len / sizeof(u32);
 		ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt);
 	} else {
-		pr_debug("missing fsl,liodn property at %s\n",
-			 dev->of_node->full_name);
+		pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
 		ret = -EINVAL;
 	}
 
@@ -720,8 +719,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain,
 	if (prop)
 		detach_device(dev, dma_domain);
 	else
-		pr_debug("missing fsl,liodn property at %s\n",
-			 dev->of_node->full_name);
+		pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
 }
 
 static  int configure_domain_geometry(struct iommu_domain *domain, void *data)
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index be8ac1ddec06..34160e7a8dd7 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -245,8 +245,7 @@ static int __init of_iommu_init(void)
 		const of_iommu_init_fn init_fn = match->data;
 
 		if (init_fn && init_fn(np))
-			pr_err("Failed to initialise IOMMU %s\n",
-				of_node_full_name(np));
+			pr_err("Failed to initialise IOMMU %pOF\n", np);
 	}
 
 	return 0;
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4ba48a26b389..1b8155dada26 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1008,20 +1008,20 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group,
 	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
 					 &args);
 	if (ret) {
-		dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n",
-			np->full_name, ret);
+		dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n",
+			np, ret);
 		return ret;
 	}
 	if (args.args_count != 0) {
-		dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n",
-			args.np->full_name, args.args_count);
+		dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n",
+			args.np, args.args_count);
 		return -EINVAL;
 	}
 
 	pd = of_find_device_by_node(args.np);
 	of_node_put(args.np);
 	if (!pd) {
-		dev_err(dev, "iommu %s not found\n", args.np->full_name);
+		dev_err(dev, "iommu %pOF not found\n", args.np);
 		return -EPROBE_DEFER;
 	}
 
-- 
cgit 


From ce2eb8f44e60c748fac56ede46b526fdac773e1b Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 21 Jul 2017 13:12:35 +0100
Subject: iommu/msm: Add iommu_group support

As the last step to making groups mandatory, clean up the remaining
drivers by adding basic support. Whilst it may not perfectly reflect the
isolation capabilities of the hardware, using generic_device_group()
should at least maintain existing behaviour with respect to the API.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/msm_iommu.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index d0448353d501..04f4d51ffacb 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -393,6 +393,7 @@ static struct msm_iommu_dev *find_iommu_for_dev(struct device *dev)
 static int msm_iommu_add_device(struct device *dev)
 {
 	struct msm_iommu_dev *iommu;
+	struct iommu_group *group;
 	unsigned long flags;
 	int ret = 0;
 
@@ -406,7 +407,16 @@ static int msm_iommu_add_device(struct device *dev)
 
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
 
-	return ret;
+	if (ret)
+		return ret;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+
+	return 0;
 }
 
 static void msm_iommu_remove_device(struct device *dev)
@@ -421,6 +431,8 @@ static void msm_iommu_remove_device(struct device *dev)
 		iommu_device_unlink(&iommu->iommu, dev);
 
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
+
+	iommu_group_remove_device(dev);
 }
 
 static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -700,6 +712,7 @@ static struct iommu_ops msm_iommu_ops = {
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.add_device = msm_iommu_add_device,
 	.remove_device = msm_iommu_remove_device,
+	.device_group = generic_device_group,
 	.pgsize_bitmap = MSM_IOMMU_PGSIZES,
 	.of_xlate = qcom_iommu_of_xlate,
 };
-- 
cgit 


From d92e1f849830fc78c50a00b953361fc1449aa1e2 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 21 Jul 2017 13:12:36 +0100
Subject: iommu/tegra-smmu: Add iommu_group support

As the last step to making groups mandatory, clean up the remaining
drivers by adding basic support. Whilst it may not perfectly reflect
the isolation capabilities of the hardware (tegra_smmu_swgroup sounds
suspiciously like something that might warrant representing at the
iommu_group level), using generic_device_group() should at least
maintain existing behaviour with respect to the API.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/tegra-smmu.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index eeb19f560a05..faa9c1e70482 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -704,6 +704,7 @@ static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
 static int tegra_smmu_add_device(struct device *dev)
 {
 	struct device_node *np = dev->of_node;
+	struct iommu_group *group;
 	struct of_phandle_args args;
 	unsigned int index = 0;
 
@@ -725,12 +726,19 @@ static int tegra_smmu_add_device(struct device *dev)
 		index++;
 	}
 
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+
 	return 0;
 }
 
 static void tegra_smmu_remove_device(struct device *dev)
 {
 	dev->archdata.iommu = NULL;
+	iommu_group_remove_device(dev);
 }
 
 static const struct iommu_ops tegra_smmu_ops = {
@@ -741,6 +749,7 @@ static const struct iommu_ops tegra_smmu_ops = {
 	.detach_dev = tegra_smmu_detach_dev,
 	.add_device = tegra_smmu_add_device,
 	.remove_device = tegra_smmu_remove_device,
+	.device_group = generic_device_group,
 	.map = tegra_smmu_map,
 	.unmap = tegra_smmu_unmap,
 	.map_sg = default_iommu_map_sg,
-- 
cgit 


From 15f9a3104b80a83e33ec04609aa61ac7e045fa2c Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 21 Jul 2017 13:12:37 +0100
Subject: iommu/tegra-gart: Add iommu_group support

As the last step to making groups mandatory, clean up the remaining
drivers by adding basic support. Whilst it may not perfectly reflect the
isolation capabilities of the hardware, using generic_device_group()
should at least maintain existing behaviour with respect to the API.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/tegra-gart.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 37e708fdbb5a..29bafc6e82ae 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -334,12 +334,31 @@ static bool gart_iommu_capable(enum iommu_cap cap)
 	return false;
 }
 
+static int gart_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get_for_dev(dev);
+
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+	return 0;
+}
+
+static void gart_iommu_remove_device(struct device *dev)
+{
+	iommu_group_remove_device(dev);
+}
+
 static const struct iommu_ops gart_iommu_ops = {
 	.capable	= gart_iommu_capable,
 	.domain_alloc	= gart_iommu_domain_alloc,
 	.domain_free	= gart_iommu_domain_free,
 	.attach_dev	= gart_iommu_attach_dev,
 	.detach_dev	= gart_iommu_detach_dev,
+	.add_device	= gart_iommu_add_device,
+	.remove_device	= gart_iommu_remove_device,
+	.device_group	= generic_device_group,
 	.map		= gart_iommu_map,
 	.map_sg		= default_iommu_map_sg,
 	.unmap		= gart_iommu_unmap,
-- 
cgit 


From 05f80300dc8bcfe8566b36256d01482cae5afa02 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 21 Jul 2017 13:12:38 +0100
Subject: iommu: Finish making iommu_group support mandatory

Now that all the drivers properly implementing the IOMMU API support
groups (I'm ignoring the etnaviv GPU MMUs which seemingly only do just
enough to convince the ARM DMA mapping ops), we can remove the FIXME
workarounds from the core code. In the process, it also seems logical to
make the .device_group callback non-optional for drivers calling
iommu_group_get_for_dev() - the current callers all implement it anyway,
and it doesn't make sense for any future callers not to either.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3f6ea160afed..af69bf7e035a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1005,11 +1005,10 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 	if (group)
 		return group;
 
-	group = ERR_PTR(-EINVAL);
-
-	if (ops && ops->device_group)
-		group = ops->device_group(dev);
+	if (!ops)
+		return ERR_PTR(-EINVAL);
 
+	group = ops->device_group(dev);
 	if (WARN_ON_ONCE(group == NULL))
 		return ERR_PTR(-EINVAL);
 
@@ -1298,12 +1297,8 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 	int ret;
 
 	group = iommu_group_get(dev);
-	/* FIXME: Remove this when groups a mandatory for iommu drivers */
-	if (group == NULL)
-		return __iommu_attach_device(domain, dev);
-
 	/*
-	 * We have a group - lock it to make sure the device-count doesn't
+	 * Lock the group to make sure the device-count doesn't
 	 * change while we are attaching
 	 */
 	mutex_lock(&group->mutex);
@@ -1336,9 +1331,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 	struct iommu_group *group;
 
 	group = iommu_group_get(dev);
-	/* FIXME: Remove this when groups a mandatory for iommu drivers */
-	if (group == NULL)
-		return __iommu_detach_device(domain, dev);
 
 	mutex_lock(&group->mutex);
 	if (iommu_group_device_count(group) != 1) {
@@ -1360,9 +1352,6 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
 	struct iommu_group *group;
 
 	group = iommu_group_get(dev);
-	/* FIXME: Remove this when groups a mandatory for iommu drivers */
-	if (group == NULL)
-		return NULL;
 
 	domain = group->domain;
 
-- 
cgit 


From da4b02750a9fe1d1c4d047d14e69ec7542dddeb3 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 4 Aug 2017 17:29:06 +0100
Subject: iommu/of: Fix of_iommu_configure() for disabled IOMMUs

Sudeep reports that the logic got slightly broken when a PCI iommu-map
entry targets an IOMMU marked as disabled in DT, since of_pci_map_rid()
succeeds in following a phandle, and of_iommu_xlate() doesn't return an
error value, but we miss checking whether ops was actually non-NULL.
Whilst this could be solved with a point fix in of_pci_iommu_init(), it
suggests that all the juggling of ERR_PTR values through the ops pointer
is proving rather too complicated for its own good, so let's instead
simplify the whole flow (with a side-effect of eliminating the cause of
the bug).

The fact that we now rely on iommu_fwspec means that we no longer need
to pass around an iommu_ops pointer at all - we can simply propagate a
regular int return value until we know whether we have a viable IOMMU,
then retrieve the ops from the fwspec if and when we actually need them.
This makes everything a bit more uniform and certainly easier to follow.

Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly")
Reported-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 59 ++++++++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 34160e7a8dd7..e60e3dba85a0 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -25,6 +25,8 @@
 #include <linux/of_pci.h>
 #include <linux/slab.h>
 
+#define NO_IOMMU	1
+
 static const struct of_device_id __iommu_of_table_sentinel
 	__used __section(__iommu_of_table_end);
 
@@ -109,8 +111,8 @@ static bool of_iommu_driver_present(struct device_node *np)
 	return of_match_node(&__iommu_of_table, np);
 }
 
-static const struct iommu_ops
-*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec)
+static int of_iommu_xlate(struct device *dev,
+			  struct of_phandle_args *iommu_spec)
 {
 	const struct iommu_ops *ops;
 	struct fwnode_handle *fwnode = &iommu_spec->np->fwnode;
@@ -120,24 +122,20 @@ static const struct iommu_ops
 	if ((ops && !ops->of_xlate) ||
 	    !of_device_is_available(iommu_spec->np) ||
 	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
-		return NULL;
+		return NO_IOMMU;
 
 	err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
 	if (err)
-		return ERR_PTR(err);
+		return err;
 	/*
 	 * The otherwise-empty fwspec handily serves to indicate the specific
 	 * IOMMU device we're waiting for, which will be useful if we ever get
 	 * a proper probe-ordering dependency mechanism in future.
 	 */
 	if (!ops)
-		return ERR_PTR(-EPROBE_DEFER);
-
-	err = ops->of_xlate(dev, iommu_spec);
-	if (err)
-		return ERR_PTR(err);
+		return -EPROBE_DEFER;
 
-	return ops;
+	return ops->of_xlate(dev, iommu_spec);
 }
 
 struct of_pci_iommu_alias_info {
@@ -148,7 +146,6 @@ struct of_pci_iommu_alias_info {
 static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 {
 	struct of_pci_iommu_alias_info *info = data;
-	const struct iommu_ops *ops;
 	struct of_phandle_args iommu_spec = { .args_count = 1 };
 	int err;
 
@@ -156,13 +153,12 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 			     "iommu-map-mask", &iommu_spec.np,
 			     iommu_spec.args);
 	if (err)
-		return err == -ENODEV ? 1 : err;
+		return err == -ENODEV ? NO_IOMMU : err;
 
-	ops = of_iommu_xlate(info->dev, &iommu_spec);
+	err = of_iommu_xlate(info->dev, &iommu_spec);
 	of_node_put(iommu_spec.np);
-
-	if (IS_ERR(ops))
-		return PTR_ERR(ops);
+	if (err)
+		return err;
 
 	return info->np == pdev->bus->dev.of_node;
 }
@@ -172,7 +168,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 {
 	const struct iommu_ops *ops = NULL;
 	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
-	int err;
+	int err = NO_IOMMU;
 
 	if (!master_np)
 		return NULL;
@@ -198,10 +194,6 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 		err = pci_for_each_dma_alias(to_pci_dev(dev),
 					     of_pci_iommu_init, &info);
-		if (err) /* err > 0 means the walk stopped, but non-fatally */
-			ops = ERR_PTR(min(err, 0));
-		else /* success implies both fwspec and ops are now valid */
-			ops = dev->iommu_fwspec->ops;
 	} else {
 		struct of_phandle_args iommu_spec;
 		int idx = 0;
@@ -209,27 +201,34 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 		while (!of_parse_phandle_with_args(master_np, "iommus",
 						   "#iommu-cells",
 						   idx, &iommu_spec)) {
-			ops = of_iommu_xlate(dev, &iommu_spec);
+			err = of_iommu_xlate(dev, &iommu_spec);
 			of_node_put(iommu_spec.np);
 			idx++;
-			if (IS_ERR_OR_NULL(ops))
+			if (err)
 				break;
 		}
 	}
+
+	/*
+	 * Two success conditions can be represented by non-negative err here:
+	 * >0 : there is no IOMMU, or one was unavailable for non-fatal reasons
+	 *  0 : we found an IOMMU, and dev->fwspec is initialised appropriately
+	 * <0 : any actual error
+	 */
+	if (!err)
+		ops = dev->iommu_fwspec->ops;
 	/*
 	 * If we have reason to believe the IOMMU driver missed the initial
 	 * add_device callback for dev, replay it to get things in order.
 	 */
-	if (!IS_ERR_OR_NULL(ops) && ops->add_device &&
-	    dev->bus && !dev->iommu_group) {
+	if (ops && ops->add_device && dev->bus && !dev->iommu_group)
 		err = ops->add_device(dev);
-		if (err)
-			ops = ERR_PTR(err);
-	}
 
 	/* Ignore all other errors apart from EPROBE_DEFER */
-	if (IS_ERR(ops) && (PTR_ERR(ops) != -EPROBE_DEFER)) {
-		dev_dbg(dev, "Adding to IOMMU failed: %ld\n", PTR_ERR(ops));
+	if (err == -EPROBE_DEFER) {
+		ops = ERR_PTR(err);
+	} else if (err < 0) {
+		dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
 		ops = NULL;
 	}
 
-- 
cgit 


From 42f87e71c3df12d8f29ec1bb7b47772ffaeaf1ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 14:44:28 +0200
Subject: iommu/iova: Add flush-queue data structures

This patch adds the basic data-structures to implement
flush-queues in the generic IOVA code. It also adds the
initialization and destroy routines for these data
structures.

The initialization routine is designed so that the use of
this feature is optional for the users of IOVA code.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 246f14c83944..b9f6ce02a1e1 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -50,10 +50,48 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
 	iovad->dma_32bit_pfn = pfn_32bit + 1;
+	iovad->flush_cb = NULL;
+	iovad->fq = NULL;
 	init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
+static void free_iova_flush_queue(struct iova_domain *iovad)
+{
+	if (!iovad->fq)
+		return;
+
+	free_percpu(iovad->fq);
+
+	iovad->fq         = NULL;
+	iovad->flush_cb   = NULL;
+	iovad->entry_dtor = NULL;
+}
+
+int init_iova_flush_queue(struct iova_domain *iovad,
+			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
+{
+	int cpu;
+
+	iovad->fq = alloc_percpu(struct iova_fq);
+	if (!iovad->fq)
+		return -ENOMEM;
+
+	iovad->flush_cb   = flush_cb;
+	iovad->entry_dtor = entry_dtor;
+
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(iovad->fq, cpu);
+		fq->head = 0;
+		fq->tail = 0;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(init_iova_flush_queue);
+
 static struct rb_node *
 __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 {
@@ -433,6 +471,7 @@ void put_iova_domain(struct iova_domain *iovad)
 	struct rb_node *node;
 	unsigned long flags;
 
+	free_iova_flush_queue(iovad);
 	free_iova_rcaches(iovad);
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
 	node = rb_first(&iovad->rbroot);
-- 
cgit 


From 1928210107edd4fa786199fef6b875d3af3bef88 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 15:49:44 +0200
Subject: iommu/iova: Implement Flush-Queue ring buffer

Add a function to add entries to the Flush-Queue ring
buffer. If the buffer is full, call the flush-callback and
free the entries.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index b9f6ce02a1e1..e5c9a7ae6088 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -32,6 +32,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 				     unsigned long limit_pfn);
 static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
+static void fq_destroy_all_entries(struct iova_domain *iovad);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
@@ -61,6 +62,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 	if (!iovad->fq)
 		return;
 
+	fq_destroy_all_entries(iovad);
 	free_percpu(iovad->fq);
 
 	iovad->fq         = NULL;
@@ -461,6 +463,84 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(free_iova_fast);
 
+#define fq_ring_for_each(i, fq) \
+	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+
+static inline bool fq_full(struct iova_fq *fq)
+{
+	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+}
+
+static inline unsigned fq_ring_add(struct iova_fq *fq)
+{
+	unsigned idx = fq->tail;
+
+	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+
+	return idx;
+}
+
+static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
+{
+	unsigned idx;
+
+	fq_ring_for_each(idx, fq) {
+
+		if (iovad->entry_dtor)
+			iovad->entry_dtor(fq->entries[idx].data);
+
+		free_iova_fast(iovad,
+			       fq->entries[idx].iova_pfn,
+			       fq->entries[idx].pages);
+	}
+
+	fq->head = 0;
+	fq->tail = 0;
+}
+
+static void fq_destroy_all_entries(struct iova_domain *iovad)
+{
+	int cpu;
+
+	/*
+	 * This code runs when the iova_domain is being detroyed, so don't
+	 * bother to free iovas, just call the entry_dtor on all remaining
+	 * entries.
+	 */
+	if (!iovad->entry_dtor)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
+		int idx;
+
+		fq_ring_for_each(idx, fq)
+			iovad->entry_dtor(fq->entries[idx].data);
+	}
+}
+
+void queue_iova(struct iova_domain *iovad,
+		unsigned long pfn, unsigned long pages,
+		unsigned long data)
+{
+	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
+	unsigned idx;
+
+	if (fq_full(fq)) {
+		iovad->flush_cb(iovad);
+		fq_ring_free(iovad, fq);
+	}
+
+	idx = fq_ring_add(fq);
+
+	fq->entries[idx].iova_pfn = pfn;
+	fq->entries[idx].pages    = pages;
+	fq->entries[idx].data     = data;
+
+	put_cpu_ptr(iovad->fq);
+}
+EXPORT_SYMBOL_GPL(queue_iova);
+
 /**
  * put_iova_domain - destroys the iova doamin
  * @iovad: - iova domain in question.
-- 
cgit 


From fb418dab8a4f01dde0c025d15145c589ec02796b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 16:14:59 +0200
Subject: iommu/iova: Add flush counters to Flush-Queue implementation

There are two counters:

	* fq_flush_start_cnt  - Increased when a TLB flush
	                        is started.

	* fq_flush_finish_cnt - Increased when a TLB flush
				is finished.

The fq_flush_start_cnt is assigned to every Flush-Queue
entry on its creation. When freeing entries from the
Flush-Queue, the value in the entry is compared to the
fq_flush_finish_cnt. The entry can only be freed when its
value is less than the value of fq_flush_finish_cnt.

The reason for these counters is to take advantage of IOMMU
TLB flushes that happened on other CPUs. These already
flushed the TLB for Flush-Queue entries on other CPUs so
that they can already be freed without flushing the TLB
again.

This makes it less likely that the Flush-Queue is full and
saves IOMMU TLB flushes.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index e5c9a7ae6088..47b144e417ad 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -75,6 +75,9 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 {
 	int cpu;
 
+	atomic64_set(&iovad->fq_flush_start_cnt,  0);
+	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
+
 	iovad->fq = alloc_percpu(struct iova_fq);
 	if (!iovad->fq)
 		return -ENOMEM;
@@ -482,20 +485,30 @@ static inline unsigned fq_ring_add(struct iova_fq *fq)
 
 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 {
+	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 	unsigned idx;
 
 	fq_ring_for_each(idx, fq) {
 
+		if (fq->entries[idx].counter >= counter)
+			break;
+
 		if (iovad->entry_dtor)
 			iovad->entry_dtor(fq->entries[idx].data);
 
 		free_iova_fast(iovad,
 			       fq->entries[idx].iova_pfn,
 			       fq->entries[idx].pages);
+
+		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
 	}
+}
 
-	fq->head = 0;
-	fq->tail = 0;
+static void iova_domain_flush(struct iova_domain *iovad)
+{
+	atomic64_inc(&iovad->fq_flush_start_cnt);
+	iovad->flush_cb(iovad);
+	atomic64_inc(&iovad->fq_flush_finish_cnt);
 }
 
 static void fq_destroy_all_entries(struct iova_domain *iovad)
@@ -526,8 +539,15 @@ void queue_iova(struct iova_domain *iovad,
 	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
 	unsigned idx;
 
+	/*
+	 * First remove all entries from the flush queue that have already been
+	 * flushed out on another CPU. This makes the fq_full() check below less
+	 * likely to be true.
+	 */
+	fq_ring_free(iovad, fq);
+
 	if (fq_full(fq)) {
-		iovad->flush_cb(iovad);
+		iova_domain_flush(iovad);
 		fq_ring_free(iovad, fq);
 	}
 
@@ -536,6 +556,7 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].iova_pfn = pfn;
 	fq->entries[idx].pages    = pages;
 	fq->entries[idx].data     = data;
+	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
 	put_cpu_ptr(iovad->fq);
 }
-- 
cgit 


From 8109c2a2f8463852dddd6a1c3fcf262047c0c124 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 16:31:17 +0200
Subject: iommu/iova: Add locking to Flush-Queues

The lock is taken from the same CPU most of the time. But
having it also allows the queue to be flushed from another
CPU if necessary.

This will be used by a timer to regularly flush any pending
IOVAs from the Flush-Queues.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 47b144e417ad..749d39533e0b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -91,6 +91,8 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 		fq = per_cpu_ptr(iovad->fq, cpu);
 		fq->head = 0;
 		fq->tail = 0;
+
+		spin_lock_init(&fq->lock);
 	}
 
 	return 0;
@@ -471,6 +473,7 @@ EXPORT_SYMBOL_GPL(free_iova_fast);
 
 static inline bool fq_full(struct iova_fq *fq)
 {
+	assert_spin_locked(&fq->lock);
 	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
 }
 
@@ -478,6 +481,8 @@ static inline unsigned fq_ring_add(struct iova_fq *fq)
 {
 	unsigned idx = fq->tail;
 
+	assert_spin_locked(&fq->lock);
+
 	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
 
 	return idx;
@@ -488,6 +493,8 @@ static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
 	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
 	unsigned idx;
 
+	assert_spin_locked(&fq->lock);
+
 	fq_ring_for_each(idx, fq) {
 
 		if (fq->entries[idx].counter >= counter)
@@ -537,8 +544,11 @@ void queue_iova(struct iova_domain *iovad,
 		unsigned long data)
 {
 	struct iova_fq *fq = get_cpu_ptr(iovad->fq);
+	unsigned long flags;
 	unsigned idx;
 
+	spin_lock_irqsave(&fq->lock, flags);
+
 	/*
 	 * First remove all entries from the flush queue that have already been
 	 * flushed out on another CPU. This makes the fq_full() check below less
@@ -558,6 +568,7 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].data     = data;
 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
+	spin_unlock_irqrestore(&fq->lock, flags);
 	put_cpu_ptr(iovad->fq);
 }
 EXPORT_SYMBOL_GPL(queue_iova);
-- 
cgit 


From 9a005a800ae817c2c90ef117d7cd77614d866777 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 16:58:18 +0200
Subject: iommu/iova: Add flush timer

Add a timer to flush entries from the Flush-Queues every
10ms. This makes sure that no stale TLB entries remain for
too long after an IOVA has been unmapped.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 749d39533e0b..33edfa794ae9 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -33,6 +33,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad,
 static void init_iova_rcaches(struct iova_domain *iovad);
 static void free_iova_rcaches(struct iova_domain *iovad);
 static void fq_destroy_all_entries(struct iova_domain *iovad);
+static void fq_flush_timeout(unsigned long data);
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
@@ -62,7 +63,11 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 	if (!iovad->fq)
 		return;
 
+	if (timer_pending(&iovad->fq_timer))
+		del_timer(&iovad->fq_timer);
+
 	fq_destroy_all_entries(iovad);
+
 	free_percpu(iovad->fq);
 
 	iovad->fq         = NULL;
@@ -95,6 +100,9 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 		spin_lock_init(&fq->lock);
 	}
 
+	setup_timer(&iovad->fq_timer, fq_flush_timeout, (unsigned long)iovad);
+	atomic_set(&iovad->fq_timer_on, 0);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
@@ -539,6 +547,25 @@ static void fq_destroy_all_entries(struct iova_domain *iovad)
 	}
 }
 
+static void fq_flush_timeout(unsigned long data)
+{
+	struct iova_domain *iovad = (struct iova_domain *)data;
+	int cpu;
+
+	atomic_set(&iovad->fq_timer_on, 0);
+	iova_domain_flush(iovad);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long flags;
+		struct iova_fq *fq;
+
+		fq = per_cpu_ptr(iovad->fq, cpu);
+		spin_lock_irqsave(&fq->lock, flags);
+		fq_ring_free(iovad, fq);
+		spin_unlock_irqrestore(&fq->lock, flags);
+	}
+}
+
 void queue_iova(struct iova_domain *iovad,
 		unsigned long pfn, unsigned long pages,
 		unsigned long data)
@@ -569,6 +596,11 @@ void queue_iova(struct iova_domain *iovad,
 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
 
 	spin_unlock_irqrestore(&fq->lock, flags);
+
+	if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
+		mod_timer(&iovad->fq_timer,
+			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+
 	put_cpu_ptr(iovad->fq);
 }
 EXPORT_SYMBOL_GPL(queue_iova);
-- 
cgit 


From 9003d6186321e22b19125721b6fb2aa390ff8be6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 10 Aug 2017 17:19:13 +0200
Subject: iommu/amd: Make use of iova queue flushing

Rip out the implementation in the AMD IOMMU driver and use
the one in the common iova code instead.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c | 229 ++--------------------------------------------
 1 file changed, 9 insertions(+), 220 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 688e77576e5a..cabcaa506ed6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
-
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	u64 counter; /* Flush counter when this entry was added to the queue */
-};
-
-struct flush_queue {
-	struct flush_queue_entry *entries;
-	unsigned head, tail;
-	spinlock_t lock;
-};
+static void iova_domain_flush_tlb(struct iova_domain *iovad);
 
 /*
  * Data container for a dma_ops specific protection domain
@@ -161,36 +148,6 @@ struct dma_ops_domain {
 
 	/* IOVA RB-Tree */
 	struct iova_domain iovad;
-
-	struct flush_queue __percpu *flush_queue;
-
-	/*
-	 * We need two counter here to be race-free wrt. IOTLB flushing and
-	 * adding entries to the flush queue.
-	 *
-	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
-	 * New entries added to the flush ring-buffer get their 'counter' value
-	 * from here. This way we can make sure that entries added to the queue
-	 * (or other per-cpu queues of the same domain) while the TLB is about
-	 * to be flushed are not considered to be flushed already.
-	 */
-	atomic64_t flush_start_cnt;
-
-	/*
-	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
-	 * This value is always smaller than flush_start_cnt. The queue_add
-	 * function frees all IOVAs that have a counter value smaller than
-	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
-	 * flushed out of the IOTLB of the domain.
-	 */
-	atomic64_t flush_finish_cnt;
-
-	/*
-	 * Timer to make sure we don't keep IOVAs around unflushed
-	 * for too long
-	 */
-	struct timer_list flush_timer;
-	atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain)
 	free_page((unsigned long)domain->gcr3_tbl);
 }
 
-static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		kfree(queue->entries);
-	}
-
-	free_percpu(dom->flush_queue);
-
-	dom->flush_queue = NULL;
-}
-
-static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
-{
-	int cpu;
-
-	atomic64_set(&dom->flush_start_cnt,  0);
-	atomic64_set(&dom->flush_finish_cnt, 0);
-
-	dom->flush_queue = alloc_percpu(struct flush_queue);
-	if (!dom->flush_queue)
-		return -ENOMEM;
-
-	/* First make sure everything is cleared */
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		queue->head    = 0;
-		queue->tail    = 0;
-		queue->entries = NULL;
-	}
-
-	/* Now start doing the allocation */
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
-					 GFP_KERNEL);
-		if (!queue->entries) {
-			dma_ops_domain_free_flush_queue(dom);
-			return -ENOMEM;
-		}
-
-		spin_lock_init(&queue->lock);
-	}
-
-	return 0;
-}
-
 static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
 {
-	atomic64_inc(&dom->flush_start_cnt);
 	domain_flush_tlb(&dom->domain);
 	domain_flush_complete(&dom->domain);
-	atomic64_inc(&dom->flush_finish_cnt);
 }
 
-static inline bool queue_ring_full(struct flush_queue *queue)
+static void iova_domain_flush_tlb(struct iova_domain *iovad)
 {
-	assert_spin_locked(&queue->lock);
-
-	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
-}
+	struct dma_ops_domain *dom;
 
-#define queue_ring_for_each(i, q) \
-	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
-
-static inline unsigned queue_ring_add(struct flush_queue *queue)
-{
-	unsigned idx = queue->tail;
-
-	assert_spin_locked(&queue->lock);
-	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
-
-	return idx;
-}
-
-static inline void queue_ring_remove_head(struct flush_queue *queue)
-{
-	assert_spin_locked(&queue->lock);
-	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
-}
-
-static void queue_ring_free_flushed(struct dma_ops_domain *dom,
-				    struct flush_queue *queue)
-{
-	u64 counter = atomic64_read(&dom->flush_finish_cnt);
-	int idx;
-
-	queue_ring_for_each(idx, queue) {
-		/*
-		 * This assumes that counter values in the ring-buffer are
-		 * monotonously rising.
-		 */
-		if (queue->entries[idx].counter >= counter)
-			break;
-
-		free_iova_fast(&dom->iovad,
-			       queue->entries[idx].iova_pfn,
-			       queue->entries[idx].pages);
-
-		queue_ring_remove_head(queue);
-	}
-}
-
-static void queue_add(struct dma_ops_domain *dom,
-		      unsigned long address, unsigned long pages)
-{
-	struct flush_queue *queue;
-	unsigned long flags;
-	int idx;
-
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	queue = get_cpu_ptr(dom->flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
-	/*
-	 * First remove the enries from the ring-buffer that are already
-	 * flushed to make the below queue_ring_full() check less likely
-	 */
-	queue_ring_free_flushed(dom, queue);
-
-	/*
-	 * When ring-queue is full, flush the entries from the IOTLB so
-	 * that we can free all entries with queue_ring_free_flushed()
-	 * below.
-	 */
-	if (queue_ring_full(queue)) {
-		dma_ops_domain_flush_tlb(dom);
-		queue_ring_free_flushed(dom, queue);
-	}
-
-	idx = queue_ring_add(queue);
-
-	queue->entries[idx].iova_pfn = address;
-	queue->entries[idx].pages    = pages;
-	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
-
-	spin_unlock_irqrestore(&queue->lock, flags);
-
-	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
-		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
-
-	put_cpu_ptr(dom->flush_queue);
-}
-
-static void queue_flush_timeout(unsigned long data)
-{
-	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
-	int cpu;
-
-	atomic_set(&dom->flush_timer_on, 0);
+	dom = container_of(iovad, struct dma_ops_domain, iovad);
 
 	dma_ops_domain_flush_tlb(dom);
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-		unsigned long flags;
-
-		queue = per_cpu_ptr(dom->flush_queue, cpu);
-		spin_lock_irqsave(&queue->lock, flags);
-		queue_ring_free_flushed(dom, queue);
-		spin_unlock_irqrestore(&queue->lock, flags);
-	}
 }
 
 /*
@@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
-	if (timer_pending(&dom->flush_timer))
-		del_timer(&dom->flush_timer);
-
-	dma_ops_domain_free_flush_queue(dom);
-
 	put_iova_domain(&dom->iovad);
 
 	free_pagetable(&dom->domain);
@@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
 			 IOVA_START_PFN, DMA_32BIT_PFN);
 
-	/* Initialize reserved ranges */
-	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
-
-	if (dma_ops_domain_alloc_flush_queue(dma_dom))
+	if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
 		goto free_dma_dom;
 
-	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
-		    (unsigned long)dma_dom);
-
-	atomic_set(&dma_dom->flush_timer_on, 0);
+	/* Initialize reserved ranges */
+	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
 	add_domain_to_list(&dma_dom->domain);
 
@@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 		domain_flush_tlb(&dma_dom->domain);
 		domain_flush_complete(&dma_dom->domain);
 	} else {
-		queue_add(dma_dom, dma_addr, pages);
+		pages = __roundup_pow_of_two(pages);
+		queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
 	}
 }
 
-- 
cgit 


From c8acb28b331364b32a5c81dbfbdfc8475b2f1f27 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 11 Aug 2017 11:42:46 +0200
Subject: iommu/vt-d: Allow to flush more than 4GB of device TLBs

The shift in qi_flush_dev_iotlb() is done on an int, which
limits the mask to 32 bits. Make the mask 64 bits wide so
that more than 4GB of address range can be flushed at once.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index c8b0329c85d2..ca5ebaeafd6a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1343,7 +1343,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 
 	if (mask) {
 		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
-		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
+		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
 		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
 	} else
 		desc.high = QI_DEV_IOTLB_ADDR(addr);
-- 
cgit 


From 13cf01744608e1dc3f13dd316c95cb7a1fdaf740 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 11 Aug 2017 11:40:10 +0200
Subject: iommu/vt-d: Make use of iova deferred flushing

Remove the deferred flushing implementation in the Intel
VT-d driver and use the one from the common iova code
instead.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 197 +++++++++-----------------------------------
 1 file changed, 38 insertions(+), 159 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 687f18f65cea..d5e8b8628a1a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units);
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
-static void flush_unmaps_timeout(unsigned long data);
-
-struct deferred_flush_entry {
-	unsigned long iova_pfn;
-	unsigned long nrpages;
-	struct dmar_domain *domain;
-	struct page *freelist;
-};
-
-#define HIGH_WATER_MARK 250
-struct deferred_flush_table {
-	int next;
-	struct deferred_flush_entry entries[HIGH_WATER_MARK];
-};
-
-struct deferred_flush_data {
-	spinlock_t lock;
-	int timer_on;
-	struct timer_list timer;
-	long size;
-	struct deferred_flush_table *tables;
-};
-
-static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
-
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
 
@@ -1309,6 +1284,13 @@ static void dma_free_pagelist(struct page *freelist)
 	}
 }
 
+static void iova_entry_free(unsigned long data)
+{
+	struct page *freelist = (struct page *)data;
+
+	dma_free_pagelist(freelist);
+}
+
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
@@ -1622,6 +1604,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 				      addr, mask);
 }
 
+static void iommu_flush_iova(struct iova_domain *iovad)
+{
+	struct dmar_domain *domain;
+	int idx;
+
+	domain = container_of(iovad, struct dmar_domain, iovad);
+
+	for_each_domain_iommu(idx, domain) {
+		struct intel_iommu *iommu = g_iommus[idx];
+		u16 did = domain->iommu_did[iommu->seq_id];
+
+		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+		if (!cap_caching_mode(iommu->cap))
+			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
+					      0, MAX_AGAW_PFN_WIDTH);
+	}
+}
+
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
 {
 	u32 pmen;
@@ -1932,9 +1933,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
 {
 	int adjust_width, agaw;
 	unsigned long sagaw;
+	int err;
 
 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
 			DMA_32BIT_PFN);
+
+	err = init_iova_flush_queue(&domain->iovad,
+				    iommu_flush_iova, iova_entry_free);
+	if (err)
+		return err;
+
 	domain_reserve_special_ranges(domain);
 
 	/* calculate AGAW */
@@ -1986,14 +1994,6 @@ static void domain_exit(struct dmar_domain *domain)
 	if (!domain)
 		return;
 
-	/* Flush any lazy unmaps that may reference this domain */
-	if (!intel_iommu_strict) {
-		int cpu;
-
-		for_each_possible_cpu(cpu)
-			flush_unmaps_timeout(cpu);
-	}
-
 	/* Remove associated devices and clear attached or cached domains */
 	rcu_read_lock();
 	domain_remove_dev_info(domain);
@@ -3206,7 +3206,7 @@ static int __init init_dmars(void)
 	bool copied_tables = false;
 	struct device *dev;
 	struct intel_iommu *iommu;
-	int i, ret, cpu;
+	int i, ret;
 
 	/*
 	 * for each drhd
@@ -3239,22 +3239,6 @@ static int __init init_dmars(void)
 		goto error;
 	}
 
-	for_each_possible_cpu(cpu) {
-		struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
-							      cpu);
-
-		dfd->tables = kzalloc(g_num_of_iommus *
-				      sizeof(struct deferred_flush_table),
-				      GFP_KERNEL);
-		if (!dfd->tables) {
-			ret = -ENOMEM;
-			goto free_g_iommus;
-		}
-
-		spin_lock_init(&dfd->lock);
-		setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
-	}
-
 	for_each_active_iommu(iommu, drhd) {
 		g_iommus[iommu->seq_id] = iommu;
 
@@ -3437,10 +3421,9 @@ free_iommu:
 		disable_dmar_iommu(iommu);
 		free_dmar_iommu(iommu);
 	}
-free_g_iommus:
-	for_each_possible_cpu(cpu)
-		kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
+
 	kfree(g_iommus);
+
 error:
 	return ret;
 }
@@ -3645,110 +3628,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
 				  dir, *dev->dma_mask);
 }
 
-static void flush_unmaps(struct deferred_flush_data *flush_data)
-{
-	int i, j;
-
-	flush_data->timer_on = 0;
-
-	/* just flush them all */
-	for (i = 0; i < g_num_of_iommus; i++) {
-		struct intel_iommu *iommu = g_iommus[i];
-		struct deferred_flush_table *flush_table =
-				&flush_data->tables[i];
-		if (!iommu)
-			continue;
-
-		if (!flush_table->next)
-			continue;
-
-		/* In caching mode, global flushes turn emulation expensive */
-		if (!cap_caching_mode(iommu->cap))
-			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-					 DMA_TLB_GLOBAL_FLUSH);
-		for (j = 0; j < flush_table->next; j++) {
-			unsigned long mask;
-			struct deferred_flush_entry *entry =
-						&flush_table->entries[j];
-			unsigned long iova_pfn = entry->iova_pfn;
-			unsigned long nrpages = entry->nrpages;
-			struct dmar_domain *domain = entry->domain;
-			struct page *freelist = entry->freelist;
-
-			/* On real hardware multiple invalidations are expensive */
-			if (cap_caching_mode(iommu->cap))
-				iommu_flush_iotlb_psi(iommu, domain,
-					mm_to_dma_pfn(iova_pfn),
-					nrpages, !freelist, 0);
-			else {
-				mask = ilog2(nrpages);
-				iommu_flush_dev_iotlb(domain,
-						(uint64_t)iova_pfn << PAGE_SHIFT, mask);
-			}
-			free_iova_fast(&domain->iovad, iova_pfn, nrpages);
-			if (freelist)
-				dma_free_pagelist(freelist);
-		}
-		flush_table->next = 0;
-	}
-
-	flush_data->size = 0;
-}
-
-static void flush_unmaps_timeout(unsigned long cpuid)
-{
-	struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
-	unsigned long flags;
-
-	spin_lock_irqsave(&flush_data->lock, flags);
-	flush_unmaps(flush_data);
-	spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
-static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
-		      unsigned long nrpages, struct page *freelist)
-{
-	unsigned long flags;
-	int entry_id, iommu_id;
-	struct intel_iommu *iommu;
-	struct deferred_flush_entry *entry;
-	struct deferred_flush_data *flush_data;
-
-	flush_data = raw_cpu_ptr(&deferred_flush);
-
-	/* Flush all CPUs' entries to avoid deferring too much.  If
-	 * this becomes a bottleneck, can just flush us, and rely on
-	 * flush timer for the rest.
-	 */
-	if (flush_data->size == HIGH_WATER_MARK) {
-		int cpu;
-
-		for_each_online_cpu(cpu)
-			flush_unmaps_timeout(cpu);
-	}
-
-	spin_lock_irqsave(&flush_data->lock, flags);
-
-	iommu = domain_get_iommu(dom);
-	iommu_id = iommu->seq_id;
-
-	entry_id = flush_data->tables[iommu_id].next;
-	++(flush_data->tables[iommu_id].next);
-
-	entry = &flush_data->tables[iommu_id].entries[entry_id];
-	entry->domain = dom;
-	entry->iova_pfn = iova_pfn;
-	entry->nrpages = nrpages;
-	entry->freelist = freelist;
-
-	if (!flush_data->timer_on) {
-		mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
-		flush_data->timer_on = 1;
-	}
-	flush_data->size++;
-	spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 {
 	struct dmar_domain *domain;
@@ -3784,7 +3663,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
 		dma_free_pagelist(freelist);
 	} else {
-		add_unmap(domain, iova_pfn, nrpages, freelist);
+		queue_iova(&domain->iovad, iova_pfn, nrpages,
+			   (unsigned long)freelist);
 		/*
 		 * queue up the release of the unmap to save the 1/6th of the
 		 * cpu used up by the iotlb flush operation...
@@ -4721,7 +4601,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 static int intel_iommu_cpu_dead(unsigned int cpu)
 {
 	free_all_cpu_cached_iovas(cpu);
-	flush_unmaps_timeout(cpu);
 	return 0;
 }
 
-- 
cgit