From 2a03c1314506557277829562dd2ec5c11a6ea914 Mon Sep 17 00:00:00 2001
From: Chuhong Yuan <hslester96@gmail.com>
Date: Sun, 24 Nov 2019 13:28:55 +0800
Subject: dmaengine: ti: edma: add missed operations

The driver forgets to call pm_runtime_disable() and pm_runtime_put_sync()
on the probe failure path and in remove().
Add the calls and rework the probe failure handling to fix it.

To simplify the fix, the patch adjusts the calling order and merges checks
for devm_kcalloc.

Fixes: 2b6b3b742019 ("ARM/dmaengine: edma: Merge the two drivers under drivers/dma/")
Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191124052855.6472-1-hslester96@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/edma.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index 756a3c951dc7..0628ee4bf1b4 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -2289,13 +2289,6 @@ static int edma_probe(struct platform_device *pdev)
 	if (!info)
 		return -ENODEV;
 
-	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
-		dev_err(dev, "pm_runtime_get_sync() failed\n");
-		return ret;
-	}
-
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -2326,27 +2319,31 @@ static int edma_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ecc);
 
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync() failed\n");
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
 	/* Get eDMA3 configuration from IP */
 	ret = edma_setup_from_hw(dev, info, ecc);
 	if (ret)
-		return ret;
+		goto err_disable_pm;
 
 	/* Allocate memory based on the information we got from the IP */
 	ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels,
 					sizeof(*ecc->slave_chans), GFP_KERNEL);
-	if (!ecc->slave_chans)
-		return -ENOMEM;
 
 	ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots),
 				       sizeof(unsigned long), GFP_KERNEL);
-	if (!ecc->slot_inuse)
-		return -ENOMEM;
 
 	ecc->channels_mask = devm_kcalloc(dev,
 					   BITS_TO_LONGS(ecc->num_channels),
 					   sizeof(unsigned long), GFP_KERNEL);
-	if (!ecc->channels_mask)
-		return -ENOMEM;
+	if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask)
+		goto err_disable_pm;
 
 	/* Mark all channels available initially */
 	bitmap_fill(ecc->channels_mask, ecc->num_channels);
@@ -2388,7 +2385,7 @@ static int edma_probe(struct platform_device *pdev)
 				       ecc);
 		if (ret) {
 			dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
-			return ret;
+			goto err_disable_pm;
 		}
 		ecc->ccint = irq;
 	}
@@ -2404,7 +2401,7 @@ static int edma_probe(struct platform_device *pdev)
 				       ecc);
 		if (ret) {
 			dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
-			return ret;
+			goto err_disable_pm;
 		}
 		ecc->ccerrint = irq;
 	}
@@ -2412,7 +2409,8 @@ static int edma_probe(struct platform_device *pdev)
 	ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
 	if (ecc->dummy_slot < 0) {
 		dev_err(dev, "Can't allocate PaRAM dummy slot\n");
-		return ecc->dummy_slot;
+		ret = ecc->dummy_slot;
+		goto err_disable_pm;
 	}
 
 	queue_priority_mapping = info->queue_priority_mapping;
@@ -2512,6 +2510,9 @@ static int edma_probe(struct platform_device *pdev)
 
 err_reg1:
 	edma_free_slot(ecc, ecc->dummy_slot);
+err_disable_pm:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
 	return ret;
 }
 
@@ -2542,6 +2543,8 @@ static int edma_remove(struct platform_device *pdev)
 	if (ecc->dma_memcpy)
 		dma_async_device_unregister(ecc->dma_memcpy);
 	edma_free_slot(ecc, ecc->dummy_slot);
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
 
 	return 0;
 }
-- 
cgit 


From a39cddc9e3775100100a4272feed64faac831be9 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 5 Dec 2019 15:37:45 +0100
Subject: dmaengine: pl330: Drop boilerplate code for suspend/resume

Let's drop the boilerplate code in the system suspend/resume callbacks and
convert to use pm_runtime_force_suspend|resume(). This change also has a
nice side effect, as pm_runtime_force_resume() may decide to leave the
device in a low power state when that is feasible, which avoids wasting
both time and energy during system resume.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20191205143746.24873-2-ulf.hansson@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pl330.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 6cce9ef61b29..8e01da157518 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2961,12 +2961,7 @@ static int __maybe_unused pl330_suspend(struct device *dev)
 {
 	struct amba_device *pcdev = to_amba_device(dev);
 
-	pm_runtime_disable(dev);
-
-	if (!pm_runtime_status_suspended(dev)) {
-		/* amba did not disable the clock */
-		amba_pclk_disable(pcdev);
-	}
+	pm_runtime_force_suspend(dev);
 	amba_pclk_unprepare(pcdev);
 
 	return 0;
@@ -2981,10 +2976,7 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (!pm_runtime_status_suspended(dev))
-		ret = amba_pclk_enable(pcdev);
-
-	pm_runtime_enable(dev);
+	pm_runtime_force_resume(dev);
 
 	return ret;
 }
-- 
cgit 


From f68190c8cb753b0d150daa9d5e2b72ca0dfecf5c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 5 Dec 2019 15:37:46 +0100
Subject: dmaengine: pl330: Convert to the *_late and *_early system sleep
 callbacks

It has turned out that it's in general a good idea for dmaengines to allow
DMA requests during the entire dpm_suspend() phase. Therefore, convert the
pl330 driver to use SET_LATE_SYSTEM_SLEEP_PM_OPS.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20191205143746.24873-3-ulf.hansson@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pl330.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 8e01da157518..88b884cbb7c1 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2981,7 +2981,9 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	return ret;
 }
 
-static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
+static const struct dev_pm_ops pl330_pm = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(pl330_suspend, pl330_resume)
+};
 
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
-- 
cgit 


From a5b982af953bcc838cd198b0434834cc1dff14ec Mon Sep 17 00:00:00 2001
From: Chuhong Yuan <hslester96@gmail.com>
Date: Mon, 9 Dec 2019 16:57:11 +0800
Subject: dmaengine: axi-dmac: add a check for devm_regmap_init_mmio

The driver misses checking the result of devm_regmap_init_mmio().
Add a check to fix it.

Fixes: fc15be39a827 ("dmaengine: axi-dmac: add regmap support")
Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Reviewed-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Link: https://lore.kernel.org/r/20191209085711.16001-1-hslester96@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dma-axi-dmac.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index a0ee404b736e..f1d149e32839 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -830,6 +830,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
 	struct dma_device *dma_dev;
 	struct axi_dmac *dmac;
 	struct resource *res;
+	struct regmap *regmap;
 	int ret;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
@@ -921,10 +922,17 @@ static int axi_dmac_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, dmac);
 
-	devm_regmap_init_mmio(&pdev->dev, dmac->base, &axi_dmac_regmap_config);
+	regmap = devm_regmap_init_mmio(&pdev->dev, dmac->base,
+		 &axi_dmac_regmap_config);
+	if (IS_ERR(regmap)) {
+		ret = PTR_ERR(regmap);
+		goto err_free_irq;
+	}
 
 	return 0;
 
+err_free_irq:
+	free_irq(dmac->irq, dmac);
 err_unregister_of:
 	of_dma_controller_free(pdev->dev.of_node);
 err_unregister_device:
-- 
cgit 


From d1fd03a35efc6285e43f4ef35ef04dbf2c9389c6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 12 Dec 2019 11:46:22 +0000
Subject: dmaengine: ti: edma: Fix error return code in edma_probe()

Return the negative error code -ENOMEM from the error handling path
instead of 0, as is done elsewhere in this function.

Fixes: 2a03c1314506 ("dmaengine: ti: edma: add missed operations")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191212114622.127322-1-weiyongjun1@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/edma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index 0628ee4bf1b4..03a7f647f7b2 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -2342,8 +2342,10 @@ static int edma_probe(struct platform_device *pdev)
 	ecc->channels_mask = devm_kcalloc(dev,
 					   BITS_TO_LONGS(ecc->num_channels),
 					   sizeof(unsigned long), GFP_KERNEL);
-	if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask)
+	if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask) {
+		ret = -ENOMEM;
 		goto err_disable_pm;
+	}
 
 	/* Mark all channels available initially */
 	bitmap_fill(ecc->channels_mask, ecc->num_channels);
-- 
cgit 


From ed5a0ab41555032a944c18bd5f8eb275c740f856 Mon Sep 17 00:00:00 2001
From: Peng Ma <peng.ma@nxp.com>
Date: Thu, 12 Dec 2019 03:38:10 +0000
Subject: dmaengine: fsl-edma: Add eDMA support for QorIQ LS1028A platform

On our platforms (such as LS1021A, LS1012A, LS1043A, LS1046A and LS1028A)
the CHCFG0 - CHCFG15 registers of the eDMA are laid out as follows:
*-----------------------------------------------------------*
|     Offset   |        OTHERS      |           LS1028A     |
|--------------|--------------------|-----------------------|
|     0x0      |        CHCFG0      |           CHCFG3      |
|--------------|--------------------|-----------------------|
|     0x1      |        CHCFG1      |           CHCFG2      |
|--------------|--------------------|-----------------------|
|     0x2      |        CHCFG2      |           CHCFG1      |
|--------------|--------------------|-----------------------|
|     0x3      |        CHCFG3      |           CHCFG0      |
|--------------|--------------------|-----------------------|
|     ...      |        ......      |           ......      |
|--------------|--------------------|-----------------------|
|     0xC      |        CHCFG12     |           CHCFG15     |
|--------------|--------------------|-----------------------|
|     0xD      |        CHCFG13     |           CHCFG14     |
|--------------|--------------------|-----------------------|
|     0xE      |        CHCFG14     |           CHCFG13     |
|--------------|--------------------|-----------------------|
|     0xF      |        CHCFG15     |           CHCFG12     |
*-----------------------------------------------------------*

This patch improves the eDMA driver to fit the LS1028A platform.

Signed-off-by: Peng Ma <peng.ma@nxp.com>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Link: https://lore.kernel.org/r/20191212033714.4090-1-peng.ma@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/fsl-edma-common.c | 5 +++++
 drivers/dma/fsl-edma-common.h | 1 +
 drivers/dma/fsl-edma.c        | 8 ++++++++
 3 files changed, 14 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index b1a7ca91701a..5697c3622699 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -109,10 +109,15 @@ void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 	void __iomem *muxaddr;
 	unsigned int chans_per_mux, ch_off;
+	int endian_diff[4] = {3, 1, -1, -3};
 	u32 dmamux_nr = fsl_chan->edma->drvdata->dmamuxs;
 
 	chans_per_mux = fsl_chan->edma->n_chans / dmamux_nr;
 	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
+
+	if (fsl_chan->edma->drvdata->mux_swap)
+		ch_off += endian_diff[ch_off % 4];
+
 	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
 	slot = EDMAMUX_CHCFG_SOURCE(slot);
 
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index 5eaa2902ed39..67e422590c9a 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -147,6 +147,7 @@ struct fsl_edma_drvdata {
 	enum edma_version	version;
 	u32			dmamuxs;
 	bool			has_dmaclk;
+	bool			mux_swap;
 	int			(*setup_irq)(struct platform_device *pdev,
 					     struct fsl_edma_engine *fsl_edma);
 };
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index b626c06ac2e0..eff7ebd8cf35 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -233,6 +233,13 @@ static struct fsl_edma_drvdata vf610_data = {
 	.setup_irq = fsl_edma_irq_init,
 };
 
+static struct fsl_edma_drvdata ls1028a_data = {
+	.version = v1,
+	.dmamuxs = DMAMUX_NR,
+	.mux_swap = true,
+	.setup_irq = fsl_edma_irq_init,
+};
+
 static struct fsl_edma_drvdata imx7ulp_data = {
 	.version = v3,
 	.dmamuxs = 1,
@@ -242,6 +249,7 @@ static struct fsl_edma_drvdata imx7ulp_data = {
 
 static const struct of_device_id fsl_edma_dt_ids[] = {
 	{ .compatible = "fsl,vf610-edma", .data = &vf610_data},
+	{ .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data},
 	{ .compatible = "fsl,imx7ulp-edma", .data = &imx7ulp_data},
 	{ /* sentinel */ }
 };
-- 
cgit 


From 20f5a659554a6e684da0806939bddf8f6537e936 Mon Sep 17 00:00:00 2001
From: "周琰杰 (Zhou Yanjie)" <zhouyanjie@wanyeetech.com>
Date: Tue, 17 Dec 2019 21:59:00 +0800
Subject: dmaengine: JZ4780: Add support for the X1830.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for probing the dma-jz4780 driver on the X1830 SoC.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Link: https://lore.kernel.org/r/1576591140-125668-4-git-send-email-zhouyanjie@wanyeetech.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dma-jz4780.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index fa626acdc9b9..f8ee4b74dae4 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -1020,12 +1020,19 @@ static const struct jz4780_dma_soc_data x1000_dma_soc_data = {
 	.flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
 };
 
+static const struct jz4780_dma_soc_data x1830_dma_soc_data = {
+	.nb_channels = 32,
+	.transfer_ord_max = 7,
+	.flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
 static const struct of_device_id jz4780_dma_dt_match[] = {
 	{ .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
 	{ .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
 	{ .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
 	{ .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
 	{ .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
+	{ .compatible = "ingenic,x1830-dma", .data = &x1830_dma_soc_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
-- 
cgit 


From dae7a589c18a4d979d5f14b09374e871b995ceb1 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 16 Dec 2019 12:01:16 -0700
Subject: dmaengine: Store module owner in dma_device struct

dma_chan_to_owner() dereferences the driver from the struct device to
obtain the owner and call module_[get|put](). However, if the backing
device is unbound before the dma_device is unregistered, the driver
will be cleared and this will cause a NULL pointer dereference.

Instead, store a pointer to the owner module in the dma_device struct
so the module reference can be properly put when the channel is put, even
if the backing device was destroyed first.

This change helps to support a safer unbind of DMA engines.
If the dma_device is unregistered in the driver's remove function,
there's no guarantee that there are no existing clients, and a user's
action may trigger the WARN_ONCE in dma_async_device_unregister(),
which is unlikely to leave the system in a consistent state.
Instead, a better approach is to allow the backing driver to go away
and fail any subsequent requests to it.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20191216190120.21374-2-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 03ac4b96117c..4b604086b1b3 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -179,7 +179,7 @@ __dma_device_satisfies_mask(struct dma_device *device,
 
 static struct module *dma_chan_to_owner(struct dma_chan *chan)
 {
-	return chan->device->dev->driver->owner;
+	return chan->device->owner;
 }
 
 /**
@@ -919,6 +919,8 @@ int dma_async_device_register(struct dma_device *device)
 		return -EIO;
 	}
 
+	device->owner = device->dev->driver->owner;
+
 	if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) {
 		dev_err(device->dev,
 			"Device claims capability %s, but op is not defined\n",
-- 
cgit 


From 686607106f1fe163f7d017561f3622f39a291de8 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 16 Dec 2019 12:01:17 -0700
Subject: dmaengine: Call module_put() after device_free_chan_resources()

The module reference is taken to ensure the callbacks still exist
when they are called. If the channel holds the last reference to the
module, the module can disappear before device_free_chan_resources() is
called and would cause a call into free'd memory.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20191216190120.21374-3-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 4b604086b1b3..776fdf535a3a 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -250,7 +250,6 @@ static void dma_chan_put(struct dma_chan *chan)
 		return;
 
 	chan->client_count--;
-	module_put(dma_chan_to_owner(chan));
 
 	/* This channel is not in use anymore, free it */
 	if (!chan->client_count && chan->device->device_free_chan_resources) {
@@ -259,6 +258,8 @@ static void dma_chan_put(struct dma_chan *chan)
 		chan->device->device_free_chan_resources(chan);
 	}
 
+	module_put(dma_chan_to_owner(chan));
+
 	/* If the channel is used via a DMA request router, free the mapping */
 	if (chan->router && chan->router->route_free) {
 		chan->router->route_free(chan->router->dev, chan->route_data);
-- 
cgit 


From 11a0fd2b3baa5e4a97197b9cd990b5d05e69d669 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 16 Dec 2019 12:01:18 -0700
Subject: dmaengine: Move dma_channel_rebalance() infrastructure up in code

So it can be called by a release function which is needed higher up in
the code. No functional changes intended.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20191216190120.21374-4-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 288 ++++++++++++++++++++++++------------------------
 1 file changed, 144 insertions(+), 144 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 776fdf535a3a..1f9a6293f15a 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -164,6 +164,150 @@ static struct class dma_devclass = {
 
 /* --- client and device registration --- */
 
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+	struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+	enum dma_transaction_type cap;
+	int err = 0;
+
+	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+	/* 'interrupt', 'private', and 'slave' are channel capabilities,
+	 * but are not associated with an operation so they do not need
+	 * an entry in the channel_table
+	 */
+	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+		if (!channel_table[cap]) {
+			err = -ENOMEM;
+			break;
+		}
+	}
+
+	if (err) {
+		pr_err("initialization failure\n");
+		for_each_dma_cap_mask(cap, dma_cap_mask_all)
+			free_percpu(channel_table[cap]);
+	}
+
+	return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_chan_is_local - returns true if the channel is in the same numa-node as
+ *	the cpu
+ */
+static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
+{
+	int node = dev_to_node(chan->device->dev);
+	return node == NUMA_NO_NODE ||
+		cpumask_test_cpu(cpu, cpumask_of_node(node));
+}
+
+/**
+ * min_chan - returns the channel with min count and in the same numa-node as
+ *	the cpu
+ * @cap: capability to match
+ * @cpu: cpu index which the channel should be close to
+ *
+ * If some channels are close to the given cpu, the one with the lowest
+ * reference count is returned. Otherwise, cpu is ignored and only the
+ * reference count is taken into account.
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	struct dma_chan *min = NULL;
+	struct dma_chan *localmin = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (!dma_has_cap(cap, device->cap_mask) ||
+		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			if (!chan->client_count)
+				continue;
+			if (!min || chan->table_count < min->table_count)
+				min = chan;
+
+			if (dma_chan_is_local(chan, cpu))
+				if (!localmin ||
+				    chan->table_count < localmin->table_count)
+					localmin = chan;
+		}
+	}
+
+	chan = localmin ? localmin : min;
+
+	if (chan)
+		chan->table_count++;
+
+	return chan;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case,  and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.  Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+	struct dma_chan *chan;
+	struct dma_device *device;
+	int cpu;
+	int cap;
+
+	/* undo the last distribution */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			chan->table_count = 0;
+	}
+
+	/* don't populate the channel_table if no clients are available */
+	if (!dmaengine_ref_count)
+		return;
+
+	/* redistribute available channels */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_online_cpu(cpu) {
+			chan = min_chan(cap, cpu);
+			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+		}
+}
+
 #define dma_device_satisfies_mask(device, mask) \
 	__dma_device_satisfies_mask((device), &(mask))
 static int
@@ -289,57 +433,6 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 }
 EXPORT_SYMBOL(dma_sync_wait);
 
-/**
- * dma_cap_mask_all - enable iteration over all operation types
- */
-static dma_cap_mask_t dma_cap_mask_all;
-
-/**
- * dma_chan_tbl_ent - tracks channel allocations per core/operation
- * @chan - associated channel for this entry
- */
-struct dma_chan_tbl_ent {
-	struct dma_chan *chan;
-};
-
-/**
- * channel_table - percpu lookup table for memory-to-memory offload providers
- */
-static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
-
-static int __init dma_channel_table_init(void)
-{
-	enum dma_transaction_type cap;
-	int err = 0;
-
-	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
-
-	/* 'interrupt', 'private', and 'slave' are channel capabilities,
-	 * but are not associated with an operation so they do not need
-	 * an entry in the channel_table
-	 */
-	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
-	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
-	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
-
-	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
-		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
-		if (!channel_table[cap]) {
-			err = -ENOMEM;
-			break;
-		}
-	}
-
-	if (err) {
-		pr_err("initialization failure\n");
-		for_each_dma_cap_mask(cap, dma_cap_mask_all)
-			free_percpu(channel_table[cap]);
-	}
-
-	return err;
-}
-arch_initcall(dma_channel_table_init);
-
 /**
  * dma_find_channel - find a channel to carry out the operation
  * @tx_type: transaction type
@@ -370,97 +463,6 @@ void dma_issue_pending_all(void)
 }
 EXPORT_SYMBOL(dma_issue_pending_all);
 
-/**
- * dma_chan_is_local - returns true if the channel is in the same numa-node as the cpu
- */
-static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
-{
-	int node = dev_to_node(chan->device->dev);
-	return node == NUMA_NO_NODE ||
-		cpumask_test_cpu(cpu, cpumask_of_node(node));
-}
-
-/**
- * min_chan - returns the channel with min count and in the same numa-node as the cpu
- * @cap: capability to match
- * @cpu: cpu index which the channel should be close to
- *
- * If some channels are close to the given cpu, the one with the lowest
- * reference count is returned. Otherwise, cpu is ignored and only the
- * reference count is taken into account.
- * Must be called under dma_list_mutex.
- */
-static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
-{
-	struct dma_device *device;
-	struct dma_chan *chan;
-	struct dma_chan *min = NULL;
-	struct dma_chan *localmin = NULL;
-
-	list_for_each_entry(device, &dma_device_list, global_node) {
-		if (!dma_has_cap(cap, device->cap_mask) ||
-		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
-			continue;
-		list_for_each_entry(chan, &device->channels, device_node) {
-			if (!chan->client_count)
-				continue;
-			if (!min || chan->table_count < min->table_count)
-				min = chan;
-
-			if (dma_chan_is_local(chan, cpu))
-				if (!localmin ||
-				    chan->table_count < localmin->table_count)
-					localmin = chan;
-		}
-	}
-
-	chan = localmin ? localmin : min;
-
-	if (chan)
-		chan->table_count++;
-
-	return chan;
-}
-
-/**
- * dma_channel_rebalance - redistribute the available channels
- *
- * Optimize for cpu isolation (each cpu gets a dedicated channel for an
- * operation type) in the SMP case,  and operation isolation (avoid
- * multi-tasking channels) in the non-SMP case.  Must be called under
- * dma_list_mutex.
- */
-static void dma_channel_rebalance(void)
-{
-	struct dma_chan *chan;
-	struct dma_device *device;
-	int cpu;
-	int cap;
-
-	/* undo the last distribution */
-	for_each_dma_cap_mask(cap, dma_cap_mask_all)
-		for_each_possible_cpu(cpu)
-			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
-
-	list_for_each_entry(device, &dma_device_list, global_node) {
-		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
-			continue;
-		list_for_each_entry(chan, &device->channels, device_node)
-			chan->table_count = 0;
-	}
-
-	/* don't populate the channel_table if no clients are available */
-	if (!dmaengine_ref_count)
-		return;
-
-	/* redistribute available channels */
-	for_each_dma_cap_mask(cap, dma_cap_mask_all)
-		for_each_online_cpu(cpu) {
-			chan = min_chan(cap, cpu);
-			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
-		}
-}
-
 int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 {
 	struct dma_device *device;
@@ -1376,5 +1378,3 @@ static int __init dma_bus_init(void)
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
-
-
-- 
cgit 


From 8ad342a863590b24ce77681b7e081363fb3333f7 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 16 Dec 2019 12:01:19 -0700
Subject: dmaengine: Add reference counting to dma_device struct

Adding a reference count helps drivers properly implement the
unbind-while-in-use case.

References are taken and put every time a channel is allocated or freed.

Once the final reference is put, the device is removed from the
dma_device_list and a release callback function is called to signal
the driver to free the memory.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20191216190120.21374-5-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 57 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 8 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 1f9a6293f15a..e316abe3672d 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -342,6 +342,23 @@ static void balance_ref_count(struct dma_chan *chan)
 	}
 }
 
+static void dma_device_release(struct kref *ref)
+{
+	struct dma_device *device = container_of(ref, struct dma_device, ref);
+
+	list_del_rcu(&device->global_node);
+	dma_channel_rebalance();
+
+	if (device->device_release)
+		device->device_release(device);
+}
+
+static void dma_device_put(struct dma_device *device)
+{
+	lockdep_assert_held(&dma_list_mutex);
+	kref_put(&device->ref, dma_device_release);
+}
+
 /**
  * dma_chan_get - try to grab a dma channel's parent driver module
  * @chan - channel to grab
@@ -362,6 +379,12 @@ static int dma_chan_get(struct dma_chan *chan)
 	if (!try_module_get(owner))
 		return -ENODEV;
 
+	ret = kref_get_unless_zero(&chan->device->ref);
+	if (!ret) {
+		ret = -ENODEV;
+		goto module_put_out;
+	}
+
 	/* allocate upon first client reference */
 	if (chan->device->device_alloc_chan_resources) {
 		ret = chan->device->device_alloc_chan_resources(chan);
@@ -377,6 +400,8 @@ out:
 	return 0;
 
 err_out:
+	dma_device_put(chan->device);
+module_put_out:
 	module_put(owner);
 	return ret;
 }
@@ -402,6 +427,7 @@ static void dma_chan_put(struct dma_chan *chan)
 		chan->device->device_free_chan_resources(chan);
 	}
 
+	dma_device_put(chan->device);
 	module_put(dma_chan_to_owner(chan));
 
 	/* If the channel is used via a DMA request router, free the mapping */
@@ -837,14 +863,14 @@ EXPORT_SYMBOL(dmaengine_get);
  */
 void dmaengine_put(void)
 {
-	struct dma_device *device;
+	struct dma_device *device, *_d;
 	struct dma_chan *chan;
 
 	mutex_lock(&dma_list_mutex);
 	dmaengine_ref_count--;
 	BUG_ON(dmaengine_ref_count < 0);
 	/* drop channel references */
-	list_for_each_entry(device, &dma_device_list, global_node) {
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
 		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
 			continue;
 		list_for_each_entry(chan, &device->channels, device_node)
@@ -906,6 +932,10 @@ static int get_dma_id(struct dma_device *device)
 /**
  * dma_async_device_register - registers DMA devices found
  * @device: &dma_device
+ *
+ * After calling this routine the structure should not be freed except in the
+ * device_release() callback which will be called after
+ * dma_async_device_unregister() is called and no further references are taken.
  */
 int dma_async_device_register(struct dma_device *device)
 {
@@ -999,6 +1029,12 @@ int dma_async_device_register(struct dma_device *device)
 		return -EIO;
 	}
 
+	if (!device->device_release)
+		dev_warn(device->dev,
+			 "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
+
+	kref_init(&device->ref);
+
 	/* note: this only matters in the
 	 * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
 	 */
@@ -1115,13 +1151,8 @@ void dma_async_device_unregister(struct dma_device *device)
 {
 	struct dma_chan *chan;
 
-	mutex_lock(&dma_list_mutex);
-	list_del_rcu(&device->global_node);
-	dma_channel_rebalance();
-	mutex_unlock(&dma_list_mutex);
-
 	list_for_each_entry(chan, &device->channels, device_node) {
-		WARN_ONCE(chan->client_count,
+		WARN_ONCE(!device->device_release && chan->client_count,
 			  "%s called while %d clients hold a reference\n",
 			  __func__, chan->client_count);
 		mutex_lock(&dma_list_mutex);
@@ -1130,6 +1161,16 @@ void dma_async_device_unregister(struct dma_device *device)
 		device_unregister(&chan->dev->device);
 		free_percpu(chan->local);
 	}
+
+	mutex_lock(&dma_list_mutex);
+	/*
+	 * setting DMA_PRIVATE ensures the device being torn down will not
+	 * be used in the channel_table
+	 */
+	dma_cap_set(DMA_PRIVATE, device->cap_mask);
+	dma_channel_rebalance();
+	dma_device_put(device);
+	mutex_unlock(&dma_list_mutex);
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
-- 
cgit 


From bf453a0a18b224f1379658245c892fd9e1907580 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 16 Dec 2019 12:01:20 -0700
Subject: dmaengine: ioat: Support in-use unbind

Don't allocate memory using the devm infrastructure and instead call
kfree with the new dmaengine device_release callback. This ensures
the structures are available until the last reference is dropped.

We also need to ensure we call ioat_shutdown() in ioat_remove() so
that all the channels are quiesced and further transactions fail.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20191216190120.21374-6-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ioat/init.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index a6a6dc432db8..60e9afbb896c 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -556,10 +556,6 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
 	ioat_kobject_del(ioat_dma);
 
 	dma_async_device_unregister(dma);
-
-	dma_pool_destroy(ioat_dma->completion_pool);
-
-	INIT_LIST_HEAD(&dma->channels);
 }
 
 /**
@@ -589,7 +585,7 @@ static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma)
 	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
 
 	for (i = 0; i < dma->chancnt; i++) {
-		ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL);
+		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
 		if (!ioat_chan)
 			break;
 
@@ -624,12 +620,16 @@ static void ioat_free_chan_resources(struct dma_chan *c)
 		return;
 
 	ioat_stop(ioat_chan);
-	ioat_reset_hw(ioat_chan);
 
-	/* Put LTR to idle */
-	if (ioat_dma->version >= IOAT_VER_3_4)
-		writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
-			ioat_chan->reg_base + IOAT_CHAN_LTR_SWSEL_OFFSET);
+	if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) {
+		ioat_reset_hw(ioat_chan);
+
+		/* Put LTR to idle */
+		if (ioat_dma->version >= IOAT_VER_3_4)
+			writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
+			       ioat_chan->reg_base +
+			       IOAT_CHAN_LTR_SWSEL_OFFSET);
+	}
 
 	spin_lock_bh(&ioat_chan->cleanup_lock);
 	spin_lock_bh(&ioat_chan->prep_lock);
@@ -1322,16 +1322,28 @@ static struct pci_driver ioat_pci_driver = {
 	.err_handler	= &ioat_err_handler,
 };
 
+static void release_ioatdma(struct dma_device *device)
+{
+	struct ioatdma_device *d = to_ioatdma_device(device);
+	int i;
+
+	for (i = 0; i < IOAT_MAX_CHANS; i++)
+		kfree(d->idx[i]);
+
+	dma_pool_destroy(d->completion_pool);
+	kfree(d);
+}
+
 static struct ioatdma_device *
 alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
 {
-	struct device *dev = &pdev->dev;
-	struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	struct ioatdma_device *d = kzalloc(sizeof(*d), GFP_KERNEL);
 
 	if (!d)
 		return NULL;
 	d->pdev = pdev;
 	d->reg_base = iobase;
+	d->dma_dev.device_release = release_ioatdma;
 	return d;
 }
 
@@ -1400,6 +1412,8 @@ static void ioat_remove(struct pci_dev *pdev)
 	if (!device)
 		return;
 
+	ioat_shutdown(pdev);
+
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
 		unregister_dca_provider(device->dca, &pdev->dev);
-- 
cgit 


From 83c77940db12112646a2f74d1d21505788812d7f Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Tue, 24 Dec 2019 10:22:15 +0530
Subject: dmaengine: move module_/dma_device_put() after route free

We call dma_device_put() and module_put() after invoking the
.device_free_chan_resources callback, but we should also take care of
router devices and make these calls after the .route_free callback. So
move them after .route_free.

Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index e316abe3672d..0505ea5b002f 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -427,15 +427,15 @@ static void dma_chan_put(struct dma_chan *chan)
 		chan->device->device_free_chan_resources(chan);
 	}
 
-	dma_device_put(chan->device);
-	module_put(dma_chan_to_owner(chan));
-
 	/* If the channel is used via a DMA request router, free the mapping */
 	if (chan->router && chan->router->route_free) {
 		chan->router->route_free(chan->router->dev, chan->route_data);
 		chan->router = NULL;
 		chan->route_data = NULL;
 	}
+
+	dma_device_put(chan->device);
+	module_put(dma_chan_to_owner(chan));
 }
 
 enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
-- 
cgit 


From 08baca4280d8abcf139fa8fec5b3de6f346efbae Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Tue, 24 Dec 2019 10:26:14 +0530
Subject: dmaengine: print more meaningful error message

The error log for a dma_channel_table_init() failure printed a mere
"initialization failure", which is not a very helpful message, so print
additional details such as the function name and the error code.
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 0505ea5b002f..4ac77456e830 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -206,7 +206,7 @@ static int __init dma_channel_table_init(void)
 	}
 
 	if (err) {
-		pr_err("initialization failure\n");
+		pr_err("dmaengine dma_channel_table_init failure: %d\n", err);
 		for_each_dma_cap_mask(cap, dma_cap_mask_all)
 			free_percpu(channel_table[cap]);
 	}
-- 
cgit 


From 5c8aacbbb3b706a024eefc376b1d542d59de5749 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:20 +0100
Subject: dmaengine: bcm2835: do not call vchan_vdesc_fini() with lock held

vchan_vdesc_fini() can't be called locked. Instead, call
vchan_terminate_vdesc() which delays the freeing of the descriptor to
vchan_synchronize().

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20191216105328.15198-2-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/bcm2835-dma.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index e4c593f48575..4768ef26013b 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -797,10 +797,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan)
 
 	/* stop DMA activity */
 	if (c->desc) {
-		if (c->desc->vd.tx.flags & DMA_PREP_INTERRUPT)
-			vchan_terminate_vdesc(&c->desc->vd);
-		else
-			vchan_vdesc_fini(&c->desc->vd);
+		vchan_terminate_vdesc(&c->desc->vd);
 		c->desc = NULL;
 		bcm2835_dma_abort(c);
 	}
-- 
cgit 


From 51fe9cd2bd92d95fc200385187d346e293ceefc9 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:21 +0100
Subject: dmaengine: virt-dma: Add missing locking

Originally freeing descriptors was split into a locked and an unlocked
part. The locked part in vchan_get_all_descriptors() collected all
descriptors on a separate list_head. This was done to allow iterating
over that new list in vchan_dma_desc_free_list() without a lock held.

This became broken in 13bb26ae8850 ("dmaengine: virt-dma: don't always
free descriptor upon completion"). With this commit
vchan_dma_desc_free_list() no longer exclusively operates on the
separate list, but starts to put descriptors which can be reused back on
&vc->desc_allocated. This list operation should have been locked, but
wasn't.
In the meantime, drivers started to call vchan_dma_desc_free_list() with
their lock held so that we now have the situation that
vchan_dma_desc_free_list() is called locked from some drivers and
unlocked from others.
To clean this up we have to do two things:

1. Add missing locking in vchan_dma_desc_free_list()
2. Make sure drivers call vchan_dma_desc_free_list() unlocked

This needs to be done atomically, so in this patch the locking is added
and all drivers are fixed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Green Wan <green.wan@sifive.com>
Tested-by: Green Wan <green.wan@sifive.com>
Link: https://lore.kernel.org/r/20191216105328.15198-3-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c |  8 ++------
 drivers/dma/mediatek/mtk-uart-apdma.c          |  3 ++-
 drivers/dma/owl-dma.c                          |  3 ++-
 drivers/dma/s3c24xx-dma.c                      | 22 +++++++++++-----------
 drivers/dma/sf-pdma/sf-pdma.c                  |  4 ++--
 drivers/dma/sun4i-dma.c                        |  3 ++-
 drivers/dma/virt-dma.c                         |  4 ++++
 7 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
index a1ce307c502f..14c1ac26f866 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -636,14 +636,10 @@ static int dma_chan_terminate_all(struct dma_chan *dchan)
 
 	vchan_get_all_descriptors(&chan->vc, &head);
 
-	/*
-	 * As vchan_dma_desc_free_list can access to desc_allocated list
-	 * we need to call it in vc.lock context.
-	 */
-	vchan_dma_desc_free_list(&chan->vc, &head);
-
 	spin_unlock_irqrestore(&chan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
 	dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
 
 	return 0;
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index c20e6bd4e298..29f1223b285a 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -430,9 +430,10 @@ static int mtk_uart_apdma_terminate_all(struct dma_chan *chan)
 
 	spin_lock_irqsave(&c->vc.lock, flags);
 	vchan_get_all_descriptors(&c->vc, &head);
-	vchan_dma_desc_free_list(&c->vc, &head);
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&c->vc, &head);
+
 	return 0;
 }
 
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
index 023f951189a7..c683051257fd 100644
--- a/drivers/dma/owl-dma.c
+++ b/drivers/dma/owl-dma.c
@@ -674,10 +674,11 @@ static int owl_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	vchan_get_all_descriptors(&vchan->vc, &head);
-	vchan_dma_desc_free_list(&vchan->vc, &head);
 
 	spin_unlock_irqrestore(&vchan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
 	return 0;
 }
 
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 43da8eeb18ef..1ed5dc1f597c 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -519,15 +519,6 @@ static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
 	s3c24xx_dma_start_next_sg(s3cchan, txd);
 }
 
-static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
-				struct s3c24xx_dma_chan *s3cchan)
-{
-	LIST_HEAD(head);
-
-	vchan_get_all_descriptors(&s3cchan->vc, &head);
-	vchan_dma_desc_free_list(&s3cchan->vc, &head);
-}
-
 /*
  * Try to allocate a physical channel.  When successful, assign it to
  * this virtual channel, and initiate the next descriptor.  The
@@ -709,8 +700,9 @@ static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
 {
 	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
 	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	LIST_HEAD(head);
 	unsigned long flags;
-	int ret = 0;
+	int ret;
 
 	spin_lock_irqsave(&s3cchan->vc.lock, flags);
 
@@ -734,7 +726,15 @@ static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	/* Dequeue jobs not yet fired as well */
-	s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+
+	return 0;
+
 unlock:
 	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
 
diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
index 465256fe8b1f..6d0bec947636 100644
--- a/drivers/dma/sf-pdma/sf-pdma.c
+++ b/drivers/dma/sf-pdma/sf-pdma.c
@@ -155,9 +155,9 @@ static void sf_pdma_free_chan_resources(struct dma_chan *dchan)
 	kfree(chan->desc);
 	chan->desc = NULL;
 	vchan_get_all_descriptors(&chan->vchan, &head);
-	vchan_dma_desc_free_list(&chan->vchan, &head);
 	sf_pdma_disclaim_chan(chan);
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&chan->vchan, &head);
 }
 
 static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan,
@@ -220,8 +220,8 @@ static int sf_pdma_terminate_all(struct dma_chan *dchan)
 	chan->desc = NULL;
 	chan->xfer_err = false;
 	vchan_get_all_descriptors(&chan->vchan, &head);
-	vchan_dma_desc_free_list(&chan->vchan, &head);
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&chan->vchan, &head);
 
 	return 0;
 }
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index e397a50058c8..4e1575e731d8 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -885,12 +885,13 @@ static int sun4i_dma_terminate_all(struct dma_chan *chan)
 	}
 
 	spin_lock_irqsave(&vchan->vc.lock, flags);
-	vchan_dma_desc_free_list(&vchan->vc, &head);
 	/* Clear these so the vchan is usable again */
 	vchan->processing = NULL;
 	vchan->pchan = NULL;
 	spin_unlock_irqrestore(&vchan->vc.lock, flags);
 
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
 	return 0;
 }
 
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index ec4adf4260a0..660267ca5e42 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -116,7 +116,11 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 
 	list_for_each_entry_safe(vd, _vd, head, node) {
 		if (dmaengine_desc_test_reuse(&vd->tx)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&vc->lock, flags);
 			list_move_tail(&vd->node, &vc->desc_allocated);
+			spin_unlock_irqrestore(&vc->lock, flags);
 		} else {
 			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
 			list_del(&vd->node);
-- 
cgit 


From df660a2b6dfcf26001af6cd2441268c29416dd64 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:22 +0100
Subject: dmaengine: virt-dma: remove debug message

vchan_dma_desc_free_list() basically open codes vchan_vdesc_fini() in
the loop body. One difference is an additional debug message. As this
isn't overly useful remove it.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191216105328.15198-4-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/virt-dma.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 660267ca5e42..7ba712888ac7 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -122,7 +122,6 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 			list_move_tail(&vd->node, &vc->desc_allocated);
 			spin_unlock_irqrestore(&vc->lock, flags);
 		} else {
-			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
 			list_del(&vd->node);
 			vc->desc_free(vd);
 		}
-- 
cgit 


From f882101122aa110ef8e64be04472e5a47030300e Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:23 +0100
Subject: dmaengine: virt-dma: Do not call desc_free() under a spin_lock

vchan_vdesc_fini() shouldn't be called under a spin_lock. This is done
in two places, once in vchan_terminate_vdesc() and once in
vchan_synchronize(). Instead of freeing the vdesc right away, collect
the aborted vdescs on a separate list and free them along with the other
vdescs. The terminated descs are also freed in vchan_synchronize as done
before this patch.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191216105328.15198-5-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/virt-dma.c |  1 +
 drivers/dma/virt-dma.h | 18 +++++++++---------
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 7ba712888ac7..26e08c7a7465 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -138,6 +138,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
 	INIT_LIST_HEAD(&vc->desc_submitted);
 	INIT_LIST_HEAD(&vc->desc_issued);
 	INIT_LIST_HEAD(&vc->desc_completed);
+	INIT_LIST_HEAD(&vc->desc_terminated);
 
 	tasklet_init(&vc->task, vchan_complete, (unsigned long)vc);
 
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index ab158bac03a7..e213137b6bc1 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -31,9 +31,9 @@ struct virt_dma_chan {
 	struct list_head desc_submitted;
 	struct list_head desc_issued;
 	struct list_head desc_completed;
+	struct list_head desc_terminated;
 
 	struct virt_dma_desc *cyclic;
-	struct virt_dma_desc *vd_terminated;
 };
 
 static inline struct virt_dma_chan *to_virt_chan(struct dma_chan *chan)
@@ -141,11 +141,8 @@ static inline void vchan_terminate_vdesc(struct virt_dma_desc *vd)
 {
 	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
 
-	/* free up stuck descriptor */
-	if (vc->vd_terminated)
-		vchan_vdesc_fini(vc->vd_terminated);
+	list_add_tail(&vd->node, &vc->desc_terminated);
 
-	vc->vd_terminated = vd;
 	if (vc->cyclic == vd)
 		vc->cyclic = NULL;
 }
@@ -179,6 +176,7 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
 	list_splice_tail_init(&vc->desc_submitted, head);
 	list_splice_tail_init(&vc->desc_issued, head);
 	list_splice_tail_init(&vc->desc_completed, head);
+	list_splice_tail_init(&vc->desc_terminated, head);
 }
 
 static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
@@ -207,16 +205,18 @@ static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
  */
 static inline void vchan_synchronize(struct virt_dma_chan *vc)
 {
+	LIST_HEAD(head);
 	unsigned long flags;
 
 	tasklet_kill(&vc->task);
 
 	spin_lock_irqsave(&vc->lock, flags);
-	if (vc->vd_terminated) {
-		vchan_vdesc_fini(vc->vd_terminated);
-		vc->vd_terminated = NULL;
-	}
+
+	list_splice_tail_init(&vc->desc_terminated, &head);
+
 	spin_unlock_irqrestore(&vc->lock, flags);
+
+	vchan_dma_desc_free_list(vc, &head);
 }
 
 #endif
-- 
cgit 


From 9f91e6bfa8666270565fb76d883db2ba2e441585 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:24 +0100
Subject: dmaengine: virt-dma: Add missing locking around list operations

All list operations are protected by &vc->lock. As vchan_vdesc_fini()
is called unlocked add the missing locking around the list operations.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191216105328.15198-6-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/virt-dma.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
index e213137b6bc1..e9f5250fbe4d 100644
--- a/drivers/dma/virt-dma.h
+++ b/drivers/dma/virt-dma.h
@@ -113,10 +113,15 @@ static inline void vchan_vdesc_fini(struct virt_dma_desc *vd)
 {
 	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
 
-	if (dmaengine_desc_test_reuse(&vd->tx))
+	if (dmaengine_desc_test_reuse(&vd->tx)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&vc->lock, flags);
 		list_add(&vd->node, &vc->desc_allocated);
-	else
+		spin_unlock_irqrestore(&vc->lock, flags);
+	} else {
 		vc->desc_free(vd);
+	}
 }
 
 /**
-- 
cgit 


From b167f94d2928f3516176c8107cb698a6d9bb9911 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:25 +0100
Subject: dmaengine: virt-dma: use vchan_vdesc_fini() to free descriptors

vchan_dma_desc_free_list() basically open codes vchan_vdesc_fini() in its
loop body. Call it directly rather than duplicating the code.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20191216105328.15198-7-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/virt-dma.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index 26e08c7a7465..95dfe431777e 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -115,16 +115,8 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 	struct virt_dma_desc *vd, *_vd;
 
 	list_for_each_entry_safe(vd, _vd, head, node) {
-		if (dmaengine_desc_test_reuse(&vd->tx)) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&vc->lock, flags);
-			list_move_tail(&vd->node, &vc->desc_allocated);
-			spin_unlock_irqrestore(&vc->lock, flags);
-		} else {
-			list_del(&vd->node);
-			vc->desc_free(vd);
-		}
+		list_del(&vd->node);
+		vchan_vdesc_fini(vd);
 	}
 }
 EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
-- 
cgit 


From a80f2787f1ff0fd068fdcb8fd949a992029b6f25 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:26 +0100
Subject: dmaengine: imx-sdma: rename function

Rename sdma_disable_channel_async() after the hook it implements, like
done for all other functions in the SDMA driver.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20191216105328.15198-8-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index c27e206a764c..527f8a81f50b 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1077,7 +1077,7 @@ static void sdma_channel_terminate_work(struct work_struct *work)
 	sdmac->context_loaded = false;
 }
 
-static int sdma_disable_channel_async(struct dma_chan *chan)
+static int sdma_terminate_all(struct dma_chan *chan)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 
@@ -1324,7 +1324,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 
-	sdma_disable_channel_async(chan);
+	sdma_terminate_all(chan);
 
 	sdma_channel_synchronize(chan);
 
@@ -2103,7 +2103,7 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
 	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
 	sdma->dma_device.device_config = sdma_config;
-	sdma->dma_device.device_terminate_all = sdma_disable_channel_async;
+	sdma->dma_device.device_terminate_all = sdma_terminate_all;
 	sdma->dma_device.device_synchronize = sdma_channel_synchronize;
 	sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS;
 	sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
-- 
cgit 


From a1ff6a07f5a3951fcac84f064a76d1ad79c10e40 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:27 +0100
Subject: dmaengine: imx-sdma: find desc first in sdma_tx_status

In sdma_tx_status() we must first find the current sdma_desc. In cyclic
mode we assume that this can always be found with vchan_find_desc().
This is true because we do not remove the current descriptor from the
desc_issued list:

	/*
	 * Do not delete the node in desc_issued list in cyclic mode, otherwise
	 * the desc allocated will never be freed in vchan_dma_desc_free_list
	 */
	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
		list_del(&vd->node);

We will change this in the next step, so check if the current descriptor is
the desired one also for the cyclic case.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20191216105328.15198-9-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 527f8a81f50b..99dbfd9039cf 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1648,7 +1648,7 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 				      struct dma_tx_state *txstate)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
-	struct sdma_desc *desc;
+	struct sdma_desc *desc = NULL;
 	u32 residue;
 	struct virt_dma_desc *vd;
 	enum dma_status ret;
@@ -1659,19 +1659,23 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 		return ret;
 
 	spin_lock_irqsave(&sdmac->vc.lock, flags);
+
 	vd = vchan_find_desc(&sdmac->vc, cookie);
-	if (vd) {
+	if (vd)
 		desc = to_sdma_desc(&vd->tx);
+	else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie)
+		desc = sdmac->desc;
+
+	if (desc) {
 		if (sdmac->flags & IMX_DMA_SG_LOOP)
 			residue = (desc->num_bd - desc->buf_ptail) *
 				desc->period_len - desc->chn_real_count;
 		else
 			residue = desc->chn_count - desc->chn_real_count;
-	} else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie) {
-		residue = sdmac->desc->chn_count - sdmac->desc->chn_real_count;
 	} else {
 		residue = 0;
 	}
+
 	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 
 	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
-- 
cgit 


From 02939cd167095f16328a1bd5cab5a90b550606df Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 16 Dec 2019 11:53:28 +0100
Subject: dmaengine: imx-sdma: Fix memory leak

The current descriptor is not on any list of the virtual DMA channel.
If sdma_terminate_all() is called while a descriptor is in flight, that
descriptor is never freed. We have to call vchan_terminate_vdesc() on
this descriptor to re-add it to the lists.
Now that we also free the currently running descriptor we can (and
actually have to) remove the current descriptor from its list also
for the cyclic case.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Tested-by: Robin Gong <yibin.gong@nxp.com>
Link: https://lore.kernel.org/r/20191216105328.15198-10-s.hauer@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 99dbfd9039cf..066b21a32232 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -760,12 +760,8 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
 		return;
 	}
 	sdmac->desc = desc = to_sdma_desc(&vd->tx);
-	/*
-	 * Do not delete the node in desc_issued list in cyclic mode, otherwise
-	 * the desc allocated will never be freed in vchan_dma_desc_free_list
-	 */
-	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
-		list_del(&vd->node);
+
+	list_del(&vd->node);
 
 	sdma->channel_control[channel].base_bd_ptr = desc->bd_phys;
 	sdma->channel_control[channel].current_bd_ptr = desc->bd_phys;
@@ -1071,7 +1067,6 @@ static void sdma_channel_terminate_work(struct work_struct *work)
 
 	spin_lock_irqsave(&sdmac->vc.lock, flags);
 	vchan_get_all_descriptors(&sdmac->vc, &head);
-	sdmac->desc = NULL;
 	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 	vchan_dma_desc_free_list(&sdmac->vc, &head);
 	sdmac->context_loaded = false;
@@ -1080,11 +1075,19 @@ static void sdma_channel_terminate_work(struct work_struct *work)
 static int sdma_terminate_all(struct dma_chan *chan)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
 
 	sdma_disable_channel(chan);
 
-	if (sdmac->desc)
+	if (sdmac->desc) {
+		vchan_terminate_vdesc(&sdmac->desc->vd);
+		sdmac->desc = NULL;
 		schedule_work(&sdmac->terminate_worker);
+	}
+
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 
 	return 0;
 }
-- 
cgit 


From 905ca51e63be794331e09ff74ccddd44393d3378 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 3 Jan 2020 14:20:19 -0700
Subject: dmaengine: plx-dma: Introduce PLX DMA engine PCI driver skeleton

Some PLX Switches can expose DMA engines via extra PCI functions
on the upstream port. Each function will have one DMA channel.

This patch is just the core PCI driver skeleton and dma
engine registration.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20200103212021.2881-2-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig   |   9 +++
 drivers/dma/Makefile  |   1 +
 drivers/dma/plx_dma.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 drivers/dma/plx_dma.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6fa1eba9d477..312a6cc36c78 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -497,6 +497,15 @@ config PXA_DMA
 	  16 to 32 channels for peripheral to memory or memory to memory
 	  transfers.
 
+config PLX_DMA
+	tristate "PLX ExpressLane PEX Switch DMA Engine Support"
+	depends on PCI
+	select DMA_ENGINE
+	help
+	  Some PLX ExpressLane PCI Switches support additional DMA engines.
+	  These are exposed via extra functions on the switch's
+	  upstream port. Each function exposes one DMA channel.
+
 config SIRF_DMA
 	tristate "CSR SiRFprimaII/SiRFmarco DMA support"
 	depends on ARCH_SIRF
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 42d7e2fc64fa..a150d1d792fd 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
 obj-$(CONFIG_OWL_DMA) += owl-dma.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
+obj-$(CONFIG_PLX_DMA) += plx_dma.o
 obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_RENESAS_DMA) += sh/
diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c
new file mode 100644
index 000000000000..e002cbb7d2b6
--- /dev/null
+++ b/drivers/dma/plx_dma.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLX ExpressLane PEX PCI Switch DMA Engine Driver
+ * Copyright (c) 2019, Logan Gunthorpe <logang@deltatee.com>
+ * Copyright (c) 2019, GigaIO Networks, Inc
+ */
+
+#include "dmaengine.h"
+
+#include <linux/dmaengine.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+MODULE_DESCRIPTION("PLX ExpressLane PEX PCI Switch DMA Engine");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Logan Gunthorpe");
+
+struct plx_dma_dev {
+	struct dma_device dma_dev;
+	struct dma_chan dma_chan;
+	void __iomem *bar;
+};
+
+static void plx_dma_release(struct dma_device *dma_dev)
+{
+	struct plx_dma_dev *plxdev =
+		container_of(dma_dev, struct plx_dma_dev, dma_dev);
+
+	put_device(dma_dev->dev);
+	kfree(plxdev);
+}
+
+static int plx_dma_create(struct pci_dev *pdev)
+{
+	struct plx_dma_dev *plxdev;
+	struct dma_device *dma;
+	struct dma_chan *chan;
+	int rc;
+
+	plxdev = kzalloc(sizeof(*plxdev), GFP_KERNEL);
+	if (!plxdev)
+		return -ENOMEM;
+
+	plxdev->bar = pcim_iomap_table(pdev)[0];
+
+	dma = &plxdev->dma_dev;
+	dma->chancnt = 1;
+	INIT_LIST_HEAD(&dma->channels);
+	dma->copy_align = DMAENGINE_ALIGN_1_BYTE;
+	dma->dev = get_device(&pdev->dev);
+
+	dma->device_release = plx_dma_release;
+
+	chan = &plxdev->dma_chan;
+	chan->device = dma;
+	dma_cookie_init(chan);
+	list_add_tail(&chan->device_node, &dma->channels);
+
+	rc = dma_async_device_register(dma);
+	if (rc) {
+		pci_err(pdev, "Failed to register dma device: %d\n", rc);
+		free_irq(pci_irq_vector(pdev, 0),  plxdev);
+		kfree(plxdev);
+		return rc;
+	}
+
+	pci_set_drvdata(pdev, plxdev);
+
+	return 0;
+}
+
+static int plx_dma_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *id)
+{
+	int rc;
+
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (rc)
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (rc)
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pcim_iomap_regions(pdev, 1, KBUILD_MODNAME);
+	if (rc)
+		return rc;
+
+	rc = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (rc <= 0)
+		return rc;
+
+	pci_set_master(pdev);
+
+	rc = plx_dma_create(pdev);
+	if (rc)
+		goto err_free_irq_vectors;
+
+	pci_info(pdev, "PLX DMA Channel Registered\n");
+
+	return 0;
+
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+	return rc;
+}
+
+static void plx_dma_remove(struct pci_dev *pdev)
+{
+	struct plx_dma_dev *plxdev = pci_get_drvdata(pdev);
+
+	free_irq(pci_irq_vector(pdev, 0),  plxdev);
+
+	plxdev->bar = NULL;
+	dma_async_device_unregister(&plxdev->dma_dev);
+
+	pci_free_irq_vectors(pdev);
+}
+
+static const struct pci_device_id plx_dma_pci_tbl[] = {
+	{
+		.vendor		= PCI_VENDOR_ID_PLX,
+		.device		= 0x87D0,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.class		= PCI_CLASS_SYSTEM_OTHER << 8,
+		.class_mask	= 0xFFFFFFFF,
+	},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, plx_dma_pci_tbl);
+
+static struct pci_driver plx_dma_pci_driver = {
+	.name           = KBUILD_MODNAME,
+	.id_table       = plx_dma_pci_tbl,
+	.probe          = plx_dma_probe,
+	.remove		= plx_dma_remove,
+};
+module_pci_driver(plx_dma_pci_driver);
-- 
cgit 


From c2dbcaa8c672d7ecc96c7a62fd6a34c9d3821bdb Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 3 Jan 2020 14:20:20 -0700
Subject: dmaengine: plx-dma: Implement hardware initialization and cleanup

Allocate DMA coherent memory for the ring of DMA descriptors and
program the appropriate hardware registers.

A tasklet is created which is triggered on an interrupt to process
all the finished requests. Additionally, any remaining descriptors
are aborted when the hardware is removed or the resources freed.

Use an RCU pointer to synchronize PCI device unbind.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20200103212021.2881-3-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/plx_dma.c | 370 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 370 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c
index e002cbb7d2b6..f3a5c9e47658 100644
--- a/drivers/dma/plx_dma.c
+++ b/drivers/dma/plx_dma.c
@@ -18,12 +18,358 @@ MODULE_VERSION("0.1");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Logan Gunthorpe");
 
+#define PLX_REG_DESC_RING_ADDR			0x214
+#define PLX_REG_DESC_RING_ADDR_HI		0x218
+#define PLX_REG_DESC_RING_NEXT_ADDR		0x21C
+#define PLX_REG_DESC_RING_COUNT			0x220
+#define PLX_REG_DESC_RING_LAST_ADDR		0x224
+#define PLX_REG_DESC_RING_LAST_SIZE		0x228
+#define PLX_REG_PREF_LIMIT			0x234
+#define PLX_REG_CTRL				0x238
+#define PLX_REG_CTRL2				0x23A
+#define PLX_REG_INTR_CTRL			0x23C
+#define PLX_REG_INTR_STATUS			0x23E
+
+#define PLX_REG_PREF_LIMIT_PREF_FOUR		8
+
+#define PLX_REG_CTRL_GRACEFUL_PAUSE		BIT(0)
+#define PLX_REG_CTRL_ABORT			BIT(1)
+#define PLX_REG_CTRL_WRITE_BACK_EN		BIT(2)
+#define PLX_REG_CTRL_START			BIT(3)
+#define PLX_REG_CTRL_RING_STOP_MODE		BIT(4)
+#define PLX_REG_CTRL_DESC_MODE_BLOCK		(0 << 5)
+#define PLX_REG_CTRL_DESC_MODE_ON_CHIP		(1 << 5)
+#define PLX_REG_CTRL_DESC_MODE_OFF_CHIP		(2 << 5)
+#define PLX_REG_CTRL_DESC_INVALID		BIT(8)
+#define PLX_REG_CTRL_GRACEFUL_PAUSE_DONE	BIT(9)
+#define PLX_REG_CTRL_ABORT_DONE			BIT(10)
+#define PLX_REG_CTRL_IMM_PAUSE_DONE		BIT(12)
+#define PLX_REG_CTRL_IN_PROGRESS		BIT(30)
+
+#define PLX_REG_CTRL_RESET_VAL	(PLX_REG_CTRL_DESC_INVALID | \
+				 PLX_REG_CTRL_GRACEFUL_PAUSE_DONE | \
+				 PLX_REG_CTRL_ABORT_DONE | \
+				 PLX_REG_CTRL_IMM_PAUSE_DONE)
+
+#define PLX_REG_CTRL_START_VAL	(PLX_REG_CTRL_WRITE_BACK_EN | \
+				 PLX_REG_CTRL_DESC_MODE_OFF_CHIP | \
+				 PLX_REG_CTRL_START | \
+				 PLX_REG_CTRL_RESET_VAL)
+
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_64B		0
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_128B	1
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_256B	2
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_512B	3
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_1KB		4
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_2KB		5
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_4B		7
+
+#define PLX_REG_INTR_CRTL_ERROR_EN		BIT(0)
+#define PLX_REG_INTR_CRTL_INV_DESC_EN		BIT(1)
+#define PLX_REG_INTR_CRTL_ABORT_DONE_EN		BIT(3)
+#define PLX_REG_INTR_CRTL_PAUSE_DONE_EN		BIT(4)
+#define PLX_REG_INTR_CRTL_IMM_PAUSE_DONE_EN	BIT(5)
+
+#define PLX_REG_INTR_STATUS_ERROR		BIT(0)
+#define PLX_REG_INTR_STATUS_INV_DESC		BIT(1)
+#define PLX_REG_INTR_STATUS_DESC_DONE		BIT(2)
+#define PLX_REG_INTR_CRTL_ABORT_DONE		BIT(3)
+
+struct plx_dma_hw_std_desc {
+	__le32 flags_and_size;
+	__le16 dst_addr_hi;
+	__le16 src_addr_hi;
+	__le32 dst_addr_lo;
+	__le32 src_addr_lo;
+};
+
+#define PLX_DESC_SIZE_MASK		0x7ffffff
+#define PLX_DESC_FLAG_VALID		BIT(31)
+#define PLX_DESC_FLAG_INT_WHEN_DONE	BIT(30)
+
+#define PLX_DESC_WB_SUCCESS		BIT(30)
+#define PLX_DESC_WB_RD_FAIL		BIT(29)
+#define PLX_DESC_WB_WR_FAIL		BIT(28)
+
+#define PLX_DMA_RING_COUNT		2048
+
+struct plx_dma_desc {
+	struct dma_async_tx_descriptor txd;
+	struct plx_dma_hw_std_desc *hw;
+	u32 orig_size;
+};
+
 struct plx_dma_dev {
 	struct dma_device dma_dev;
 	struct dma_chan dma_chan;
+	struct pci_dev __rcu *pdev;
 	void __iomem *bar;
+	struct tasklet_struct desc_task;
+
+	spinlock_t ring_lock;
+	bool ring_active;
+	int head;
+	int tail;
+	struct plx_dma_hw_std_desc *hw_ring;
+	dma_addr_t hw_ring_dma;
+	struct plx_dma_desc **desc_ring;
 };
 
+static struct plx_dma_dev *chan_to_plx_dma_dev(struct dma_chan *c)
+{
+	return container_of(c, struct plx_dma_dev, dma_chan);
+}
+
+static struct plx_dma_desc *plx_dma_get_desc(struct plx_dma_dev *plxdev, int i)
+{
+	return plxdev->desc_ring[i & (PLX_DMA_RING_COUNT - 1)];
+}
+
+static void plx_dma_process_desc(struct plx_dma_dev *plxdev)
+{
+	struct dmaengine_result res;
+	struct plx_dma_desc *desc;
+	u32 flags;
+
+	spin_lock_bh(&plxdev->ring_lock);
+
+	while (plxdev->tail != plxdev->head) {
+		desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+		flags = le32_to_cpu(READ_ONCE(desc->hw->flags_and_size));
+
+		if (flags & PLX_DESC_FLAG_VALID)
+			break;
+
+		res.residue = desc->orig_size - (flags & PLX_DESC_SIZE_MASK);
+
+		if (flags & PLX_DESC_WB_SUCCESS)
+			res.result = DMA_TRANS_NOERROR;
+		else if (flags & PLX_DESC_WB_WR_FAIL)
+			res.result = DMA_TRANS_WRITE_FAILED;
+		else
+			res.result = DMA_TRANS_READ_FAILED;
+
+		dma_cookie_complete(&desc->txd);
+		dma_descriptor_unmap(&desc->txd);
+		dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+		desc->txd.callback = NULL;
+		desc->txd.callback_result = NULL;
+
+		plxdev->tail++;
+	}
+
+	spin_unlock_bh(&plxdev->ring_lock);
+}
+
+static void plx_dma_abort_desc(struct plx_dma_dev *plxdev)
+{
+	struct dmaengine_result res;
+	struct plx_dma_desc *desc;
+
+	plx_dma_process_desc(plxdev);
+
+	spin_lock_bh(&plxdev->ring_lock);
+
+	while (plxdev->tail != plxdev->head) {
+		desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+		res.residue = desc->orig_size;
+		res.result = DMA_TRANS_ABORTED;
+
+		dma_cookie_complete(&desc->txd);
+		dma_descriptor_unmap(&desc->txd);
+		dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+		desc->txd.callback = NULL;
+		desc->txd.callback_result = NULL;
+
+		plxdev->tail++;
+	}
+
+	spin_unlock_bh(&plxdev->ring_lock);
+}
+
+static void __plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+	u32 val;
+
+	val = readl(plxdev->bar + PLX_REG_CTRL);
+	if (!(val & ~PLX_REG_CTRL_GRACEFUL_PAUSE))
+		return;
+
+	writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+	       plxdev->bar + PLX_REG_CTRL);
+
+	while (!time_after(jiffies, timeout)) {
+		val = readl(plxdev->bar + PLX_REG_CTRL);
+		if (val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE)
+			break;
+
+		cpu_relax();
+	}
+
+	if (!(val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE))
+		dev_err(plxdev->dma_dev.dev,
+			"Timeout waiting for graceful pause!\n");
+
+	writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+	       plxdev->bar + PLX_REG_CTRL);
+
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+	writel(0, plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+}
+
+static void plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	__plx_dma_stop(plxdev);
+
+	rcu_read_unlock();
+}
+
+static void plx_dma_desc_task(unsigned long data)
+{
+	struct plx_dma_dev *plxdev = (void *)data;
+
+	plx_dma_process_desc(plxdev);
+}
+
+static irqreturn_t plx_dma_isr(int irq, void *devid)
+{
+	struct plx_dma_dev *plxdev = devid;
+	u32 status;
+
+	status = readw(plxdev->bar + PLX_REG_INTR_STATUS);
+
+	if (!status)
+		return IRQ_NONE;
+
+	if (status & PLX_REG_INTR_STATUS_DESC_DONE && plxdev->ring_active)
+		tasklet_schedule(&plxdev->desc_task);
+
+	writew(status, plxdev->bar + PLX_REG_INTR_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+static int plx_dma_alloc_desc(struct plx_dma_dev *plxdev)
+{
+	struct plx_dma_desc *desc;
+	int i;
+
+	plxdev->desc_ring = kcalloc(PLX_DMA_RING_COUNT,
+				    sizeof(*plxdev->desc_ring), GFP_KERNEL);
+	if (!plxdev->desc_ring)
+		return -ENOMEM;
+
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++) {
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			goto free_and_exit;
+
+		dma_async_tx_descriptor_init(&desc->txd, &plxdev->dma_chan);
+		desc->hw = &plxdev->hw_ring[i];
+		plxdev->desc_ring[i] = desc;
+	}
+
+	return 0;
+
+free_and_exit:
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+		kfree(plxdev->desc_ring[i]);
+	kfree(plxdev->desc_ring);
+	return -ENOMEM;
+}
+
+static int plx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+	int rc;
+
+	plxdev->head = plxdev->tail = 0;
+	plxdev->hw_ring = dma_alloc_coherent(plxdev->dma_dev.dev, ring_sz,
+					     &plxdev->hw_ring_dma, GFP_KERNEL);
+	if (!plxdev->hw_ring)
+		return -ENOMEM;
+
+	rc = plx_dma_alloc_desc(plxdev);
+	if (rc)
+		goto out_free_hw_ring;
+
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		rc = -ENODEV;
+		goto out_free_hw_ring;
+	}
+
+	writel(PLX_REG_CTRL_RESET_VAL, plxdev->bar + PLX_REG_CTRL);
+	writel(lower_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_ADDR);
+	writel(upper_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+	writel(lower_32_bits(plxdev->hw_ring_dma),
+	       plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+	writel(PLX_DMA_RING_COUNT, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+	writel(PLX_REG_PREF_LIMIT_PREF_FOUR, plxdev->bar + PLX_REG_PREF_LIMIT);
+
+	plxdev->ring_active = true;
+
+	rcu_read_unlock();
+
+	return PLX_DMA_RING_COUNT;
+
+out_free_hw_ring:
+	dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+			  plxdev->hw_ring_dma);
+	return rc;
+}
+
+static void plx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+	struct pci_dev *pdev;
+	int irq = -1;
+	int i;
+
+	spin_lock_bh(&plxdev->ring_lock);
+	plxdev->ring_active = false;
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	plx_dma_stop(plxdev);
+
+	rcu_read_lock();
+	pdev = rcu_dereference(plxdev->pdev);
+	if (pdev)
+		irq = pci_irq_vector(pdev, 0);
+	rcu_read_unlock();
+
+	if (irq > 0)
+		synchronize_irq(irq);
+
+	tasklet_kill(&plxdev->desc_task);
+
+	plx_dma_abort_desc(plxdev);
+
+	for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+		kfree(plxdev->desc_ring[i]);
+
+	kfree(plxdev->desc_ring);
+	dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+			  plxdev->hw_ring_dma);
+
+}
+
 static void plx_dma_release(struct dma_device *dma_dev)
 {
 	struct plx_dma_dev *plxdev =
@@ -44,6 +390,18 @@ static int plx_dma_create(struct pci_dev *pdev)
 	if (!plxdev)
 		return -ENOMEM;
 
+	rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0,
+			 KBUILD_MODNAME, plxdev);
+	if (rc) {
+		kfree(plxdev);
+		return rc;
+	}
+
+	spin_lock_init(&plxdev->ring_lock);
+	tasklet_init(&plxdev->desc_task, plx_dma_desc_task,
+		     (unsigned long)plxdev);
+
+	RCU_INIT_POINTER(plxdev->pdev, pdev);
 	plxdev->bar = pcim_iomap_table(pdev)[0];
 
 	dma = &plxdev->dma_dev;
@@ -52,6 +410,8 @@ static int plx_dma_create(struct pci_dev *pdev)
 	dma->copy_align = DMAENGINE_ALIGN_1_BYTE;
 	dma->dev = get_device(&pdev->dev);
 
+	dma->device_alloc_chan_resources = plx_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = plx_dma_free_chan_resources;
 	dma->device_release = plx_dma_release;
 
 	chan = &plxdev->dma_chan;
@@ -122,6 +482,16 @@ static void plx_dma_remove(struct pci_dev *pdev)
 
 	free_irq(pci_irq_vector(pdev, 0),  plxdev);
 
+	rcu_assign_pointer(plxdev->pdev, NULL);
+	synchronize_rcu();
+
+	spin_lock_bh(&plxdev->ring_lock);
+	plxdev->ring_active = false;
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	__plx_dma_stop(plxdev);
+	plx_dma_abort_desc(plxdev);
+
 	plxdev->bar = NULL;
 	dma_async_device_unregister(&plxdev->dma_dev);
 
-- 
cgit 


From 4d3df16840a2bba5a345326426380e1381d976d2 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 3 Jan 2020 14:20:21 -0700
Subject: dmaengine: plx-dma: Implement descriptor submission

On prep, a spin lock is taken and the next entry in the circular buffer
is filled. On submit, the valid bit is set in the hardware descriptor
and the lock is released.

The DMA engine is started (if it's not already running) when the client
calls dma_async_issue_pending().

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Link: https://lore.kernel.org/r/20200103212021.2881-4-logang@deltatee.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/plx_dma.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c
index f3a5c9e47658..db4c5fd453a9 100644
--- a/drivers/dma/plx_dma.c
+++ b/drivers/dma/plx_dma.c
@@ -7,6 +7,7 @@
 
 #include "dmaengine.h"
 
+#include <linux/circ_buf.h>
 #include <linux/dmaengine.h>
 #include <linux/kref.h>
 #include <linux/list.h>
@@ -120,6 +121,11 @@ static struct plx_dma_dev *chan_to_plx_dma_dev(struct dma_chan *c)
 	return container_of(c, struct plx_dma_dev, dma_chan);
 }
 
+static struct plx_dma_desc *to_plx_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct plx_dma_desc, txd);
+}
+
 static struct plx_dma_desc *plx_dma_get_desc(struct plx_dma_dev *plxdev, int i)
 {
 	return plxdev->desc_ring[i & (PLX_DMA_RING_COUNT - 1)];
@@ -242,6 +248,113 @@ static void plx_dma_desc_task(unsigned long data)
 	plx_dma_process_desc(plxdev);
 }
 
+static struct dma_async_tx_descriptor *plx_dma_prep_memcpy(struct dma_chan *c,
+		dma_addr_t dma_dst, dma_addr_t dma_src, size_t len,
+		unsigned long flags)
+	__acquires(plxdev->ring_lock)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(c);
+	struct plx_dma_desc *plxdesc;
+
+	spin_lock_bh(&plxdev->ring_lock);
+	if (!plxdev->ring_active)
+		goto err_unlock;
+
+	if (!CIRC_SPACE(plxdev->head, plxdev->tail, PLX_DMA_RING_COUNT))
+		goto err_unlock;
+
+	if (len > PLX_DESC_SIZE_MASK)
+		goto err_unlock;
+
+	plxdesc = plx_dma_get_desc(plxdev, plxdev->head);
+	plxdev->head++;
+
+	plxdesc->hw->dst_addr_lo = cpu_to_le32(lower_32_bits(dma_dst));
+	plxdesc->hw->dst_addr_hi = cpu_to_le16(upper_32_bits(dma_dst));
+	plxdesc->hw->src_addr_lo = cpu_to_le32(lower_32_bits(dma_src));
+	plxdesc->hw->src_addr_hi = cpu_to_le16(upper_32_bits(dma_src));
+
+	plxdesc->orig_size = len;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		len |= PLX_DESC_FLAG_INT_WHEN_DONE;
+
+	plxdesc->hw->flags_and_size = cpu_to_le32(len);
+	plxdesc->txd.flags = flags;
+
+	/* return with the lock held, it will be released in tx_submit */
+
+	return &plxdesc->txd;
+
+err_unlock:
+	/*
+	 * Keep sparse happy by restoring an even lock count on
+	 * this lock.
+	 */
+	__acquire(plxdev->ring_lock);
+
+	spin_unlock_bh(&plxdev->ring_lock);
+	return NULL;
+}
+
+static dma_cookie_t plx_dma_tx_submit(struct dma_async_tx_descriptor *desc)
+	__releases(plxdev->ring_lock)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(desc->chan);
+	struct plx_dma_desc *plxdesc = to_plx_desc(desc);
+	dma_cookie_t cookie;
+
+	cookie = dma_cookie_assign(desc);
+
+	/*
+	 * Ensure the descriptor updates are visible to the dma device
+	 * before setting the valid bit.
+	 */
+	wmb();
+
+	plxdesc->hw->flags_and_size |= cpu_to_le32(PLX_DESC_FLAG_VALID);
+
+	spin_unlock_bh(&plxdev->ring_lock);
+
+	return cookie;
+}
+
+static enum dma_status plx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	plx_dma_process_desc(plxdev);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void plx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+
+	rcu_read_lock();
+	if (!rcu_dereference(plxdev->pdev)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/*
+	 * Ensure the valid bits are visible before starting the
+	 * DMA engine.
+	 */
+	wmb();
+
+	writew(PLX_REG_CTRL_START_VAL, plxdev->bar + PLX_REG_CTRL);
+
+	rcu_read_unlock();
+}
+
 static irqreturn_t plx_dma_isr(int irq, void *devid)
 {
 	struct plx_dma_dev *plxdev = devid;
@@ -276,7 +389,9 @@ static int plx_dma_alloc_desc(struct plx_dma_dev *plxdev)
 			goto free_and_exit;
 
 		dma_async_tx_descriptor_init(&desc->txd, &plxdev->dma_chan);
+		desc->txd.tx_submit = plx_dma_tx_submit;
 		desc->hw = &plxdev->hw_ring[i];
+
 		plxdev->desc_ring[i] = desc;
 	}
 
@@ -407,11 +522,15 @@ static int plx_dma_create(struct pci_dev *pdev)
 	dma = &plxdev->dma_dev;
 	dma->chancnt = 1;
 	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
 	dma->copy_align = DMAENGINE_ALIGN_1_BYTE;
 	dma->dev = get_device(&pdev->dev);
 
 	dma->device_alloc_chan_resources = plx_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = plx_dma_free_chan_resources;
+	dma->device_prep_dma_memcpy = plx_dma_prep_memcpy;
+	dma->device_issue_pending = plx_dma_issue_pending;
+	dma->device_tx_status = plx_dma_tx_status;
 	dma->device_release = plx_dma_release;
 
 	chan = &plxdev->dma_chan;
-- 
cgit 


From 4db8fd32ed2be7cc510e51e43ec3349aa64074a9 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 23 Dec 2019 13:04:44 +0200
Subject: dmaengine: Add metadata_ops for dma_async_tx_descriptor

The metadata is best described as side band data or parameters traveling
alongside the data DMAd by the DMA engine. It is data which is understood
by the peripheral and the peripheral driver only; the DMA engine sees it
only as a data block and does not interpret it in any way.

The metadata can be different per descriptor as it is a parameter for the
data being transferred.

If the DMA supports per descriptor metadata it can implement the attach,
get_ptr/set_len callbacks.

Client drivers must only use either attach or get_ptr/set_len to avoid
misconfiguration.

Client driver can check if a given metadata mode is supported by the
channel during probe time with
dmaengine_is_metadata_mode_supported(chan, DESC_METADATA_CLIENT);
dmaengine_is_metadata_mode_supported(chan, DESC_METADATA_ENGINE);

and based on this information can use either mode.

Wrappers are also added for the metadata_ops.

To be used in DESC_METADATA_CLIENT mode:
dmaengine_desc_attach_metadata()

To be used in DESC_METADATA_ENGINE mode:
dmaengine_desc_get_metadata_ptr()
dmaengine_desc_set_metadata_len()

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Tero Kristo <t-kristo@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Link: https://lore.kernel.org/r/20191223110458.30766-5-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 4ac77456e830..158aeb1b6a8a 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1348,6 +1348,79 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
+static inline int desc_check_and_set_metadata_mode(
+	struct dma_async_tx_descriptor *desc, enum dma_desc_metadata_mode mode)
+{
+	/* Make sure that the metadata mode is not mixed */
+	if (!desc->desc_metadata_mode) {
+		if (dmaengine_is_metadata_mode_supported(desc->chan, mode))
+			desc->desc_metadata_mode = mode;
+		else
+			return -ENOTSUPP;
+	} else if (desc->desc_metadata_mode != mode) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc,
+				   void *data, size_t len)
+{
+	int ret;
+
+	if (!desc)
+		return -EINVAL;
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_CLIENT);
+	if (ret)
+		return ret;
+
+	if (!desc->metadata_ops || !desc->metadata_ops->attach)
+		return -ENOTSUPP;
+
+	return desc->metadata_ops->attach(desc, data, len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_attach_metadata);
+
+void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+				      size_t *payload_len, size_t *max_len)
+{
+	int ret;
+
+	if (!desc)
+		return ERR_PTR(-EINVAL);
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (!desc->metadata_ops || !desc->metadata_ops->get_ptr)
+		return ERR_PTR(-ENOTSUPP);
+
+	return desc->metadata_ops->get_ptr(desc, payload_len, max_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_get_metadata_ptr);
+
+int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc,
+				    size_t payload_len)
+{
+	int ret;
+
+	if (!desc)
+		return -EINVAL;
+
+	ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+	if (ret)
+		return ret;
+
+	if (!desc->metadata_ops || !desc->metadata_ops->set_len)
+		return -ENOTSUPP;
+
+	return desc->metadata_ops->set_len(desc, payload_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_set_metadata_len);
+
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
  */
-- 
cgit 


From 6755ec06d1333765d2b935e4e4a5bd011332bac6 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 23 Dec 2019 13:04:45 +0200
Subject: dmaengine: Add support for reporting DMA cached data amount

DMA hardware can have a big cache or FIFO, and the amount of data sitting
in the DMA fabric can be of interest to clients.

For example, in audio we want to know the delay in the data flow, and if
the DMA has a significantly large FIFO/cache it can affect the
latency/delay.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Tero Kristo <t-kristo@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Link: https://lore.kernel.org/r/20191223110458.30766-6-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
index 501c0b063f85..b0b97475707a 100644
--- a/drivers/dma/dmaengine.h
+++ b/drivers/dma/dmaengine.h
@@ -77,6 +77,7 @@ static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
 		state->last = complete;
 		state->used = used;
 		state->residue = 0;
+		state->in_flight_bytes = 0;
 	}
 	return dma_async_is_complete(cookie, complete, used);
 }
@@ -87,6 +88,13 @@ static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
 		state->residue = residue;
 }
 
+static inline void dma_set_in_flight_bytes(struct dma_tx_state *state,
+					   u32 in_flight_bytes)
+{
+	if (state)
+		state->in_flight_bytes = in_flight_bytes;
+}
+
 struct dmaengine_desc_callback {
 	dma_async_tx_callback callback;
 	dma_async_tx_callback_result callback_result;
-- 
cgit 


From 8c6bb62f6b4a24c446511e8f894251946dcc2ef1 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 23 Dec 2019 13:04:48 +0200
Subject: dmaengine: ti: k3 PSI-L remote endpoint configuration

In K3 architecture the DMA operates within threads. One end of the thread
is UDMAP, the other is on the peripheral side.

The UDMAP channel configuration depends on the needs of the remote
endpoint and it can differ from peripheral to peripheral.

This patch adds a database for am654 and j721e and a small API to fetch the
PSI-L endpoint configuration from the database, which should only be used
by the DMA driver(s).

Another API is added for native peripherals to make it possible to pass a
new configuration for the threads they are using. This is needed to handle
changes caused by, for example, different firmware being loaded for the
peripheral.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Link: https://lore.kernel.org/r/20191223110458.30766-9-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/Kconfig         |   3 +
 drivers/dma/ti/Makefile        |   1 +
 drivers/dma/ti/k3-psil-am654.c | 175 ++++++++++++++++++++++++++++++++
 drivers/dma/ti/k3-psil-j721e.c | 222 +++++++++++++++++++++++++++++++++++++++++
 drivers/dma/ti/k3-psil-priv.h  |  39 ++++++++
 drivers/dma/ti/k3-psil.c       |  93 +++++++++++++++++
 6 files changed, 533 insertions(+)
 create mode 100644 drivers/dma/ti/k3-psil-am654.c
 create mode 100644 drivers/dma/ti/k3-psil-j721e.c
 create mode 100644 drivers/dma/ti/k3-psil-priv.h
 create mode 100644 drivers/dma/ti/k3-psil.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index d507c24fbf31..72f3d2728178 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -34,5 +34,8 @@ config DMA_OMAP
 	  Enable support for the TI sDMA (System DMA or DMA4) controller. This
 	  DMA engine is found on OMAP and DRA7xx parts.
 
+config TI_K3_PSIL
+	bool
+
 config TI_DMA_CROSSBAR
 	bool
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index 113e59ec9c32..f8d912ad7eaf 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -2,4 +2,5 @@
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o k3-psil-am654.o k3-psil-j721e.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/k3-psil-am654.c b/drivers/dma/ti/k3-psil-am654.c
new file mode 100644
index 000000000000..7da9242b6114
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am654.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+		},					\
+	}
+
+#define PSIL_PDMA_XY_PKT(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pkt_mode = 1,			\
+		},					\
+	}
+
+#define PSIL_ETHERNET(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 16,			\
+		},					\
+	}
+
+#define PSIL_SA2UL(x, tx)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 64,			\
+			.notdpkt = tx,			\
+		},					\
+	}
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+struct psil_ep am654_src_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0x4000, 0),
+	PSIL_SA2UL(0x4001, 0),
+	PSIL_SA2UL(0x4002, 0),
+	PSIL_SA2UL(0x4003, 0),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0x4100),
+	PSIL_ETHERNET(0x4101),
+	PSIL_ETHERNET(0x4102),
+	PSIL_ETHERNET(0x4103),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0x4200),
+	PSIL_ETHERNET(0x4201),
+	PSIL_ETHERNET(0x4202),
+	PSIL_ETHERNET(0x4203),
+	/* PRU_ICSSG2 */
+	PSIL_ETHERNET(0x4300),
+	PSIL_ETHERNET(0x4301),
+	PSIL_ETHERNET(0x4302),
+	PSIL_ETHERNET(0x4303),
+	/* PDMA0 - McASPs */
+	PSIL_PDMA_XY_TR(0x4400),
+	PSIL_PDMA_XY_TR(0x4401),
+	PSIL_PDMA_XY_TR(0x4402),
+	/* PDMA1 - SPI0-4 */
+	PSIL_PDMA_XY_PKT(0x4500),
+	PSIL_PDMA_XY_PKT(0x4501),
+	PSIL_PDMA_XY_PKT(0x4502),
+	PSIL_PDMA_XY_PKT(0x4503),
+	PSIL_PDMA_XY_PKT(0x4504),
+	PSIL_PDMA_XY_PKT(0x4505),
+	PSIL_PDMA_XY_PKT(0x4506),
+	PSIL_PDMA_XY_PKT(0x4507),
+	PSIL_PDMA_XY_PKT(0x4508),
+	PSIL_PDMA_XY_PKT(0x4509),
+	PSIL_PDMA_XY_PKT(0x450a),
+	PSIL_PDMA_XY_PKT(0x450b),
+	PSIL_PDMA_XY_PKT(0x450c),
+	PSIL_PDMA_XY_PKT(0x450d),
+	PSIL_PDMA_XY_PKT(0x450e),
+	PSIL_PDMA_XY_PKT(0x450f),
+	PSIL_PDMA_XY_PKT(0x4510),
+	PSIL_PDMA_XY_PKT(0x4511),
+	PSIL_PDMA_XY_PKT(0x4512),
+	PSIL_PDMA_XY_PKT(0x4513),
+	/* PDMA1 - USART0-2 */
+	PSIL_PDMA_XY_PKT(0x4514),
+	PSIL_PDMA_XY_PKT(0x4515),
+	PSIL_PDMA_XY_PKT(0x4516),
+	/* CPSW0 */
+	PSIL_ETHERNET(0x7000),
+	/* MCU_PDMA0 - ADCs */
+	PSIL_PDMA_XY_TR(0x7100),
+	PSIL_PDMA_XY_TR(0x7101),
+	PSIL_PDMA_XY_TR(0x7102),
+	PSIL_PDMA_XY_TR(0x7103),
+	/* MCU_PDMA1 - MCU_SPI0-2 */
+	PSIL_PDMA_XY_PKT(0x7200),
+	PSIL_PDMA_XY_PKT(0x7201),
+	PSIL_PDMA_XY_PKT(0x7202),
+	PSIL_PDMA_XY_PKT(0x7203),
+	PSIL_PDMA_XY_PKT(0x7204),
+	PSIL_PDMA_XY_PKT(0x7205),
+	PSIL_PDMA_XY_PKT(0x7206),
+	PSIL_PDMA_XY_PKT(0x7207),
+	PSIL_PDMA_XY_PKT(0x7208),
+	PSIL_PDMA_XY_PKT(0x7209),
+	PSIL_PDMA_XY_PKT(0x720a),
+	PSIL_PDMA_XY_PKT(0x720b),
+	/* MCU_PDMA1 - MCU_USART0 */
+	PSIL_PDMA_XY_PKT(0x7212),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+struct psil_ep am654_dst_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0xc000, 1),
+	PSIL_SA2UL(0xc001, 1),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0xc100),
+	PSIL_ETHERNET(0xc101),
+	PSIL_ETHERNET(0xc102),
+	PSIL_ETHERNET(0xc103),
+	PSIL_ETHERNET(0xc104),
+	PSIL_ETHERNET(0xc105),
+	PSIL_ETHERNET(0xc106),
+	PSIL_ETHERNET(0xc107),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0xc200),
+	PSIL_ETHERNET(0xc201),
+	PSIL_ETHERNET(0xc202),
+	PSIL_ETHERNET(0xc203),
+	PSIL_ETHERNET(0xc204),
+	PSIL_ETHERNET(0xc205),
+	PSIL_ETHERNET(0xc206),
+	PSIL_ETHERNET(0xc207),
+	/* PRU_ICSSG2 */
+	PSIL_ETHERNET(0xc300),
+	PSIL_ETHERNET(0xc301),
+	PSIL_ETHERNET(0xc302),
+	PSIL_ETHERNET(0xc303),
+	PSIL_ETHERNET(0xc304),
+	PSIL_ETHERNET(0xc305),
+	PSIL_ETHERNET(0xc306),
+	PSIL_ETHERNET(0xc307),
+	/* CPSW0 */
+	PSIL_ETHERNET(0xf000),
+	PSIL_ETHERNET(0xf001),
+	PSIL_ETHERNET(0xf002),
+	PSIL_ETHERNET(0xf003),
+	PSIL_ETHERNET(0xf004),
+	PSIL_ETHERNET(0xf005),
+	PSIL_ETHERNET(0xf006),
+	PSIL_ETHERNET(0xf007),
+};
+
+struct psil_ep_map am654_ep_map = {
+	.name = "am654",
+	.src = am654_src_ep_map,
+	.src_count = ARRAY_SIZE(am654_src_ep_map),
+	.dst = am654_dst_ep_map,
+	.dst_count = ARRAY_SIZE(am654_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
new file mode 100644
index 000000000000..a609d496fddd
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+/* struct psil_ep initializer: PDMA_XY endpoint with thread ID @x, TR mode */
+#define PSIL_PDMA_XY_TR(x)				\
+	{						\
+		.thread_id = (x),			\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+		},					\
+	}
+
+/* struct psil_ep initializer: PDMA_XY endpoint with thread ID @x, packet mode */
+#define PSIL_PDMA_XY_PKT(x)				\
+	{						\
+		.thread_id = (x),			\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pkt_mode = 1,			\
+		},					\
+	}
+
+/*
+ * struct psil_ep initializer: PDMA_XY endpoint with thread ID @x and the
+ * pdma_acc32 and pdma_burst options set.
+ */
+#define PSIL_PDMA_MCASP(x)				\
+	{						\
+		.thread_id = (x),			\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pdma_acc32 = 1,		\
+			.pdma_burst = 1,		\
+		},					\
+	}
+
+/*
+ * struct psil_ep initializer: native endpoint with thread ID @x, packet mode,
+ * EPIB required and a psd_size of 16.
+ */
+#define PSIL_ETHERNET(x)				\
+	{						\
+		.thread_id = (x),			\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 16,			\
+		},					\
+	}
+
+/*
+ * struct psil_ep initializer: native endpoint with thread ID @x, packet mode,
+ * EPIB required and a psd_size of 64. @tx is stored in notdpkt.
+ */
+#define PSIL_SA2UL(x, tx)				\
+	{						\
+		.thread_id = (x),			\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+			.pkt_mode = 1,			\
+			.needs_epib = 1,		\
+			.psd_size = 64,			\
+			.notdpkt = (tx),		\
+		},					\
+	}
+
+/*
+ * PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM).
+ *
+ * Each entry pairs a thread ID with its endpoint configuration. The table is
+ * published through j721e_ep_map.src.
+ */
+struct psil_ep j721e_src_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0x4000, 0),
+	PSIL_SA2UL(0x4001, 0),
+	PSIL_SA2UL(0x4002, 0),
+	PSIL_SA2UL(0x4003, 0),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0x4100),
+	PSIL_ETHERNET(0x4101),
+	PSIL_ETHERNET(0x4102),
+	PSIL_ETHERNET(0x4103),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0x4200),
+	PSIL_ETHERNET(0x4201),
+	PSIL_ETHERNET(0x4202),
+	PSIL_ETHERNET(0x4203),
+	/* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */
+	PSIL_PDMA_MCASP(0x4400),
+	PSIL_PDMA_MCASP(0x4401),
+	PSIL_PDMA_MCASP(0x4402),
+	/* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */
+	PSIL_PDMA_MCASP(0x4500),
+	PSIL_PDMA_MCASP(0x4501),
+	PSIL_PDMA_MCASP(0x4502),
+	PSIL_PDMA_MCASP(0x4503),
+	PSIL_PDMA_MCASP(0x4504),
+	PSIL_PDMA_MCASP(0x4505),
+	PSIL_PDMA_MCASP(0x4506),
+	PSIL_PDMA_MCASP(0x4507),
+	PSIL_PDMA_MCASP(0x4508),
+	/* PDMA8 (PDMA_MISC_G0) - SPI0-1 */
+	PSIL_PDMA_XY_PKT(0x4600),
+	PSIL_PDMA_XY_PKT(0x4601),
+	PSIL_PDMA_XY_PKT(0x4602),
+	PSIL_PDMA_XY_PKT(0x4603),
+	PSIL_PDMA_XY_PKT(0x4604),
+	PSIL_PDMA_XY_PKT(0x4605),
+	PSIL_PDMA_XY_PKT(0x4606),
+	PSIL_PDMA_XY_PKT(0x4607),
+	/* PDMA9 (PDMA_MISC_G1) - SPI2-3 */
+	PSIL_PDMA_XY_PKT(0x460c),
+	PSIL_PDMA_XY_PKT(0x460d),
+	PSIL_PDMA_XY_PKT(0x460e),
+	PSIL_PDMA_XY_PKT(0x460f),
+	PSIL_PDMA_XY_PKT(0x4610),
+	PSIL_PDMA_XY_PKT(0x4611),
+	PSIL_PDMA_XY_PKT(0x4612),
+	PSIL_PDMA_XY_PKT(0x4613),
+	/* PDMA10 (PDMA_MISC_G2) - SPI4-5 */
+	PSIL_PDMA_XY_PKT(0x4618),
+	PSIL_PDMA_XY_PKT(0x4619),
+	PSIL_PDMA_XY_PKT(0x461a),
+	PSIL_PDMA_XY_PKT(0x461b),
+	PSIL_PDMA_XY_PKT(0x461c),
+	PSIL_PDMA_XY_PKT(0x461d),
+	PSIL_PDMA_XY_PKT(0x461e),
+	PSIL_PDMA_XY_PKT(0x461f),
+	/* PDMA11 (PDMA_MISC_G3) */
+	PSIL_PDMA_XY_PKT(0x4624),
+	PSIL_PDMA_XY_PKT(0x4625),
+	PSIL_PDMA_XY_PKT(0x4626),
+	PSIL_PDMA_XY_PKT(0x4627),
+	PSIL_PDMA_XY_PKT(0x4628),
+	PSIL_PDMA_XY_PKT(0x4629),
+	/*
+	 * NOTE(review): the next two IDs do not continue the 0x4624..0x4629
+	 * run above - confirm them against the SoC documentation.
+	 */
+	PSIL_PDMA_XY_PKT(0x4630),
+	PSIL_PDMA_XY_PKT(0x463a),
+	/* PDMA13 (PDMA_USART_G0) - UART0-1 */
+	PSIL_PDMA_XY_PKT(0x4700),
+	PSIL_PDMA_XY_PKT(0x4701),
+	/* PDMA14 (PDMA_USART_G1) - UART2-3 */
+	PSIL_PDMA_XY_PKT(0x4702),
+	PSIL_PDMA_XY_PKT(0x4703),
+	/* PDMA15 (PDMA_USART_G2) - UART4-9 */
+	PSIL_PDMA_XY_PKT(0x4704),
+	PSIL_PDMA_XY_PKT(0x4705),
+	PSIL_PDMA_XY_PKT(0x4706),
+	PSIL_PDMA_XY_PKT(0x4707),
+	PSIL_PDMA_XY_PKT(0x4708),
+	PSIL_PDMA_XY_PKT(0x4709),
+	/* CPSW9 */
+	PSIL_ETHERNET(0x4a00),
+	/* CPSW0 */
+	PSIL_ETHERNET(0x7000),
+	/* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+	PSIL_PDMA_XY_PKT(0x7100),
+	PSIL_PDMA_XY_PKT(0x7101),
+	PSIL_PDMA_XY_PKT(0x7102),
+	PSIL_PDMA_XY_PKT(0x7103),
+	/* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+	PSIL_PDMA_XY_PKT(0x7200),
+	PSIL_PDMA_XY_PKT(0x7201),
+	PSIL_PDMA_XY_PKT(0x7202),
+	PSIL_PDMA_XY_PKT(0x7203),
+	PSIL_PDMA_XY_PKT(0x7204),
+	PSIL_PDMA_XY_PKT(0x7205),
+	PSIL_PDMA_XY_PKT(0x7206),
+	PSIL_PDMA_XY_PKT(0x7207),
+	/* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+	PSIL_PDMA_XY_PKT(0x7300),
+	/* MCU_PDMA_ADC - ADC0-1 */
+	PSIL_PDMA_XY_TR(0x7400),
+	PSIL_PDMA_XY_TR(0x7401),
+	PSIL_PDMA_XY_TR(0x7402),
+	PSIL_PDMA_XY_TR(0x7403),
+	/* SA2UL */
+	PSIL_SA2UL(0x7500, 0),
+	PSIL_SA2UL(0x7501, 0),
+};
+
+/*
+ * PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV).
+ *
+ * Each entry pairs a thread ID with its endpoint configuration. The table is
+ * published through j721e_ep_map.dst.
+ */
+struct psil_ep j721e_dst_ep_map[] = {
+	/* SA2UL */
+	PSIL_SA2UL(0xc000, 1),
+	PSIL_SA2UL(0xc001, 1),
+	/* PRU_ICSSG0 */
+	PSIL_ETHERNET(0xc100),
+	PSIL_ETHERNET(0xc101),
+	PSIL_ETHERNET(0xc102),
+	PSIL_ETHERNET(0xc103),
+	PSIL_ETHERNET(0xc104),
+	PSIL_ETHERNET(0xc105),
+	PSIL_ETHERNET(0xc106),
+	PSIL_ETHERNET(0xc107),
+	/* PRU_ICSSG1 */
+	PSIL_ETHERNET(0xc200),
+	PSIL_ETHERNET(0xc201),
+	PSIL_ETHERNET(0xc202),
+	PSIL_ETHERNET(0xc203),
+	PSIL_ETHERNET(0xc204),
+	PSIL_ETHERNET(0xc205),
+	PSIL_ETHERNET(0xc206),
+	PSIL_ETHERNET(0xc207),
+	/* CPSW9 */
+	PSIL_ETHERNET(0xca00),
+	PSIL_ETHERNET(0xca01),
+	PSIL_ETHERNET(0xca02),
+	PSIL_ETHERNET(0xca03),
+	PSIL_ETHERNET(0xca04),
+	PSIL_ETHERNET(0xca05),
+	PSIL_ETHERNET(0xca06),
+	PSIL_ETHERNET(0xca07),
+	/* CPSW0 */
+	PSIL_ETHERNET(0xf000),
+	PSIL_ETHERNET(0xf001),
+	PSIL_ETHERNET(0xf002),
+	PSIL_ETHERNET(0xf003),
+	PSIL_ETHERNET(0xf004),
+	PSIL_ETHERNET(0xf005),
+	PSIL_ETHERNET(0xf006),
+	PSIL_ETHERNET(0xf007),
+	/* SA2UL */
+	PSIL_SA2UL(0xf500, 1),
+};
+
+/* Endpoint map for "j721e": ties the source and destination tables together */
+struct psil_ep_map j721e_ep_map = {
+	.name		= "j721e",
+	.src_count	= ARRAY_SIZE(j721e_src_ep_map),
+	.src		= j721e_src_ep_map,
+	.dst_count	= ARRAY_SIZE(j721e_dst_ep_map),
+	.dst		= j721e_dst_ep_map,
+};
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
new file mode 100644
index 000000000000..f74420653d8a
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_PSIL_PRIV_H_
+#define K3_PSIL_PRIV_H_
+
+#include <linux/dma/k3-psil.h>
+
+/**
+ * struct psil_ep - PSI-L thread ID and its endpoint configuration
+ * @thread_id:	PSI-L thread ID, the key psil_get_ep_config() searches for
+ * @ep_config:	Endpoint configuration returned for @thread_id
+ */
+struct psil_ep {
+	u32 thread_id;
+	struct psil_endpoint_config ep_config;
+};
+
+/**
+ * struct psil_ep_map - PSI-L thread ID configuration maps
+ * @name:	Name of the map, set it to the name of the SoC
+ * @src:	Array of source PSI-L thread configurations
+ * @src_count:	Number of entries in the src array
+ * @dst:	Array of destination PSI-L thread configurations
+ * @dst_count:	Number of entries in the dst array
+ *
+ * In case of a symmetric configuration for a matching src/dst thread pair (for
+ * example 0x4400 and 0xc400) only the src configuration needs to be present.
+ * If no dst configuration is found, the code looks up
+ * (dst_thread_id & ~0x8000) in the src array to find the symmetric match.
+ */
+struct psil_ep_map {
+	char *name;
+	struct psil_ep	*src;
+	int src_count;
+	struct psil_ep	*dst;
+	int dst_count;
+};
+
+/*
+ * Look up the endpoint configuration of a PSI-L thread ID in the map of the
+ * running SoC. Returns ERR_PTR(-ENOTSUPP) when the machine has no map and
+ * ERR_PTR(-ENOENT) when the thread ID is not found.
+ */
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id);
+
+#endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
new file mode 100644
index 000000000000..9314cf9a52e4
--- /dev/null
+++ b/drivers/dma/ti/k3-psil.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include "k3-psil-priv.h"
+
+/* SoC specific endpoint maps, defined in the per-SoC k3-psil-*.c files */
+extern struct psil_ep_map am654_ep_map;
+extern struct psil_ep_map j721e_ep_map;
+
+/* Taken around the selection of soc_ep_map in psil_get_ep_config() */
+static DEFINE_MUTEX(ep_map_mutex);
+/* Map of the running SoC; NULL until psil_get_ep_config() has selected one */
+static struct psil_ep_map *soc_ep_map;
+
+/**
+ * psil_get_ep_config - look up the endpoint configuration of a PSI-L thread
+ * @thread_id: PSI-L thread ID to look up
+ *
+ * The SoC specific map is selected from the machine compatible the first time
+ * this function succeeds and is reused afterwards. IDs with the destination
+ * bit set are searched in the dst array first; if that finds nothing, the
+ * destination bit is cleared and the src array is searched.
+ *
+ * Return: pointer to the matching configuration, ERR_PTR(-ENOTSUPP) if there
+ * is no map for the running machine, ERR_PTR(-ENOENT) if the thread ID is not
+ * present in the map.
+ */
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id)
+{
+	int i;
+
+	mutex_lock(&ep_map_mutex);
+	if (!soc_ep_map) {
+		if (of_machine_is_compatible("ti,am654")) {
+			soc_ep_map = &am654_ep_map;
+		} else if (of_machine_is_compatible("ti,j721e")) {
+			soc_ep_map = &j721e_ep_map;
+		} else {
+			pr_err("PSIL: No compatible machine found for map\n");
+			/* Do not leave with the mutex held: later calls would deadlock */
+			mutex_unlock(&ep_map_mutex);
+			return ERR_PTR(-ENOTSUPP);
+		}
+		pr_debug("%s: Using map for %s\n", __func__, soc_ep_map->name);
+	}
+	mutex_unlock(&ep_map_mutex);
+
+	if (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET && soc_ep_map->dst) {
+		/* check in destination thread map */
+		for (i = 0; i < soc_ep_map->dst_count; i++) {
+			if (soc_ep_map->dst[i].thread_id == thread_id)
+				return &soc_ep_map->dst[i].ep_config;
+		}
+	}
+
+	/* Fall back to the symmetric source thread configuration */
+	thread_id &= ~K3_PSIL_DST_THREAD_ID_OFFSET;
+	if (soc_ep_map->src) {
+		for (i = 0; i < soc_ep_map->src_count; i++) {
+			if (soc_ep_map->src[i].thread_id == thread_id)
+				return &soc_ep_map->src[i].ep_config;
+		}
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(psil_get_ep_config);
+
+/*
+ * Overwrite the map's endpoint configuration of the thread that @dev references
+ * through its "dmas" entry named @name with a copy of @ep_config.
+ *
+ * Returns 0 on success, -EINVAL if @dev has no of_node, the error from the
+ * "dma-names" lookup, -ENOENT if the "dmas" entry cannot be parsed, or the
+ * error from psil_get_ep_config().
+ */
+int psil_set_new_ep_config(struct device *dev, const char *name,
+			   struct psil_endpoint_config *ep_config)
+{
+	struct psil_endpoint_config *target;
+	struct of_phandle_args spec;
+	u32 thread_id;
+	int idx;
+	int ret;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	idx = of_property_match_string(dev->of_node, "dma-names", name);
+	if (idx < 0)
+		return idx;
+
+	ret = of_parse_phandle_with_args(dev->of_node, "dmas", "#dma-cells",
+					 idx, &spec);
+	if (ret)
+		return -ENOENT;
+
+	/* The first cell of the specifier is the PSI-L thread ID */
+	thread_id = spec.args[0];
+
+	target = psil_get_ep_config(thread_id);
+	if (IS_ERR(target)) {
+		pr_err("PSIL: thread ID 0x%04x not defined in map\n",
+		       thread_id);
+		ret = PTR_ERR(target);
+	} else {
+		memcpy(target, ep_config, sizeof(*target));
+		ret = 0;
+	}
+
+	/* Drop the node reference on success and failure alike */
+	of_node_put(spec.np);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(psil_set_new_ep_config);
-- 
cgit 


From 25dcb5dd7b7ce5587c1df18f584ff78f51a68a94 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 23 Dec 2019 13:04:50 +0200
Subject: dmaengine: ti: New driver for K3 UDMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split patch for review containing: defines, structs, io and low level
functions and interrupt callbacks.

DMA driver for
Texas Instruments K3 NAVSS Unified DMA – Peripheral Root Complex (UDMA-P)

The UDMA-P is intended to perform similar (but significantly upgraded) functions
as the packet-oriented DMA used on previous SoC devices. The UDMA-P module
supports the transmission and reception of various packet types. The UDMA-P is
architected to facilitate the segmentation and reassembly of SoC DMA data
structure compliant packets to/from smaller data blocks that are natively
compatible with the specific requirements of each connected peripheral. Multiple
Tx and Rx channels are provided within the DMA which allow multiple segmentation
or reassembly operations to be ongoing. The DMA controller maintains state
information for each of the channels which allows packet segmentation and
reassembly operations to be time division multiplexed between channels in order
to share the underlying DMA hardware. An external DMA scheduler is used to
control the ordering and rate at which this multiplexing occurs for Transmit
operations. The ordering and rate of Receive operations is indirectly controlled
by the order in which blocks are pushed into the DMA on the Rx PSI-L interface.

The UDMA-P also supports acting as both a UTC and UDMA-C for its internal
channels. Channels in the UDMA-P can be configured to be either Packet-Based or
Third-Party channels on a channel by channel basis.

The initial driver supports:
- MEM_TO_MEM (TR mode)
- DEV_TO_MEM (Packet / TR mode)
- MEM_TO_DEV (Packet / TR mode)
- Cyclic (Packet / TR mode)
- Metadata for descriptors

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Link: https://lore.kernel.org/r/20191223110458.30766-11-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/Kconfig   |   13 +
 drivers/dma/ti/Makefile  |    1 +
 drivers/dma/ti/k3-udma.c | 3371 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/ti/k3-udma.h |  120 ++
 4 files changed, 3505 insertions(+)
 create mode 100644 drivers/dma/ti/k3-udma.c
 create mode 100644 drivers/dma/ti/k3-udma.h

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index 72f3d2728178..cca6708bd77c 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -34,6 +34,19 @@ config DMA_OMAP
 	  Enable support for the TI sDMA (System DMA or DMA4) controller. This
 	  DMA engine is found on OMAP and DRA7xx parts.
 
+config TI_K3_UDMA
+	bool "Texas Instruments UDMA support"
+	depends on ARCH_K3 || COMPILE_TEST
+	depends on TI_SCI_PROTOCOL
+	depends on TI_SCI_INTA_IRQCHIP
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select TI_K3_RINGACC
+	select TI_K3_PSIL
+	help
+	  Enable support for the TI UDMA (Unified DMA) controller. This
+	  DMA engine is used in AM65x and j721e.
+
 config TI_K3_PSIL
 	bool
 
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index f8d912ad7eaf..9d787f009195 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -2,5 +2,6 @@
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o
 obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o k3-psil-am654.o k3-psil-j721e.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
new file mode 100644
index 000000000000..14f8f8c75d3a
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.c
@@ -0,0 +1,3371 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/soc/ti/ti_sci_protocol.h>
+#include <linux/soc/ti/ti_sci_inta_msi.h>
+#include <linux/dma/ti-cppi5.h>
+
+#include "../virt-dma.h"
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+/*
+ * Static TR parameters of a descriptor. udma_start() programs them into the
+ * peer static TR registers: elsize through PDMA_STATIC_TR_X(), elcnt through
+ * PDMA_STATIC_TR_Y() and bstcnt through PDMA_STATIC_TR_Z().
+ */
+struct udma_static_tr {
+	u8 elsize; /* RPSTR0 */
+	u16 elcnt; /* RPSTR0 */
+	u16 bstcnt; /* RPSTR1 */
+};
+
+#define K3_UDMA_MAX_RFLOWS		1024
+#define K3_UDMA_DEFAULT_RING_SIZE	16
+
+/* How SRC/DST tag should be updated by UDMA in the descriptor's Word 3 */
+#define UDMA_RFLOW_SRCTAG_NONE		0
+#define UDMA_RFLOW_SRCTAG_CFG_TAG	1
+#define UDMA_RFLOW_SRCTAG_FLOW_ID	2
+#define UDMA_RFLOW_SRCTAG_SRC_TAG	4
+
+#define UDMA_RFLOW_DSTTAG_NONE		0
+#define UDMA_RFLOW_DSTTAG_CFG_TAG	1
+#define UDMA_RFLOW_DSTTAG_FLOW_ID	2
+#define UDMA_RFLOW_DSTTAG_DST_TAG_LO	4
+#define UDMA_RFLOW_DSTTAG_DST_TAG_HI	5
+
+struct udma_chan;
+
+/*
+ * Indexes of the register regions kept in udma_dev.mmrs[]; MMR_LAST is the
+ * number of regions. mmr_names[] lists the region names in the same order.
+ */
+enum udma_mmr {
+	MMR_GCFG = 0,
+	MMR_RCHANRT,
+	MMR_TCHANRT,
+	MMR_LAST,
+};
+
+/* Names of the register regions, indexed by enum udma_mmr */
+static const char * const mmr_names[] = {
+	[MMR_GCFG]	= "gcfg",
+	[MMR_RCHANRT]	= "rchanrt",
+	[MMR_TCHANRT]	= "tchanrt",
+};
+
+/* Transmit channel resources */
+struct udma_tchan {
+	/* Register base used by the udma_tchanrt_*() accessors */
+	void __iomem *reg_rt;
+
+	int id;
+	struct k3_ring *t_ring; /* Transmit ring */
+	struct k3_ring *tc_ring; /* Transmit Completion ring */
+};
+
+/*
+ * Receive flow resources. For DMA_DEV_TO_MEM, descriptors are pushed to
+ * fd_ring and popped from r_ring.
+ */
+struct udma_rflow {
+	int id;
+	struct k3_ring *fd_ring; /* Free Descriptor ring */
+	struct k3_ring *r_ring; /* Receive ring */
+};
+
+/* Receive channel resources */
+struct udma_rchan {
+	/* Register base used by the udma_rchanrt_*() accessors */
+	void __iomem *reg_rt;
+
+	int id;
+};
+
+#define UDMA_FLAG_PDMA_ACC32		BIT(0)
+#define UDMA_FLAG_PDMA_BURST		BIT(1)
+
+/*
+ * Per-compatible driver data, referenced from udma_dev.match_data.
+ * statictr_z_mask is handed to PDMA_STATIC_TR_Z() when udma_start() programs
+ * the peer static TR Z register.
+ */
+struct udma_match_data {
+	u32 psil_base;
+	bool enable_memcpy_support;
+	u32 flags;
+	u32 statictr_z_mask;
+	u32 rchan_oes_offset;
+
+	u8 tpl_levels;
+	/* presumably holds tpl_levels entries - TODO confirm against users */
+	u32 level_start_idx[];
+};
+
+/* Driver state of one UDMA instance; embeds the dmaengine device (ddev) */
+struct udma_dev {
+	struct dma_device ddev;
+	struct device *dev;
+	void __iomem *mmrs[MMR_LAST]; /* indexed by enum udma_mmr */
+	const struct udma_match_data *match_data;
+
+	size_t desc_align; /* alignment to use for descriptors */
+
+	struct udma_tisci_rm tisci_rm;
+
+	struct k3_ringacc *ringacc;
+
+	/*
+	 * Deferred descriptor freeing: udma_desc_free() queues descriptors on
+	 * desc_to_purge under lock, udma_purge_desc_work() frees them.
+	 */
+	struct work_struct purge_work;
+	struct list_head desc_to_purge;
+	spinlock_t lock;
+
+	int tchan_cnt;
+	int echan_cnt;
+	int rchan_cnt;
+	int rflow_cnt;
+	unsigned long *tchan_map;
+	unsigned long *rchan_map;
+	unsigned long *rflow_gp_map;
+	unsigned long *rflow_gp_map_allocated;
+	unsigned long *rflow_in_use;
+
+	struct udma_tchan *tchans;
+	struct udma_rchan *rchans;
+	struct udma_rflow *rflows;
+
+	struct udma_chan *channels;
+	u32 psil_base;
+};
+
+/*
+ * One hardware (CPPI5) descriptor buffer: its size plus its CPU and DMA
+ * addresses. udma_free_hwdesc() releases it and clears cppi5_desc_vaddr.
+ */
+struct udma_hwdesc {
+	size_t cppi5_desc_size;
+	void *cppi5_desc_vaddr;
+	dma_addr_t cppi5_desc_paddr;
+
+	/* TR descriptor internal pointers */
+	void *tr_req_base;
+	struct cppi5_tr_resp_t *tr_resp_base;
+};
+
+/*
+ * Driver descriptor wrapping a virt-dma descriptor, followed by hwdesc_count
+ * hardware descriptor entries in the trailing hwdesc[] array.
+ */
+struct udma_desc {
+	struct virt_dma_desc vd;
+
+	bool terminated;
+
+	enum dma_transfer_direction dir;
+
+	struct udma_static_tr static_tr;
+	u32 residue;
+
+	unsigned int sglen;
+	unsigned int desc_idx; /* Only used for cyclic in packet mode */
+	unsigned int tr_idx;
+
+	u32 metadata_size;
+	void *metadata; /* pointer to provided metadata buffer (EPIB, PSdata) */
+
+	unsigned int hwdesc_count;
+	/* Flexible array member, like udma_match_data.level_start_idx[] */
+	struct udma_hwdesc hwdesc[];
+};
+
+/* Channel state; udma_reset_uchan() and udma_reset_chan() set it to IDLE */
+enum udma_chan_state {
+	UDMA_CHAN_IS_IDLE = 0, /* not active, no teardown is in progress */
+	UDMA_CHAN_IS_ACTIVE, /* Normal operation */
+	UDMA_CHAN_IS_TERMINATING, /* channel is being terminated */
+};
+
+/* TX drain bookkeeping, embedded in struct udma_chan as tx_drain */
+struct udma_tx_drain {
+	struct delayed_work work;
+	unsigned long jiffie; /* presumably a jiffies timestamp - TODO confirm */
+	u32 residue;
+};
+
+/*
+ * Channel configuration. udma_reset_uchan() zeroes the structure and sets
+ * remote_thread_id to -1; udma_reset_chan() preserves it across a hard reset.
+ */
+struct udma_chan_config {
+	bool pkt_mode; /* TR or packet */
+	bool needs_epib; /* EPIB is needed for the communication or not */
+	u32 psd_size; /* size of Protocol Specific Data */
+	u32 metadata_size; /* (needs_epib ? 16:0) + psd_size */
+	u32 hdesc_size; /* Size of a packet descriptor in packet mode */
+	bool notdpkt; /* Suppress sending TDC packet */
+	int remote_thread_id;
+	u32 src_thread;
+	u32 dst_thread;
+	enum psil_endpoint_type ep_type;
+	bool enable_acc32;
+	bool enable_burst;
+	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
+
+	/* Selects which channel and rings the helpers operate on */
+	enum dma_transfer_direction dir;
+};
+
+/* Driver state of one DMA channel; embeds the virt-dma channel (vc) */
+struct udma_chan {
+	struct virt_dma_chan vc;
+	struct dma_slave_config	cfg;
+	struct udma_dev *ud;
+	/* Descriptor being processed; udma_start() sets NULL if none is queued */
+	struct udma_desc *desc;
+	struct udma_desc *terminated_desc;
+	/* Static TR configuration last written by udma_start() */
+	struct udma_static_tr static_tr;
+	char *name;
+
+	struct udma_tchan *tchan;
+	struct udma_rchan *rchan;
+	struct udma_rflow *rflow;
+
+	bool psil_paired;
+
+	int irq_num_ring;
+	int irq_num_udma;
+
+	bool cyclic;
+	bool paused;
+
+	enum udma_chan_state state;
+	struct completion teardown_completed;
+
+	struct udma_tx_drain tx_drain;
+
+	u32 bcnt; /* number of bytes completed since the start of the channel */
+	u32 in_ring_cnt; /* number of descriptors in flight */
+
+	/* Channel configuration parameters */
+	struct udma_chan_config config;
+
+	/* dmapool for packet mode descriptors */
+	bool use_dma_pool;
+	struct dma_pool *hdesc_pool;
+
+	u32 id;
+};
+
+/* Get the udma_dev that embeds the dmaengine device @d */
+static inline struct udma_dev *to_udma_dev(struct dma_device *d)
+{
+	struct udma_dev *ud = container_of(d, struct udma_dev, ddev);
+
+	return ud;
+}
+
+/* Get the udma_chan that embeds the dmaengine channel @c */
+static inline struct udma_chan *to_udma_chan(struct dma_chan *c)
+{
+	struct udma_chan *uc = container_of(c, struct udma_chan, vc.chan);
+
+	return uc;
+}
+
+/* Get the udma_desc that embeds the async tx descriptor @t */
+static inline struct udma_desc *to_udma_desc(struct dma_async_tx_descriptor *t)
+{
+	struct udma_desc *d = container_of(t, struct udma_desc, vd.tx);
+
+	return d;
+}
+
+/* Generic register access functions */
+/* Read the 32-bit register at byte offset @reg from @base */
+static inline u32 udma_read(void __iomem *base, int reg)
+{
+	void __iomem *addr = base + reg;
+
+	return readl(addr);
+}
+
+/* Write @val to the 32-bit register at byte offset @reg from @base */
+static inline void udma_write(void __iomem *base, int reg, u32 val)
+{
+	void __iomem *addr = base + reg;
+
+	writel(val, addr);
+}
+
+/*
+ * Read-modify-write of the bits selected by @mask in the register at @base +
+ * @reg. The register is only written when its value would change.
+ */
+static inline void udma_update_bits(void __iomem *base, int reg,
+				    u32 mask, u32 val)
+{
+	void __iomem *addr = base + reg;
+	u32 current_val = readl(addr);
+	u32 updated_val = (current_val & ~mask) | (val & mask);
+
+	if (updated_val == current_val)
+		return;
+
+	writel(updated_val, addr);
+}
+
+/* TCHANRT */
+/* Read a tchan register; returns 0 when @tchan is NULL */
+static inline u32 udma_tchanrt_read(struct udma_tchan *tchan, int reg)
+{
+	return tchan ? udma_read(tchan->reg_rt, reg) : 0;
+}
+
+/* Write a tchan register; does nothing when @tchan is NULL */
+static inline void udma_tchanrt_write(struct udma_tchan *tchan, int reg,
+				      u32 val)
+{
+	if (tchan)
+		udma_write(tchan->reg_rt, reg, val);
+}
+
+/* Update the masked bits of a tchan register; no-op when @tchan is NULL */
+static inline void udma_tchanrt_update_bits(struct udma_tchan *tchan, int reg,
+					    u32 mask, u32 val)
+{
+	if (tchan)
+		udma_update_bits(tchan->reg_rt, reg, mask, val);
+}
+
+/* RCHANRT */
+/* Read an rchan register; returns 0 when @rchan is NULL */
+static inline u32 udma_rchanrt_read(struct udma_rchan *rchan, int reg)
+{
+	return rchan ? udma_read(rchan->reg_rt, reg) : 0;
+}
+
+/* Write an rchan register; does nothing when @rchan is NULL */
+static inline void udma_rchanrt_write(struct udma_rchan *rchan, int reg,
+				      u32 val)
+{
+	if (rchan)
+		udma_write(rchan->reg_rt, reg, val);
+}
+
+/* Update the masked bits of an rchan register; no-op when @rchan is NULL */
+static inline void udma_rchanrt_update_bits(struct udma_rchan *rchan, int reg,
+					    u32 mask, u32 val)
+{
+	if (rchan)
+		udma_update_bits(rchan->reg_rt, reg, mask, val);
+}
+
+/*
+ * Ask TISCI to pair @src_thread with @dst_thread. The destination thread
+ * offset bit is set on @dst_thread before the request is made.
+ */
+static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	struct udma_tisci_rm *rm = &ud->tisci_rm;
+	u32 dst = dst_thread | K3_PSIL_DST_THREAD_ID_OFFSET;
+
+	return rm->tisci_psil_ops->pair(rm->tisci, rm->tisci_navss_dev_id,
+					src_thread, dst);
+}
+
+/*
+ * Ask TISCI to unpair @src_thread from @dst_thread. The destination thread
+ * offset bit is set on @dst_thread before the request is made.
+ */
+static int navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+			     u32 dst_thread)
+{
+	struct udma_tisci_rm *rm = &ud->tisci_rm;
+	u32 dst = dst_thread | K3_PSIL_DST_THREAD_ID_OFFSET;
+
+	return rm->tisci_psil_ops->unpair(rm->tisci, rm->tisci_navss_dev_id,
+					  src_thread, dst);
+}
+
+/* Return the channel to IDLE with a cleared config and no remote thread */
+static void udma_reset_uchan(struct udma_chan *uc)
+{
+	struct udma_chan_config *ucc = &uc->config;
+
+	uc->state = UDMA_CHAN_IS_IDLE;
+
+	memset(ucc, 0, sizeof(*ucc));
+	ucc->remote_thread_id = -1;
+}
+
+/*
+ * Debug helper: print the 32 state data registers of the tchan and/or rchan,
+ * depending on the configured channel direction.
+ */
+static void udma_dump_chan_stdata(struct udma_chan *uc)
+{
+	enum dma_transfer_direction dir = uc->config.dir;
+	bool dump_tchan = dir == DMA_MEM_TO_DEV || dir == DMA_MEM_TO_MEM;
+	bool dump_rchan = dir == DMA_DEV_TO_MEM || dir == DMA_MEM_TO_MEM;
+	struct device *dev = uc->ud->dev;
+	u32 reg;
+	int n;
+
+	if (dump_tchan) {
+		dev_dbg(dev, "TCHAN State data:\n");
+		for (n = 0; n < 32; n++) {
+			reg = UDMA_TCHAN_RT_STDATA_REG + n * 4;
+			dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", n,
+				udma_tchanrt_read(uc->tchan, reg));
+		}
+	}
+
+	if (dump_rchan) {
+		dev_dbg(dev, "RCHAN State data:\n");
+		for (n = 0; n < 32; n++) {
+			reg = UDMA_RCHAN_RT_STDATA_REG + n * 4;
+			dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", n,
+				udma_rchanrt_read(uc->rchan, reg));
+		}
+	}
+}
+
+/* DMA address of hardware descriptor @idx of @d */
+static inline dma_addr_t udma_curr_cppi5_desc_paddr(struct udma_desc *d,
+						    int idx)
+{
+	struct udma_hwdesc *hwdesc = &d->hwdesc[idx];
+
+	return hwdesc->cppi5_desc_paddr;
+}
+
+/* CPU address of hardware descriptor @idx of @d */
+static inline void *udma_curr_cppi5_desc_vaddr(struct udma_desc *d, int idx)
+{
+	struct udma_hwdesc *hwdesc = &d->hwdesc[idx];
+
+	return hwdesc->cppi5_desc_vaddr;
+}
+
+/*
+ * Find the descriptor whose current hardware descriptor (at desc_idx) has the
+ * DMA address @paddr. The terminated descriptor is checked first, then the
+ * active one. Returns NULL when neither matches.
+ */
+static struct udma_desc *udma_udma_desc_from_paddr(struct udma_chan *uc,
+						   dma_addr_t paddr)
+{
+	struct udma_desc *candidate;
+
+	candidate = uc->terminated_desc;
+	if (candidate &&
+	    udma_curr_cppi5_desc_paddr(candidate, candidate->desc_idx) == paddr)
+		return candidate;
+
+	candidate = uc->desc;
+	if (candidate &&
+	    udma_curr_cppi5_desc_paddr(candidate, candidate->desc_idx) == paddr)
+		return candidate;
+
+	return NULL;
+}
+
+/*
+ * Release the hardware descriptor memory of @d. With a dma pool every
+ * allocated hwdesc entry goes back to the pool; otherwise only hwdesc[0] is
+ * released, with dma_free_coherent(). Freed entries get a NULL
+ * cppi5_desc_vaddr, so a second call skips them.
+ */
+static void udma_free_hwdesc(struct udma_chan *uc, struct udma_desc *d)
+{
+	struct udma_hwdesc *hwdesc;
+	unsigned int n;
+
+	if (!uc->use_dma_pool) {
+		hwdesc = &d->hwdesc[0];
+		if (!hwdesc->cppi5_desc_vaddr)
+			return;
+
+		dma_free_coherent(uc->ud->dev, hwdesc->cppi5_desc_size,
+				  hwdesc->cppi5_desc_vaddr,
+				  hwdesc->cppi5_desc_paddr);
+		hwdesc->cppi5_desc_vaddr = NULL;
+		return;
+	}
+
+	for (n = 0; n < d->hwdesc_count; n++) {
+		hwdesc = &d->hwdesc[n];
+		if (hwdesc->cppi5_desc_vaddr) {
+			dma_pool_free(uc->hdesc_pool, hwdesc->cppi5_desc_vaddr,
+				      hwdesc->cppi5_desc_paddr);
+			hwdesc->cppi5_desc_vaddr = NULL;
+		}
+	}
+}
+
+/*
+ * Work handler freeing the descriptors that udma_desc_free() queued on
+ * ud->desc_to_purge.
+ */
+static void udma_purge_desc_work(struct work_struct *work)
+{
+	struct udma_dev *ud = container_of(work, struct udma_dev, purge_work);
+	struct virt_dma_desc *cur, *next;
+	unsigned long irqflags;
+	LIST_HEAD(purge_list);
+
+	/* Move the queued descriptors to a private list under the lock */
+	spin_lock_irqsave(&ud->lock, irqflags);
+	list_splice_tail_init(&ud->desc_to_purge, &purge_list);
+	spin_unlock_irqrestore(&ud->lock, irqflags);
+
+	list_for_each_entry_safe(cur, next, &purge_list, node) {
+		struct udma_desc *d = to_udma_desc(&cur->tx);
+
+		udma_free_hwdesc(to_udma_chan(cur->tx.chan), d);
+		list_del(&cur->node);
+		kfree(d);
+	}
+
+	if (list_empty(&ud->desc_to_purge))
+		return;
+
+	/* If more to purge, schedule the work again */
+	schedule_work(&ud->purge_work);
+}
+
+/*
+ * Free a descriptor. With a dma pool it is freed right away; otherwise it is
+ * queued on ud->desc_to_purge and the purge work is scheduled to free it.
+ */
+static void udma_desc_free(struct virt_dma_desc *vd)
+{
+	struct dma_chan *chan = vd->tx.chan;
+	struct udma_dev *ud = to_udma_dev(chan->device);
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_desc *d = to_udma_desc(&vd->tx);
+	unsigned long irqflags;
+
+	/* Do not leave a dangling pointer to the descriptor being freed */
+	if (d == uc->terminated_desc)
+		uc->terminated_desc = NULL;
+
+	if (!uc->use_dma_pool) {
+		spin_lock_irqsave(&ud->lock, irqflags);
+		list_add_tail(&vd->node, &ud->desc_to_purge);
+		spin_unlock_irqrestore(&ud->lock, irqflags);
+
+		schedule_work(&ud->purge_work);
+		return;
+	}
+
+	udma_free_hwdesc(uc, d);
+	kfree(d);
+}
+
+/*
+ * A channel counts as running when the enable bit is set in the RT control
+ * register of its tchan or of its rchan.
+ */
+static bool udma_is_chan_running(struct udma_chan *uc)
+{
+	u32 ctl = 0;
+
+	if (uc->tchan)
+		ctl |= udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+	if (uc->rchan)
+		ctl |= udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_CTL_REG);
+
+	return !!(ctl & UDMA_CHAN_RT_CTL_EN);
+}
+
+/*
+ * Check whether the channel is paused.
+ *
+ * The register to inspect is chosen from the channel direction
+ * (uc->config.dir) rather than from the active descriptor: uc->desc is NULL
+ * when no transfer is in progress and must not be dereferenced here.
+ *
+ * Returns true if the pause bit for the channel direction is set, false if it
+ * is clear or the direction is not one of the handled ones.
+ */
+static bool udma_is_chan_paused(struct udma_chan *uc)
+{
+	u32 val, pause_mask;
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		val = udma_rchanrt_read(uc->rchan,
+					UDMA_RCHAN_RT_PEER_RT_EN_REG);
+		pause_mask = UDMA_PEER_RT_EN_PAUSE;
+		break;
+	case DMA_MEM_TO_DEV:
+		val = udma_tchanrt_read(uc->tchan,
+					UDMA_TCHAN_RT_PEER_RT_EN_REG);
+		pause_mask = UDMA_PEER_RT_EN_PAUSE;
+		break;
+	case DMA_MEM_TO_MEM:
+		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+		pause_mask = UDMA_CHAN_RT_CTL_PAUSE;
+		break;
+	default:
+		return false;
+	}
+
+	if (val & pause_mask)
+		return true;
+
+	return false;
+}
+
+/*
+ * Sync the hardware descriptor memory of the active descriptor for the
+ * device. For cyclic packet mode only hwdesc[@idx] is synced; otherwise every
+ * allocated hwdesc entry is.
+ */
+static void udma_sync_for_device(struct udma_chan *uc, int idx)
+{
+	struct device *dev = uc->ud->dev;
+	struct udma_desc *d = uc->desc;
+	struct udma_hwdesc *hwdesc;
+	unsigned int n;
+
+	if (uc->cyclic && uc->config.pkt_mode) {
+		hwdesc = &d->hwdesc[idx];
+		dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
+					   hwdesc->cppi5_desc_size,
+					   DMA_TO_DEVICE);
+		return;
+	}
+
+	for (n = 0; n < d->hwdesc_count; n++) {
+		hwdesc = &d->hwdesc[n];
+		if (hwdesc->cppi5_desc_vaddr)
+			dma_sync_single_for_device(dev,
+						   hwdesc->cppi5_desc_paddr,
+						   hwdesc->cppi5_desc_size,
+						   DMA_TO_DEVICE);
+	}
+}
+
+/*
+ * Push hardware descriptor @idx of the active descriptor to the ring that
+ * matches the channel direction: the rflow free descriptor ring for
+ * DMA_DEV_TO_MEM, the tchan transmit ring for DMA_MEM_TO_DEV/DMA_MEM_TO_MEM.
+ *
+ * Returns the result of the ring push, or -EINVAL for any other direction.
+ */
+static int udma_push_to_ring(struct udma_chan *uc, int idx)
+{
+	enum dma_transfer_direction dir = uc->config.dir;
+	struct udma_desc *d = uc->desc;
+	struct k3_ring *ring;
+	dma_addr_t paddr;
+	int ret;
+
+	if (dir == DMA_DEV_TO_MEM)
+		ring = uc->rflow->fd_ring;
+	else if (dir == DMA_MEM_TO_DEV || dir == DMA_MEM_TO_MEM)
+		ring = uc->tchan->t_ring;
+	else
+		ring = NULL;
+
+	if (!ring)
+		return -EINVAL;
+
+	paddr = udma_curr_cppi5_desc_paddr(d, idx);
+
+	wmb(); /* Ensure that writes are not moved over this point */
+	udma_sync_for_device(uc, idx);
+	ret = k3_ringacc_ring_push(ring, &paddr);
+	/* The in-flight counter is bumped whatever the push returned */
+	uc->in_ring_cnt++;
+
+	return ret;
+}
+
+/*
+ * Pop one descriptor address into @addr from the ring that matches the channel
+ * direction: the rflow receive ring for DMA_DEV_TO_MEM, the tchan completion
+ * ring for DMA_MEM_TO_DEV/DMA_MEM_TO_MEM.
+ *
+ * Returns 0 on success, -ENOENT when there is no ring or the ring is empty, or
+ * the error of the ring pop. A teardown completion marker returns 0 without
+ * decrementing the in-flight counter.
+ */
+static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
+{
+	enum dma_transfer_direction dir = uc->config.dir;
+	struct k3_ring *ring;
+	struct udma_desc *d;
+	int ret;
+
+	if (dir == DMA_DEV_TO_MEM)
+		ring = uc->rflow->r_ring;
+	else if (dir == DMA_MEM_TO_DEV || dir == DMA_MEM_TO_MEM)
+		ring = uc->tchan->tc_ring;
+	else
+		ring = NULL;
+
+	if (!ring || !k3_ringacc_ring_get_occ(ring))
+		return -ENOENT;
+
+	ret = k3_ringacc_ring_pop(ring, addr);
+	if (ret)
+		return ret;
+
+	/* Teardown completion */
+	if (cppi5_desc_is_tdcm(*addr))
+		return 0;
+
+	d = udma_udma_desc_from_paddr(uc, *addr);
+	if (d)
+		dma_sync_single_for_cpu(uc->ud->dev, *addr,
+					d->hwdesc[0].cppi5_desc_size,
+					DMA_FROM_DEVICE);
+	rmb(); /* Ensure that reads are not moved before this point */
+
+	uc->in_ring_cnt--;
+
+	return 0;
+}
+
+/*
+ * Reset the two rings of the channel, free a leftover terminated descriptor
+ * and clear the in-flight counter.
+ */
+static void udma_reset_rings(struct udma_chan *uc)
+{
+	enum dma_transfer_direction dir = uc->config.dir;
+	struct k3_ring *push_ring = NULL;
+	struct k3_ring *pop_ring = NULL;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		if (uc->rchan) {
+			push_ring = uc->rflow->fd_ring;
+			pop_ring = uc->rflow->r_ring;
+		}
+	} else if (dir == DMA_MEM_TO_DEV || dir == DMA_MEM_TO_MEM) {
+		if (uc->tchan) {
+			push_ring = uc->tchan->t_ring;
+			pop_ring = uc->tchan->tc_ring;
+		}
+	}
+
+	/* The push side is reset with its current occupancy */
+	if (push_ring)
+		k3_ringacc_ring_reset_dma(push_ring,
+					  k3_ringacc_ring_get_occ(push_ring));
+	if (pop_ring)
+		k3_ringacc_ring_reset(pop_ring);
+
+	/* make sure we are not leaking memory by stalled descriptor */
+	if (uc->terminated_desc) {
+		udma_desc_free(&uc->terminated_desc->vd);
+		uc->terminated_desc = NULL;
+	}
+
+	uc->in_ring_cnt = 0;
+}
+
+/*
+ * Read each RT counter register of the tchan and rchan and write the value
+ * back, then clear the software byte counter.
+ * NOTE(review): presumably writing a value decrements the hardware counter by
+ * that amount, which would clear it - confirm against the TRM.
+ */
+static void udma_reset_counters(struct udma_chan *uc)
+{
+	const int tchan_regs[] = {
+		UDMA_TCHAN_RT_BCNT_REG,
+		UDMA_TCHAN_RT_SBCNT_REG,
+		UDMA_TCHAN_RT_PCNT_REG,
+		UDMA_TCHAN_RT_PEER_BCNT_REG,
+	};
+	const int rchan_regs[] = {
+		UDMA_RCHAN_RT_BCNT_REG,
+		UDMA_RCHAN_RT_SBCNT_REG,
+		UDMA_RCHAN_RT_PCNT_REG,
+		UDMA_RCHAN_RT_PEER_BCNT_REG,
+	};
+	unsigned int n;
+	u32 cnt;
+
+	if (uc->tchan) {
+		for (n = 0; n < ARRAY_SIZE(tchan_regs); n++) {
+			cnt = udma_tchanrt_read(uc->tchan, tchan_regs[n]);
+			udma_tchanrt_write(uc->tchan, tchan_regs[n], cnt);
+		}
+	}
+
+	if (uc->rchan) {
+		for (n = 0; n < ARRAY_SIZE(rchan_regs); n++) {
+			cnt = udma_rchanrt_read(uc->rchan, rchan_regs[n]);
+			udma_rchanrt_write(uc->rchan, rchan_regs[n], cnt);
+		}
+	}
+
+	uc->bcnt = 0;
+}
+
+/*
+ * Reset the channel: clear the RT control registers for the configured
+ * direction, reset the counters and mark the channel IDLE.
+ *
+ * With @hard set the channel resources are additionally freed and allocated
+ * again through the dmaengine callbacks, keeping the channel configuration.
+ *
+ * Returns 0 on success, -EINVAL for an unhandled direction, or the error of
+ * the resource re-allocation (the state is left untouched in that case).
+ */
+static int udma_reset_chan(struct udma_chan *uc, bool hard)
+{
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, 0);
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
+		break;
+	case DMA_MEM_TO_DEV:
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+		break;
+	case DMA_MEM_TO_MEM:
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Reset all counters */
+	udma_reset_counters(uc);
+
+	/* Hard reset: re-initialize the channel to reset */
+	if (hard) {
+		struct udma_chan_config ucc_backup;
+		int ret;
+
+		/* Saved so the configuration can be put back after the free */
+		memcpy(&ucc_backup, &uc->config, sizeof(uc->config));
+		uc->ud->ddev.device_free_chan_resources(&uc->vc.chan);
+
+		/* restore the channel configuration */
+		memcpy(&uc->config, &ucc_backup, sizeof(uc->config));
+		ret = uc->ud->ddev.device_alloc_chan_resources(&uc->vc.chan);
+		if (ret)
+			return ret;
+
+		/*
+		 * Setting forced teardown after forced reset helps recovering
+		 * the rchan.
+		 */
+		if (uc->config.dir == DMA_DEV_TO_MEM)
+			udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+					   UDMA_CHAN_RT_CTL_EN |
+					   UDMA_CHAN_RT_CTL_TDOWN |
+					   UDMA_CHAN_RT_CTL_FTDOWN);
+	}
+	uc->state = UDMA_CHAN_IS_IDLE;
+
+	return 0;
+}
+
+/*
+ * Queue the active descriptor to the hardware: all sglen hardware descriptors
+ * for packet mode cyclic or RX transfers, only the first one otherwise.
+ */
+static void udma_start_desc(struct udma_chan *uc)
+{
+	struct udma_chan_config *ucc = &uc->config;
+	unsigned int n;
+
+	if (!(ucc->pkt_mode && (uc->cyclic || ucc->dir == DMA_DEV_TO_MEM))) {
+		udma_push_to_ring(uc, 0);
+		return;
+	}
+
+	/* Push all descriptors to ring for packet mode cyclic or RX */
+	for (n = 0; n < uc->desc->sglen; n++)
+		udma_push_to_ring(uc, n);
+}
+
+/*
+ * Tell whether the static TR of the active descriptor differs from the one
+ * last saved for the channel. Always false for native endpoints.
+ */
+static bool udma_chan_needs_reconfiguration(struct udma_chan *uc)
+{
+	/* Only PDMAs have staticTR */
+	if (uc->config.ep_type == PSIL_EP_NATIVE)
+		return false;
+
+	/* Check if the staticTR configuration has changed for TX */
+	return memcmp(&uc->static_tr, &uc->desc->static_tr,
+		      sizeof(uc->static_tr)) != 0;
+}
+
+/*
+ * udma_start - take the next issued descriptor and start it on the channel
+ * @uc: channel to start
+ *
+ * Fetches the next descriptor with vchan_next_desc(), removes it from its
+ * list and makes it the current descriptor (uc->desc). If the channel is
+ * already running and needs no reconfiguration the descriptor is only
+ * pushed; otherwise the channel is soft reset, the descriptor is pushed and
+ * the RT registers for the descriptor's direction are programmed.
+ *
+ * The IRQ handlers in this file call it with uc->vc.lock held.
+ *
+ * Returns 0 on success, -ENOENT when there is no descriptor to start
+ * (uc->desc is set to NULL) or -EINVAL for an unsupported descriptor
+ * direction.
+ */
+static int udma_start(struct udma_chan *uc)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&uc->vc);
+
+	if (!vd) {
+		uc->desc = NULL;
+		return -ENOENT;
+	}
+
+	list_del(&vd->node);
+
+	uc->desc = to_udma_desc(&vd->tx);
+
+	/* Channel is already running and does not need reconfiguration */
+	if (udma_is_chan_running(uc) && !udma_chan_needs_reconfiguration(uc)) {
+		udma_start_desc(uc);
+		goto out;
+	}
+
+	/*
+	 * Make sure that we clear the teardown bit, if it is set
+	 * NOTE(review): the return value of udma_reset_chan() is ignored.
+	 */
+	udma_reset_chan(uc, false);
+
+	/* Push descriptors before we start the channel */
+	udma_start_desc(uc);
+
+	switch (uc->desc->dir) {
+	case DMA_DEV_TO_MEM:
+		/* Config remote TR */
+		if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+			u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+				  PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+			const struct udma_match_data *match_data =
+							uc->ud->match_data;
+
+			if (uc->config.enable_acc32)
+				val |= PDMA_STATIC_TR_XY_ACC32;
+			if (uc->config.enable_burst)
+				val |= PDMA_STATIC_TR_XY_BURST;
+
+			udma_rchanrt_write(uc->rchan,
+				UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+
+			/* Only the RX path programs the Z (bstcnt) register */
+			udma_rchanrt_write(uc->rchan,
+				UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG,
+				PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt,
+						 match_data->statictr_z_mask));
+
+			/* save the current staticTR configuration */
+			memcpy(&uc->static_tr, &uc->desc->static_tr,
+			       sizeof(uc->static_tr));
+		}
+
+		/* RX: the local channel is enabled before the remote peer */
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		/* Enable remote */
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE);
+
+		break;
+	case DMA_MEM_TO_DEV:
+		/* Config remote TR */
+		if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+			u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+				  PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+
+			if (uc->config.enable_acc32)
+				val |= PDMA_STATIC_TR_XY_ACC32;
+			if (uc->config.enable_burst)
+				val |= PDMA_STATIC_TR_XY_BURST;
+
+			udma_tchanrt_write(uc->tchan,
+				UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+
+			/* save the current staticTR configuration */
+			memcpy(&uc->static_tr, &uc->desc->static_tr,
+			       sizeof(uc->static_tr));
+		}
+
+		/* TX: the remote peer is enabled before the local channel */
+		/* Enable remote */
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+				   UDMA_PEER_RT_EN_ENABLE);
+
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		break;
+	case DMA_MEM_TO_MEM:
+		/* No remote peer: enable the rchan, then the tchan */
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   UDMA_CHAN_RT_CTL_EN);
+
+		break;
+	default:
+		/*
+		 * NOTE(review): at this point the descriptor has already been
+		 * unlinked, stored in uc->desc and pushed to the ring.
+		 */
+		return -EINVAL;
+	}
+
+	uc->state = UDMA_CHAN_IS_ACTIVE;
+out:
+
+	return 0;
+}
+
+/*
+ * udma_stop - request teardown of the channel
+ * @uc: channel to stop
+ *
+ * Moves the channel to UDMA_CHAN_IS_TERMINATING, re-arms the
+ * teardown_completed completion and writes the teardown/flush request
+ * matching uc->config.dir to the RT registers. The function does not wait
+ * for the teardown to finish.
+ *
+ * Returns 0 when the request was written, -EINVAL for an unsupported
+ * direction; in that case the previous state is restored and the
+ * completion is completed again.
+ */
+static int udma_stop(struct udma_chan *uc)
+{
+	const u32 peer_enable = UDMA_PEER_RT_EN_ENABLE;
+	const u32 chan_tdown = UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN;
+	enum udma_chan_state saved_state = uc->state;
+
+	uc->state = UDMA_CHAN_IS_TERMINATING;
+	reinit_completion(&uc->teardown_completed);
+
+	switch (uc->config.dir) {
+	case DMA_DEV_TO_MEM:
+		/* RX: ask the remote peer to tear down */
+		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+				   peer_enable | UDMA_PEER_RT_EN_TEARDOWN);
+		break;
+	case DMA_MEM_TO_DEV:
+		/* TX: flush the remote peer, then tear down the tchan */
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+				   peer_enable | UDMA_PEER_RT_EN_FLUSH);
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   chan_tdown);
+		break;
+	case DMA_MEM_TO_MEM:
+		/* Only the tchan is written for memory to memory channels */
+		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+				   chan_tdown);
+		break;
+	default:
+		/* Nothing was requested: undo the state change */
+		uc->state = saved_state;
+		complete_all(&uc->teardown_completed);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Recycle the just completed host descriptor of a cyclic packet mode
+ * transfer: reset it to its original content, push it back to the ring and
+ * advance desc_idx to the next descriptor, wrapping around at sglen.
+ */
+static void udma_cyclic_packet_elapsed(struct udma_chan *uc)
+{
+	struct udma_desc *desc = uc->desc;
+	struct cppi5_host_desc_t *host_desc =
+			desc->hwdesc[desc->desc_idx].cppi5_desc_vaddr;
+
+	cppi5_hdesc_reset_to_original(host_desc);
+	udma_push_to_ring(uc, desc->desc_idx);
+
+	/* Move on to the next descriptor of the cycle */
+	desc->desc_idx = (desc->desc_idx + 1) % desc->sglen;
+}
+
+/*
+ * Copy metadata_size bytes from the epib area of the first host descriptor
+ * of @d into d->metadata. @uc is not used.
+ */
+static inline void udma_fetch_epib(struct udma_chan *uc, struct udma_desc *d)
+{
+	const struct cppi5_host_desc_t *first_hdesc =
+					d->hwdesc[0].cppi5_desc_vaddr;
+
+	memcpy(d->metadata, first_hdesc->epib, d->metadata_size);
+}
+
+/*
+ * udma_is_desc_really_done - check whether the peer has caught up with us
+ * @uc: channel the descriptor belongs to
+ * @d: descriptor being checked (not used by the check itself)
+ *
+ * Only MEM_TO_DEV channels with a non native endpoint are checked, all other
+ * channels are reported done right away. For the checked ones the peer byte
+ * counter is compared with the channel byte counter; while the peer is
+ * behind, the difference and the current jiffies are stored in uc->tx_drain
+ * and false is returned.
+ */
+static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
+{
+	u32 peer_cnt, chan_cnt;
+
+	/* Only TX towards PDMA is affected */
+	if (uc->config.dir != DMA_MEM_TO_DEV ||
+	    uc->config.ep_type == PSIL_EP_NATIVE)
+		return true;
+
+	peer_cnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
+	chan_cnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
+
+	if (peer_cnt >= chan_cnt)
+		return true;
+
+	/* Peer is lagging: record by how much and when it was sampled */
+	uc->tx_drain.residue = chan_cnt - peer_cnt;
+	uc->tx_drain.jiffie = jiffies;
+
+	return false;
+}
+
+/*
+ * udma_check_tx_completion - delayed work polling for the real TX completion
+ * @work: the work_struct embedded in uc->tx_drain.work
+ *
+ * Re-checks with udma_is_desc_really_done() whether the peer has consumed
+ * the data of the current descriptor. While it has not, the work re-arms
+ * itself with a delay guessed from the drain rate observed between the
+ * previous and the current sample (one second when no progress could be
+ * measured). Once done, the next descriptor is started and the finished one
+ * is completed.
+ *
+ * NOTE(review): uc->desc is accessed and udma_start()/vchan_cookie_complete()
+ * are called here without uc->vc.lock, unlike in the IRQ handlers of this
+ * file - confirm the intended serialization.
+ */
+static void udma_check_tx_completion(struct work_struct *work)
+{
+	struct udma_chan *uc = container_of(work, typeof(*uc),
+					    tx_drain.work.work);
+	bool desc_done = true;
+	u32 residue_diff = 0;
+	unsigned long jiffie_diff = 0, delay;
+
+	if (uc->desc) {
+		/* Keep the previous sample, the check below refreshes it */
+		residue_diff = uc->tx_drain.residue;
+		jiffie_diff = uc->tx_drain.jiffie;
+		desc_done = udma_is_desc_really_done(uc, uc->desc);
+	}
+
+	if (!desc_done) {
+		jiffie_diff = uc->tx_drain.jiffie - jiffie_diff;
+		residue_diff -= uc->tx_drain.residue;
+
+		/* No measurable progress: check again in 1 second */
+		delay = HZ;
+
+		if (residue_diff) {
+			/*
+			 * Both samples can be taken within the same jiffy
+			 * (the IRQ handler schedules us with no delay), do
+			 * not divide by zero in that case.
+			 */
+			if (!jiffie_diff)
+				jiffie_diff = 1;
+
+			/* Drain rate in bytes per jiffy, may round to 0 */
+			residue_diff /= jiffie_diff;
+			if (residue_diff) {
+				/* Try to guess when we should check next time */
+				delay = uc->tx_drain.residue / residue_diff / 3;
+				if (jiffies_to_msecs(delay) < 5)
+					delay = 0;
+			}
+		}
+
+		schedule_delayed_work(&uc->tx_drain.work, delay);
+	} else if (uc->desc) {
+		struct udma_desc *d = uc->desc;
+
+		uc->bcnt += d->residue;
+		udma_start(uc);
+		vchan_cookie_complete(&d->vd);
+	}
+}
+
+/*
+ * udma_ring_irq_handler - interrupt handler of the channel's completion ring
+ * @irq: interrupt number (unused)
+ * @data: the struct udma_chan the interrupt was requested for
+ *
+ * Pops one descriptor address from the ring and, with uc->vc.lock held,
+ * handles it either as teardown completion message or as a completed
+ * descriptor of a cyclic or non cyclic transfer.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t udma_ring_irq_handler(int irq, void *data)
+{
+	struct udma_chan *uc = data;
+	struct udma_desc *d;
+	unsigned long flags;
+	dma_addr_t paddr = 0;
+
+	/* Nothing to do if the pop failed or returned a zero address */
+	if (udma_pop_from_ring(uc, &paddr) || !paddr)
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	/* Teardown completion message */
+	if (cppi5_desc_is_tdcm(paddr)) {
+		/* Compensate our internal pop/push counter */
+		uc->in_ring_cnt++;
+
+		/* Wake up everybody waiting for the teardown to finish */
+		complete_all(&uc->teardown_completed);
+
+		if (uc->terminated_desc) {
+			udma_desc_free(&uc->terminated_desc->vd);
+			uc->terminated_desc = NULL;
+		}
+
+		/* No current descriptor: try to start the next issued one */
+		if (!uc->desc)
+			udma_start(uc);
+
+		goto out;
+	}
+
+	d = udma_udma_desc_from_paddr(uc, paddr);
+
+	if (d) {
+		/* The popped address must be the descriptor we expect next */
+		dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+								   d->desc_idx);
+		if (desc_paddr != paddr) {
+			dev_err(uc->ud->dev, "not matching descriptors!\n");
+			goto out;
+		}
+
+		if (uc->cyclic) {
+			/* push the descriptor back to the ring */
+			if (d == uc->desc) {
+				udma_cyclic_packet_elapsed(uc);
+				vchan_cyclic_callback(&d->vd);
+			}
+		} else {
+			bool desc_done = false;
+
+			if (d == uc->desc) {
+				desc_done = udma_is_desc_really_done(uc, d);
+
+				if (desc_done) {
+					uc->bcnt += d->residue;
+					udma_start(uc);
+				} else {
+					/*
+					 * The peer has not consumed all data
+					 * yet, let the tx_drain work poll
+					 * for the real completion.
+					 */
+					schedule_delayed_work(&uc->tx_drain.work,
+							      0);
+				}
+			}
+
+			/* Completed only for the really done current desc */
+			if (desc_done)
+				vchan_cookie_complete(&d->vd);
+		}
+	}
+out:
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * udma_udma_irq_handler - handler for the UDMA event interrupt of a channel
+ * @irq: interrupt number (unused)
+ * @data: the struct udma_chan the interrupt was requested for
+ *
+ * With uc->vc.lock held: advances tr_idx of the current descriptor (modulo
+ * sglen) and either invokes the cyclic callback or accounts the descriptor's
+ * residue, starts the next descriptor and completes the current one.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t udma_udma_irq_handler(int irq, void *data)
+{
+	struct udma_chan *uc = data;
+	struct udma_desc *cur;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uc->vc.lock, irqflags);
+
+	cur = uc->desc;
+	if (!cur)
+		goto unlock;
+
+	cur->tr_idx = (cur->tr_idx + 1) % cur->sglen;
+
+	if (uc->cyclic) {
+		vchan_cyclic_callback(&cur->vd);
+		goto unlock;
+	}
+
+	/* TODO: figure out the real amount of data */
+	uc->bcnt += cur->residue;
+	udma_start(uc);
+	vchan_cookie_complete(&cur->vd);
+
+unlock:
+	spin_unlock_irqrestore(&uc->vc.lock, irqflags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * __udma_get_rflow - mark the rflow with the given id as in use
+ * @ud: UDMA device
+ * @id: index of the requested rflow
+ *
+ * Any rflow which is not in use may be requested by id, on the assumption
+ * that the caller knows what it is doing; TI-SCI FW performs an additional
+ * permission check anyway, so this is safe.
+ *
+ * Returns the rflow, ERR_PTR(-ENOENT) when @id is out of range or already
+ * in use, or ERR_PTR(-EINVAL) when @id is set in neither rflow_gp_map nor
+ * rflow_gp_map_allocated.
+ */
+static struct udma_rflow *__udma_get_rflow(struct udma_dev *ud, int id)
+{
+	if (id < 0 || id >= ud->rflow_cnt || test_bit(id, ud->rflow_in_use))
+		return ERR_PTR(-ENOENT);
+
+	/* GP rflow has to be allocated first */
+	if (!(test_bit(id, ud->rflow_gp_map) ||
+	      test_bit(id, ud->rflow_gp_map_allocated)))
+		return ERR_PTR(-EINVAL);
+
+	dev_dbg(ud->dev, "get rflow%d\n", id);
+	set_bit(id, ud->rflow_in_use);
+
+	return &ud->rflows[id];
+}
+
+/*
+ * Clear the in-use bit of @rflow. Putting an rflow which is not marked as
+ * in use only logs an error.
+ */
+static void __udma_put_rflow(struct udma_dev *ud, struct udma_rflow *rflow)
+{
+	if (test_bit(rflow->id, ud->rflow_in_use)) {
+		dev_dbg(ud->dev, "put rflow%d\n", rflow->id);
+		clear_bit(rflow->id, ud->rflow_in_use);
+		return;
+	}
+
+	dev_err(ud->dev, "attempt to put unused rflow%d\n", rflow->id);
+}
+
+/*
+ * UDMA_RESERVE_RESOURCE(res) generates __udma_reserve_<res>(ud, tpl, id),
+ * which reserves one <res> (tchan or rchan) in ud-><res>_map.
+ *
+ * With id >= 0 that specific resource is requested and ERR_PTR(-ENOENT) is
+ * returned if its bit is already set. With a negative id the first free
+ * resource at or after match_data->level_start_idx[tpl] is taken (tpl is
+ * clamped to the last available level); ERR_PTR(-ENOENT) is returned when
+ * none is free.
+ *
+ * The resource name is stringified with #res for the error message: token
+ * pasting (##) is not performed inside string literals.
+ */
+#define UDMA_RESERVE_RESOURCE(res)					\
+static struct udma_##res *__udma_reserve_##res(struct udma_dev *ud,	\
+					       enum udma_tp_level tpl,	\
+					       int id)			\
+{									\
+	if (id >= 0) {							\
+		if (test_bit(id, ud->res##_map)) {			\
+			dev_err(ud->dev, #res "%d is in use\n", id);	\
+			return ERR_PTR(-ENOENT);			\
+		}							\
+	} else {							\
+		int start;						\
+									\
+		if (tpl >= ud->match_data->tpl_levels)			\
+			tpl = ud->match_data->tpl_levels - 1;		\
+									\
+		start = ud->match_data->level_start_idx[tpl];		\
+									\
+		id = find_next_zero_bit(ud->res##_map, ud->res##_cnt,	\
+					start);				\
+		if (id == ud->res##_cnt) {				\
+			return ERR_PTR(-ENOENT);			\
+		}							\
+	}								\
+									\
+	set_bit(id, ud->res##_map);					\
+	return &ud->res##s[id];						\
+}
+
+UDMA_RESERVE_RESOURCE(tchan);
+UDMA_RESERVE_RESOURCE(rchan);
+
+/*
+ * udma_get_tchan - reserve a tchan for the channel
+ * @uc: channel which needs a tchan
+ *
+ * Does nothing if the channel already has a tchan. Otherwise the first free
+ * tchan of the configured throughput level is reserved.
+ *
+ * Returns 0 on success or the error from __udma_reserve_tchan(); uc->tchan
+ * is NULL after a failure.
+ */
+static int udma_get_tchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	int ret;
+
+	if (uc->tchan) {
+		dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n",
+			uc->id, uc->tchan->id);
+		return 0;
+	}
+
+	uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl, -1);
+	if (IS_ERR(uc->tchan)) {
+		ret = PTR_ERR(uc->tchan);
+		/*
+		 * uc->tchan is tested against NULL everywhere else, never
+		 * leave an ERR_PTR behind which would be dereferenced later.
+		 */
+		uc->tchan = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * udma_get_rchan - reserve an rchan for the channel
+ * @uc: channel which needs an rchan
+ *
+ * Does nothing if the channel already has an rchan. Otherwise the first free
+ * rchan of the configured throughput level is reserved.
+ *
+ * Returns 0 on success or the error from __udma_reserve_rchan(); uc->rchan
+ * is NULL after a failure.
+ */
+static int udma_get_rchan(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	int ret;
+
+	if (uc->rchan) {
+		dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n",
+			uc->id, uc->rchan->id);
+		return 0;
+	}
+
+	uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl, -1);
+	if (IS_ERR(uc->rchan)) {
+		ret = PTR_ERR(uc->rchan);
+		/*
+		 * uc->rchan is tested against NULL everywhere else, never
+		 * leave an ERR_PTR behind which would be dereferenced later.
+		 */
+		uc->rchan = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * udma_get_chan_pair - reserve a tchan/rchan pair sharing the same index
+ * @uc: channel which needs the pair
+ *
+ * The search starts at the first channel of the highest throughput level and
+ * takes the first index which is free in both the tchan and the rchan map.
+ *
+ * Returns 0 on success (also when a matching pair is already assigned),
+ * -EBUSY when the channel already owns a tchan or an rchan which do not
+ * form a pair, or -ENOENT when no free pair is available.
+ */
+static int udma_get_chan_pair(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	const struct udma_match_data *match_data = ud->match_data;
+	int chan_id, end;
+
+	if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) {
+		dev_info(ud->dev, "chan%d: already have %d pair allocated\n",
+			 uc->id, uc->tchan->id);
+		return 0;
+	}
+
+	if (uc->tchan) {
+		dev_err(ud->dev, "chan%d: already have tchan%d allocated\n",
+			uc->id, uc->tchan->id);
+		return -EBUSY;
+	} else if (uc->rchan) {
+		dev_err(ud->dev, "chan%d: already have rchan%d allocated\n",
+			uc->id, uc->rchan->id);
+		return -EBUSY;
+	}
+
+	/* Can be optimized, but let's have it like this for now */
+	end = min(ud->tchan_cnt, ud->rchan_cnt);
+	/* Try to use the highest TPL channel pair for MEM_TO_MEM channels */
+	chan_id = match_data->level_start_idx[match_data->tpl_levels - 1];
+	for (; chan_id < end; chan_id++) {
+		if (!test_bit(chan_id, ud->tchan_map) &&
+		    !test_bit(chan_id, ud->rchan_map))
+			break;
+	}
+
+	/*
+	 * '>=' rather than '==': if the start index is already beyond the
+	 * usable range the loop above is skipped and chan_id must not be
+	 * used to index the maps and arrays below.
+	 */
+	if (chan_id >= end)
+		return -ENOENT;
+
+	set_bit(chan_id, ud->tchan_map);
+	set_bit(chan_id, ud->rchan_map);
+	uc->tchan = &ud->tchans[chan_id];
+	uc->rchan = &ud->rchans[chan_id];
+
+	return 0;
+}
+
+/*
+ * udma_get_rflow - reserve the rflow with the given id for the channel
+ * @uc: channel which needs the rflow, it must already own an rchan
+ * @flow_id: index of the rflow to reserve
+ *
+ * Does nothing if the channel already has an rflow.
+ *
+ * Returns 0 on success, -EINVAL when the channel has no rchan or the error
+ * from __udma_get_rflow(); uc->rflow is NULL after a failure.
+ */
+static int udma_get_rflow(struct udma_chan *uc, int flow_id)
+{
+	struct udma_dev *ud = uc->ud;
+	int ret;
+
+	if (!uc->rchan) {
+		dev_err(ud->dev, "chan%d: does not have rchan??\n", uc->id);
+		return -EINVAL;
+	}
+
+	if (uc->rflow) {
+		dev_dbg(ud->dev, "chan%d: already have rflow%d allocated\n",
+			uc->id, uc->rflow->id);
+		return 0;
+	}
+
+	uc->rflow = __udma_get_rflow(ud, flow_id);
+	if (IS_ERR(uc->rflow)) {
+		ret = PTR_ERR(uc->rflow);
+		/*
+		 * uc->rflow is tested against NULL everywhere else, never
+		 * leave an ERR_PTR behind which would be dereferenced later.
+		 */
+		uc->rflow = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Give back the channel's rchan, if it has one: clear its bit in rchan_map
+ * and forget the pointer.
+ */
+static void udma_put_rchan(struct udma_chan *uc)
+{
+	struct udma_rchan *rchan = uc->rchan;
+
+	if (!rchan)
+		return;
+
+	dev_dbg(uc->ud->dev, "chan%d: put rchan%d\n", uc->id, rchan->id);
+	clear_bit(rchan->id, uc->ud->rchan_map);
+	uc->rchan = NULL;
+}
+
+/*
+ * Give back the channel's tchan, if it has one: clear its bit in tchan_map
+ * and forget the pointer.
+ */
+static void udma_put_tchan(struct udma_chan *uc)
+{
+	struct udma_tchan *tchan = uc->tchan;
+
+	if (!tchan)
+		return;
+
+	dev_dbg(uc->ud->dev, "chan%d: put tchan%d\n", uc->id, tchan->id);
+	clear_bit(tchan->id, uc->ud->tchan_map);
+	uc->tchan = NULL;
+}
+
+/*
+ * Give back the channel's rflow, if it has one, via __udma_put_rflow() and
+ * forget the pointer.
+ */
+static void udma_put_rflow(struct udma_chan *uc)
+{
+	struct udma_rflow *rflow = uc->rflow;
+
+	if (!rflow)
+		return;
+
+	dev_dbg(uc->ud->dev, "chan%d: put rflow%d\n", uc->id, rflow->id);
+	__udma_put_rflow(uc->ud, rflow);
+	uc->rflow = NULL;
+}
+
+/*
+ * Release everything udma_alloc_tx_resources() acquired: both rings of the
+ * tchan and the tchan itself. Nothing is done for a channel without tchan.
+ */
+static void udma_free_tx_resources(struct udma_chan *uc)
+{
+	struct udma_tchan *tchan = uc->tchan;
+
+	if (!tchan)
+		return;
+
+	k3_ringacc_ring_free(tchan->t_ring);
+	tchan->t_ring = NULL;
+
+	k3_ringacc_ring_free(tchan->tc_ring);
+	tchan->tc_ring = NULL;
+
+	udma_put_tchan(uc);
+}
+
+/*
+ * udma_alloc_tx_resources - get a tchan and its two rings
+ * @uc: channel to allocate for
+ *
+ * Reserves a tchan (unless the channel already has one), requests the ring
+ * with the tchan's index as t_ring and any free ring as tc_ring, then
+ * configures both rings identically (message mode, 8 byte elements,
+ * K3_UDMA_DEFAULT_RING_SIZE entries).
+ *
+ * Returns 0 on success, -EBUSY when a ring is not available, or the error
+ * of the failing call; everything acquired is released again on failure.
+ */
+static int udma_alloc_tx_resources(struct udma_chan *uc)
+{
+	struct k3_ring_cfg ring_cfg;
+	struct udma_dev *ud = uc->ud;
+	int ret;
+
+	ret = udma_get_tchan(uc);
+	if (ret)
+		return ret;
+
+	uc->tchan->t_ring = k3_ringacc_request_ring(ud->ringacc,
+						    uc->tchan->id, 0);
+	if (!uc->tchan->t_ring) {
+		ret = -EBUSY;
+		goto err_tx_ring;
+	}
+
+	uc->tchan->tc_ring = k3_ringacc_request_ring(ud->ringacc, -1, 0);
+	if (!uc->tchan->tc_ring) {
+		ret = -EBUSY;
+		goto err_txc_ring;
+	}
+
+	memset(&ring_cfg, 0, sizeof(ring_cfg));
+	ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+	ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+	ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+
+	/*
+	 * Check each configuration on its own: OR-ing two negative error
+	 * codes together does not give a meaningful errno.
+	 */
+	ret = k3_ringacc_ring_cfg(uc->tchan->t_ring, &ring_cfg);
+	if (ret)
+		goto err_ringcfg;
+
+	ret = k3_ringacc_ring_cfg(uc->tchan->tc_ring, &ring_cfg);
+	if (ret)
+		goto err_ringcfg;
+
+	return 0;
+
+err_ringcfg:
+	k3_ringacc_ring_free(uc->tchan->tc_ring);
+	uc->tchan->tc_ring = NULL;
+err_txc_ring:
+	k3_ringacc_ring_free(uc->tchan->t_ring);
+	uc->tchan->t_ring = NULL;
+err_tx_ring:
+	udma_put_tchan(uc);
+
+	return ret;
+}
+
+/*
+ * Release everything udma_alloc_rx_resources() acquired: the rings of the
+ * rflow and the rflow (when the channel has one) and the rchan. Nothing is
+ * done for a channel without rchan.
+ */
+static void udma_free_rx_resources(struct udma_chan *uc)
+{
+	struct udma_rflow *flow = uc->rflow;
+
+	if (!uc->rchan)
+		return;
+
+	if (flow) {
+		k3_ringacc_ring_free(flow->fd_ring);
+		flow->fd_ring = NULL;
+
+		k3_ringacc_ring_free(flow->r_ring);
+		flow->r_ring = NULL;
+
+		udma_put_rflow(uc);
+	}
+
+	udma_put_rchan(uc);
+}
+
+/*
+ * udma_alloc_rx_resources - get an rchan and, for slave channels, its rflow
+ *			     and rings
+ * @uc: channel to allocate for
+ *
+ * Reserves an rchan (unless the channel already has one). MEM_TO_MEM
+ * channels are done at that point. For all others the rflow with the
+ * rchan's index is reserved, the ring at index
+ * tchan_cnt + echan_cnt + rchan id is requested as fd_ring and any free ring
+ * as r_ring. Both rings are configured in message mode with 8 byte elements;
+ * the fd_ring holds SG_MAX_SEGMENTS entries in packet mode and
+ * K3_UDMA_DEFAULT_RING_SIZE entries otherwise, the r_ring always
+ * K3_UDMA_DEFAULT_RING_SIZE entries.
+ *
+ * Returns 0 on success, -EBUSY when the rflow or a ring is not available,
+ * or the error of the failing call; everything acquired is released again
+ * on failure.
+ */
+static int udma_alloc_rx_resources(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct k3_ring_cfg ring_cfg;
+	struct udma_rflow *rflow;
+	int fd_ring_id;
+	int ret;
+
+	ret = udma_get_rchan(uc);
+	if (ret)
+		return ret;
+
+	/* For MEM_TO_MEM we don't need rflow or rings */
+	if (uc->config.dir == DMA_MEM_TO_MEM)
+		return 0;
+
+	ret = udma_get_rflow(uc, uc->rchan->id);
+	if (ret) {
+		ret = -EBUSY;
+		goto err_rflow;
+	}
+
+	rflow = uc->rflow;
+	fd_ring_id = ud->tchan_cnt + ud->echan_cnt + uc->rchan->id;
+	rflow->fd_ring = k3_ringacc_request_ring(ud->ringacc, fd_ring_id, 0);
+	if (!rflow->fd_ring) {
+		ret = -EBUSY;
+		goto err_rx_ring;
+	}
+
+	rflow->r_ring = k3_ringacc_request_ring(ud->ringacc, -1, 0);
+	if (!rflow->r_ring) {
+		ret = -EBUSY;
+		goto err_rxc_ring;
+	}
+
+	memset(&ring_cfg, 0, sizeof(ring_cfg));
+
+	if (uc->config.pkt_mode)
+		ring_cfg.size = SG_MAX_SEGMENTS;
+	else
+		ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+
+	ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+	ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+
+	/*
+	 * Check each configuration on its own: OR-ing two negative error
+	 * codes together does not give a meaningful errno.
+	 */
+	ret = k3_ringacc_ring_cfg(rflow->fd_ring, &ring_cfg);
+	if (ret)
+		goto err_ringcfg;
+
+	/* The receive ring always uses the default size */
+	ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+	ret = k3_ringacc_ring_cfg(rflow->r_ring, &ring_cfg);
+	if (ret)
+		goto err_ringcfg;
+
+	return 0;
+
+err_ringcfg:
+	k3_ringacc_ring_free(rflow->r_ring);
+	rflow->r_ring = NULL;
+err_rxc_ring:
+	k3_ringacc_ring_free(rflow->fd_ring);
+	rflow->fd_ring = NULL;
+err_rx_ring:
+	udma_put_rflow(uc);
+err_rflow:
+	udma_put_rchan(uc);
+
+	return ret;
+}
+
+/*
+ * valid_params mask placed in every TX channel configuration request
+ * (struct ti_sci_msg_rm_udmap_tx_ch_cfg) built by the
+ * udma_tisci_*_channel_config() functions.
+ */
+#define TISCI_TCHAN_VALID_PARAMS (				\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID)
+
+/*
+ * valid_params mask placed in every RX channel configuration request
+ * (struct ti_sci_msg_rm_udmap_rx_ch_cfg) built by the
+ * udma_tisci_*_channel_config() functions.
+ */
+#define TISCI_RCHAN_VALID_PARAMS (				\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |		\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |	\
+	TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID)
+
+/*
+ * udma_tisci_m2m_channel_config - configure the tchan/rchan pair of a
+ *				   MEM_TO_MEM channel through TI-SCI
+ * @uc: channel owning the pair
+ *
+ * Both halves are set up as TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR with
+ * the tchan's tc_ring as completion queue and a fetch size of
+ * sizeof(struct cppi5_desc_hdr_t) >> 2. The rchan is only configured if the
+ * tchan configuration succeeded.
+ *
+ * Returns 0 on success or the error of the failing TI-SCI call.
+ */
+static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *ops = tisci_rm->tisci_udmap_ops;
+	struct udma_tchan *tchan = uc->tchan;
+	struct udma_rchan *rchan = uc->rchan;
+	/* Non synchronized - mem to mem type of transfer */
+	int cq_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = {
+		.valid_params = TISCI_TCHAN_VALID_PARAMS,
+		.nav_id = tisci_rm->tisci_dev_id,
+		.index = tchan->id,
+		.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR,
+		.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2,
+		.txcq_qnum = cq_ring,
+	};
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = {
+		.valid_params = TISCI_RCHAN_VALID_PARAMS,
+		.nav_id = tisci_rm->tisci_dev_id,
+		.index = rchan->id,
+		.rx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2,
+		.rxcq_qnum = cq_ring,
+		.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR,
+	};
+	int err;
+
+	err = ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+	if (err) {
+		dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, err);
+		return err;
+	}
+
+	err = ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+	if (err)
+		dev_err(ud->dev, "rchan%d alloc failed %d\n", rchan->id, err);
+
+	return err;
+}
+
+/*
+ * udma_tisci_tx_channel_config - configure the tchan of a MEM_TO_DEV channel
+ *				  through TI-SCI
+ * @uc: channel owning the tchan
+ *
+ * Packet mode channels are configured as TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR
+ * with the fetch size taken from cppi5_hdesc_calc_size(), all others as
+ * TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR with the size of the descriptor
+ * header. The size is passed to TI-SCI shifted right by two.
+ *
+ * Returns 0 on success or the error of the TI-SCI call.
+ */
+static int udma_tisci_tx_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *ops = tisci_rm->tisci_udmap_ops;
+	struct udma_tchan *tchan = uc->tchan;
+	int cq_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg req = { 0 };
+	u32 chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+	u32 desc_bytes = sizeof(struct cppi5_desc_hdr_t);
+	int err;
+
+	/* Packet mode overrides the TR mode defaults set above */
+	if (uc->config.pkt_mode) {
+		chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+		desc_bytes = cppi5_hdesc_calc_size(uc->config.needs_epib,
+						   uc->config.psd_size, 0);
+	}
+
+	req.valid_params = TISCI_TCHAN_VALID_PARAMS;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.index = tchan->id;
+	req.tx_chan_type = chan_type;
+	req.tx_supr_tdpkt = uc->config.notdpkt;
+	req.tx_fetch_size = desc_bytes >> 2;
+	req.txcq_qnum = cq_ring;
+
+	err = ops->tx_ch_cfg(tisci_rm->tisci, &req);
+	if (err)
+		dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, err);
+
+	return err;
+}
+
+/*
+ * udma_tisci_rx_channel_config - configure the rchan and the rflow of a
+ *				  DEV_TO_MEM channel through TI-SCI
+ * @uc: channel owning the rchan and the rflow
+ *
+ * The rchan is configured first: packet mode channels as
+ * TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR with the fetch size taken from
+ * cppi5_hdesc_calc_size(), all others as TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR
+ * with the size of the descriptor header (passed shifted right by two).
+ * Then the flow with the rchan's index is configured to deliver to the
+ * rflow's r_ring and to take free descriptors from its fd_ring for all four
+ * free descriptor queues.
+ *
+ * Returns 0 on success or the error of the failing TI-SCI call. A failed
+ * flow configuration is reported to the caller as well, instead of only
+ * being logged.
+ */
+static int udma_tisci_rx_channel_config(struct udma_chan *uc)
+{
+	struct udma_dev *ud = uc->ud;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+	struct udma_rchan *rchan = uc->rchan;
+	int fd_ring = k3_ringacc_get_ring_id(uc->rflow->fd_ring);
+	int rx_ring = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+	struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
+	u32 mode, fetch_size;
+	int ret = 0;
+
+	if (uc->config.pkt_mode) {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+		fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib,
+						   uc->config.psd_size, 0);
+	} else {
+		mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+		fetch_size = sizeof(struct cppi5_desc_hdr_t);
+	}
+
+	req_rx.valid_params = TISCI_RCHAN_VALID_PARAMS;
+	req_rx.nav_id = tisci_rm->tisci_dev_id;
+	req_rx.index = rchan->id;
+	req_rx.rx_fetch_size =  fetch_size >> 2;
+	req_rx.rxcq_qnum = rx_ring;
+	req_rx.rx_chan_type = mode;
+
+	ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+	if (ret) {
+		dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret);
+		return ret;
+	}
+
+	flow_req.valid_params =
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+		TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+
+	flow_req.nav_id = tisci_rm->tisci_dev_id;
+	/* The flow with the same index as the rchan is used */
+	flow_req.flow_index = rchan->id;
+
+	if (uc->config.needs_epib)
+		flow_req.rx_einfo_present = 1;
+	else
+		flow_req.rx_einfo_present = 0;
+	if (uc->config.psd_size)
+		flow_req.rx_psinfo_present = 1;
+	else
+		flow_req.rx_psinfo_present = 0;
+	flow_req.rx_error_handling = 1;
+	flow_req.rx_dest_qnum = rx_ring;
+	flow_req.rx_src_tag_hi_sel = UDMA_RFLOW_SRCTAG_NONE;
+	flow_req.rx_src_tag_lo_sel = UDMA_RFLOW_SRCTAG_SRC_TAG;
+	flow_req.rx_dest_tag_hi_sel = UDMA_RFLOW_DSTTAG_DST_TAG_HI;
+	flow_req.rx_dest_tag_lo_sel = UDMA_RFLOW_DSTTAG_DST_TAG_LO;
+	flow_req.rx_fdq0_sz0_qnum = fd_ring;
+	flow_req.rx_fdq1_qnum = fd_ring;
+	flow_req.rx_fdq2_qnum = fd_ring;
+	flow_req.rx_fdq3_qnum = fd_ring;
+
+	ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req);
+
+	if (ret)
+		dev_err(ud->dev, "flow%d config failed: %d\n", rchan->id, ret);
+
+	/* Do not report success for a channel whose flow is not configured */
+	return ret;
+}
+
+/*
+ * udma_alloc_chan_resources - set up everything a channel needs to operate
+ * @chan: dmaengine channel to set up
+ *
+ * Depending on uc->config.dir the tchan and/or rchan (plus rflow and rings)
+ * are allocated and configured through TI-SCI, the PSI-L threads are paired
+ * and the ring interrupt - for slave channels in TR mode also the UDMA event
+ * interrupt - is requested. Packet mode and MEM_TO_MEM channels additionally
+ * get a dma pool for their descriptors.
+ *
+ * Returns 0 on success or a negative error code. Every resource acquired by
+ * this function is released again on any failure.
+ */
+static int udma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_dev *ud = to_udma_dev(chan->device);
+	const struct udma_match_data *match_data = ud->match_data;
+	struct k3_ring *irq_ring;
+	u32 irq_udma_idx;
+	int ret;
+
+	if (uc->config.pkt_mode || uc->config.dir == DMA_MEM_TO_MEM) {
+		uc->use_dma_pool = true;
+		/* in case of MEM_TO_MEM we have maximum of two TRs */
+		if (uc->config.dir == DMA_MEM_TO_MEM) {
+			uc->config.hdesc_size = cppi5_trdesc_calc_size(
+					sizeof(struct cppi5_tr_type15_t), 2);
+			uc->config.pkt_mode = false;
+		}
+	}
+
+	if (uc->use_dma_pool) {
+		uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev,
+						 uc->config.hdesc_size,
+						 ud->desc_align,
+						 0);
+		if (!uc->hdesc_pool) {
+			dev_err(ud->ddev.dev,
+				"Descriptor pool allocation failed\n");
+			uc->use_dma_pool = false;
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * Make sure that the completion is in a known state:
+	 * No teardown, the channel is idle
+	 */
+	reinit_completion(&uc->teardown_completed);
+	complete_all(&uc->teardown_completed);
+	uc->state = UDMA_CHAN_IS_IDLE;
+
+	switch (uc->config.dir) {
+	case DMA_MEM_TO_MEM:
+		/* Non synchronized - mem to mem type of transfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__,
+			uc->id);
+
+		ret = udma_get_chan_pair(uc);
+		if (ret)
+			goto err_pool_free;
+
+		ret = udma_alloc_tx_resources(uc);
+		if (ret) {
+			/*
+			 * The failed call gave back the tchan of the pair,
+			 * do not leave the rchan reserved either.
+			 */
+			udma_put_rchan(uc);
+			goto err_pool_free;
+		}
+
+		ret = udma_alloc_rx_resources(uc);
+		if (ret) {
+			udma_free_tx_resources(uc);
+			goto err_pool_free;
+		}
+
+		uc->config.src_thread = ud->psil_base + uc->tchan->id;
+		uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+					K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->tchan->tc_ring;
+		irq_udma_idx = uc->tchan->id;
+
+		ret = udma_tisci_m2m_channel_config(uc);
+		break;
+	case DMA_MEM_TO_DEV:
+		/* Slave transfer synchronized - mem to dev (TX) transfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__,
+			uc->id);
+
+		ret = udma_alloc_tx_resources(uc);
+		if (ret) {
+			uc->config.remote_thread_id = -1;
+			goto err_pool_free;
+		}
+
+		uc->config.src_thread = ud->psil_base + uc->tchan->id;
+		uc->config.dst_thread = uc->config.remote_thread_id;
+		uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->tchan->tc_ring;
+		irq_udma_idx = uc->tchan->id;
+
+		ret = udma_tisci_tx_channel_config(uc);
+		break;
+	case DMA_DEV_TO_MEM:
+		/* Slave transfer synchronized - dev to mem (RX) transfer */
+		dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__,
+			uc->id);
+
+		ret = udma_alloc_rx_resources(uc);
+		if (ret) {
+			uc->config.remote_thread_id = -1;
+			goto err_pool_free;
+		}
+
+		uc->config.src_thread = uc->config.remote_thread_id;
+		uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+					K3_PSIL_DST_THREAD_ID_OFFSET;
+
+		irq_ring = uc->rflow->r_ring;
+		irq_udma_idx = match_data->rchan_oes_offset + uc->rchan->id;
+
+		ret = udma_tisci_rx_channel_config(uc);
+		break;
+	default:
+		/* Can not happen */
+		dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n",
+			__func__, uc->id, uc->config.dir);
+		ret = -EINVAL;
+		goto err_pool_free;
+	}
+
+	/* check if the channel configuration was successful */
+	if (ret)
+		goto err_res_free;
+
+	if (udma_is_chan_running(uc)) {
+		dev_warn(ud->dev, "chan%d: is running!\n", uc->id);
+		udma_stop(uc);
+		if (udma_is_chan_running(uc)) {
+			dev_err(ud->dev, "chan%d: won't stop!\n", uc->id);
+			/* ret is 0 here, do not report success */
+			ret = -EBUSY;
+			goto err_res_free;
+		}
+	}
+
+	/* PSI-L pairing */
+	ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread);
+	if (ret) {
+		dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n",
+			uc->config.src_thread, uc->config.dst_thread);
+		goto err_res_free;
+	}
+
+	uc->psil_paired = true;
+
+	uc->irq_num_ring = k3_ringacc_get_ring_irq_num(irq_ring);
+	if (uc->irq_num_ring <= 0) {
+		dev_err(ud->dev, "Failed to get ring irq (index: %u)\n",
+			k3_ringacc_get_ring_id(irq_ring));
+		ret = -EINVAL;
+		goto err_psi_free;
+	}
+
+	/*
+	 * The ring interrupt handler schedules the tx_drain work, so the work
+	 * must be initialized before the interrupt is requested. The work is
+	 * embedded in the channel, not placed on the stack, hence
+	 * INIT_DELAYED_WORK() and not the _ONSTACK variant.
+	 */
+	INIT_DELAYED_WORK(&uc->tx_drain.work, udma_check_tx_completion);
+
+	ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler,
+			  IRQF_TRIGGER_HIGH, uc->name, uc);
+	if (ret) {
+		dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id);
+		goto err_irq_free;
+	}
+
+	/* Event from UDMA (TR events) only needed for slave TR mode channels */
+	if (is_slave_direction(uc->config.dir) && !uc->config.pkt_mode) {
+		uc->irq_num_udma = ti_sci_inta_msi_get_virq(ud->dev,
+							    irq_udma_idx);
+		if (uc->irq_num_udma <= 0) {
+			dev_err(ud->dev, "Failed to get udma irq (index: %u)\n",
+				irq_udma_idx);
+			free_irq(uc->irq_num_ring, uc);
+			ret = -EINVAL;
+			goto err_irq_free;
+		}
+
+		ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0,
+				  uc->name, uc);
+		if (ret) {
+			dev_err(ud->dev, "chan%d: UDMA irq request failed\n",
+				uc->id);
+			free_irq(uc->irq_num_ring, uc);
+			goto err_irq_free;
+		}
+	} else {
+		uc->irq_num_udma = 0;
+	}
+
+	udma_reset_rings(uc);
+
+	return 0;
+
+	/*
+	 * Error unwinding. The interrupts themselves are freed at the failing
+	 * site, err_irq_free only forgets their numbers.
+	 */
+err_irq_free:
+	uc->irq_num_ring = 0;
+	uc->irq_num_udma = 0;
+err_psi_free:
+	navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread);
+	uc->psil_paired = false;
+err_res_free:
+	udma_free_tx_resources(uc);
+	udma_free_rx_resources(uc);
+
+	udma_reset_uchan(uc);
+err_pool_free:
+	/* Also reached by the early failures, which own nothing but the pool */
+	if (uc->use_dma_pool) {
+		dma_pool_destroy(uc->hdesc_pool);
+		uc->hdesc_pool = NULL;
+		uc->use_dma_pool = false;
+	}
+
+	return ret;
+}
+
+/*
+ * udma_slave_config - store the slave configuration for later use
+ * @chan: dmaengine channel
+ * @cfg: configuration supplied by the client
+ *
+ * The configuration is only copied into the channel (uc->cfg), nothing is
+ * validated or applied here. Always returns 0.
+ */
+static int udma_slave_config(struct dma_chan *chan,
+			     struct dma_slave_config *cfg)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+
+	uc->cfg = *cfg;
+
+	return 0;
+}
+
+/*
+ * udma_alloc_tr_desc - allocate a descriptor carrying @tr_count TRs
+ * @uc: channel the descriptor is for
+ * @tr_size: size of one TR record in bytes; 16, 32, 64 or 128
+ * @tr_count: number of TR records
+ * @dir: transfer direction, selects the return ring
+ *
+ * Allocates the udma_desc with a single hwdesc and the memory of the CPPI5
+ * TR descriptor - from the channel's dma pool when it uses one, otherwise as
+ * coherent memory - and initializes the descriptor header. The TR records
+ * start @tr_size bytes into the descriptor and are followed by the TR
+ * response array. All allocations use GFP_NOWAIT.
+ *
+ * Returns the descriptor, or NULL for an unsupported @tr_size or when an
+ * allocation fails.
+ */
+static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc,
+					    size_t tr_size, int tr_count,
+					    enum dma_transfer_direction dir)
+{
+	struct cppi5_desc_hdr_t *hdr;
+	struct udma_hwdesc *hw;
+	struct udma_desc *desc;
+	u32 reload_cnt;
+	u32 ret_ring;
+
+	if (tr_size != 16 && tr_size != 32 && tr_size != 64 &&
+	    tr_size != 128) {
+		dev_err(uc->ud->dev, "Unsupported TR size of %zu\n", tr_size);
+		return NULL;
+	}
+
+	/* We have only one descriptor containing multiple TRs */
+	desc = kzalloc(sizeof(*desc) + sizeof(desc->hwdesc[0]), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sglen = tr_count;
+	desc->hwdesc_count = 1;
+	hw = &desc->hwdesc[0];
+
+	/* Allocate memory for DMA ring descriptor */
+	if (!uc->use_dma_pool) {
+		hw->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, tr_count);
+		hw->cppi5_desc_size = ALIGN(hw->cppi5_desc_size,
+					    uc->ud->desc_align);
+		hw->cppi5_desc_vaddr = dma_alloc_coherent(uc->ud->dev,
+							  hw->cppi5_desc_size,
+							  &hw->cppi5_desc_paddr,
+							  GFP_NOWAIT);
+	} else {
+		/* Pool entries have the size the channel was set up with */
+		hw->cppi5_desc_size = uc->config.hdesc_size;
+		hw->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						       GFP_NOWAIT,
+						       &hw->cppi5_desc_paddr);
+	}
+
+	if (!hw->cppi5_desc_vaddr) {
+		kfree(desc);
+		return NULL;
+	}
+
+	/* Start of the TR req records */
+	hw->tr_req_base = hw->cppi5_desc_vaddr + tr_size;
+	/* Start address of the TR response array */
+	hw->tr_resp_base = hw->tr_req_base + tr_size * tr_count;
+
+	hdr = hw->cppi5_desc_vaddr;
+
+	/* Cyclic transfers reload the descriptor forever */
+	reload_cnt = uc->cyclic ? CPPI5_INFO0_TRDESC_RLDCNT_INFINITE : 0;
+
+	/* RX descriptors return via the rflow's ring, all others via tc_ring */
+	if (dir == DMA_DEV_TO_MEM)
+		ret_ring = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+	else
+		ret_ring = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+	cppi5_trdesc_init(hdr, tr_count, tr_size, 0, reload_cnt);
+	cppi5_desc_set_pktids(hdr, uc->id, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+	cppi5_desc_set_retpolicy(hdr, 0, ret_ring);
+
+	return desc;
+}
+
+/*
+ * udma_prep_slave_sg_tr - build a TR mode descriptor for a scatterlist
+ * @uc: channel the transfer is prepared for
+ * @sgl: scatterlist, already DMA mapped
+ * @sglen: number of entries in @sgl, must not be 0
+ * @dir: DMA_DEV_TO_MEM or DMA_MEM_TO_DEV
+ * @tx_flags: dmaengine flags (unused here)
+ * @context: unused
+ *
+ * One type1 TR is set up per scatterlist entry: icnt0 and dim1 are
+ * burst * bus width, icnt1 is the number of such chunks in the entry. The
+ * bus width and burst are taken from the slave configuration of the given
+ * direction (a burst of 0 is treated as 1). The last TR is marked with EOP
+ * and d->residue is the sum of all entry lengths.
+ *
+ * Returns the descriptor, or NULL for a bad direction, an empty list, an
+ * undefined (0) bus width or when the allocation fails.
+ */
+static struct udma_desc *
+udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
+		      unsigned int sglen, enum dma_transfer_direction dir,
+		      unsigned long tx_flags, void *context)
+{
+	enum dma_slave_buswidth dev_width;
+	struct scatterlist *sgent;
+	struct udma_desc *d;
+	size_t tr_size;
+	struct cppi5_tr_type1_t *tr_req = NULL;
+	unsigned int i;
+	u32 burst;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!burst)
+		burst = 1;
+
+	/*
+	 * Refuse what would fault below: with an empty list the EOP flag
+	 * would be written to tr_req[-1] and with a bus width of 0 icnt0
+	 * becomes 0, which the icnt1 calculation divides by.
+	 */
+	if (!sglen || !dev_width) {
+		dev_err(uc->ud->dev, "%s: invalid sglen (%u) or bus width (%d)\n",
+			__func__, sglen, dev_width);
+		return NULL;
+	}
+
+	/* Now allocate and setup the descriptor. */
+	tr_size = sizeof(struct cppi5_tr_type1_t);
+	d = udma_alloc_tr_desc(uc, tr_size, sglen, dir);
+	if (!d)
+		return NULL;
+
+	d->sglen = sglen;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+	for_each_sg(sgl, sgent, sglen, i) {
+		d->residue += sg_dma_len(sgent);
+
+		cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+		cppi5_tr_csf_set(&tr_req[i].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+		tr_req[i].addr = sg_dma_address(sgent);
+		tr_req[i].icnt0 = burst * dev_width;
+		tr_req[i].dim1 = burst * dev_width;
+		tr_req[i].icnt1 = sg_dma_len(sgent) / tr_req[i].icnt0;
+	}
+
+	/* i == sglen after the loop: flag the last TR as end of packet */
+	cppi5_tr_csf_set(&tr_req[i - 1].flags, CPPI5_TR_CSF_EOP);
+
+	return d;
+}
+
+/*
+ * udma_configure_statictr - fill in the static TR of a descriptor
+ * @uc: channel the descriptor belongs to
+ * @d: descriptor to update, d->residue and d->sglen must be valid
+ * @dev_width: bus width of the peripheral, selects the element size
+ * @elcnt: element count
+ *
+ * Only channels with a PSIL_EP_PDMA_XY endpoint have a static TR, for all
+ * others nothing is done. The burst count (bstcnt) is calculated for packet
+ * mode and for non cyclic TR mode channels and is 0 for cyclic TR mode.
+ *
+ * Returns 0 on success or -EINVAL for an unsupported bus width, a zero
+ * element count or cyclic period count where it would be used as divisor,
+ * or an RX burst count exceeding the statictr_z_mask of the device.
+ */
+static int udma_configure_statictr(struct udma_chan *uc, struct udma_desc *d,
+				   enum dma_slave_buswidth dev_width,
+				   u16 elcnt)
+{
+	if (uc->config.ep_type != PSIL_EP_PDMA_XY)
+		return 0;
+
+	/* Bus width translates to the element size (ES) */
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		d->static_tr.elsize = 0;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		d->static_tr.elsize = 1;
+		break;
+	case DMA_SLAVE_BUSWIDTH_3_BYTES:
+		d->static_tr.elsize = 2;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		d->static_tr.elsize = 3;
+		break;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		d->static_tr.elsize = 4;
+		break;
+	default: /* not reached */
+		return -EINVAL;
+	}
+
+	d->static_tr.elcnt = elcnt;
+
+	/*
+	 * PDMA must close the packet when the channel is in packet mode.
+	 * For TR mode when the channel is not cyclic we also need PDMA to close
+	 * the packet otherwise the transfer will stall because PDMA holds on
+	 * the data it has received from the peripheral.
+	 */
+	if (uc->config.pkt_mode || !uc->cyclic) {
+		unsigned int div = dev_width * elcnt;
+
+		/* dev_width is non zero here, so this catches elcnt == 0 */
+		if (!div)
+			return -EINVAL;
+
+		if (uc->cyclic) {
+			/* The per period size is needed, avoid dividing by 0 */
+			if (!d->sglen)
+				return -EINVAL;
+
+			d->static_tr.bstcnt = d->residue / d->sglen / div;
+		} else {
+			d->static_tr.bstcnt = d->residue / div;
+		}
+
+		if (uc->config.dir == DMA_DEV_TO_MEM &&
+		    d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask)
+			return -EINVAL;
+	} else {
+		d->static_tr.bstcnt = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Build a packet mode descriptor for a slave scatter-gather transfer.
+ *
+ * One CPPI5 host descriptor is taken from the channel's pool per scatterlist
+ * entry. The first is initialized as the packet descriptor (packet IDs and
+ * return ring), the following ones are reset as host buffer descriptors.
+ * For MEM_TO_DEV every descriptor is linked to the one before it. The summed
+ * length has to stay below 4M and ends up as the packet length of the first
+ * descriptor.
+ *
+ * @tx_flags and @context are not used here.
+ *
+ * Returns the new descriptor or NULL on failure.
+ */
+static struct udma_desc *
+udma_prep_slave_sg_pkt(struct udma_chan *uc, struct scatterlist *sgl,
+		       unsigned int sglen, enum dma_transfer_direction dir,
+		       unsigned long tx_flags, void *context)
+{
+	struct cppi5_host_desc_t *prev_desc = NULL;
+	struct cppi5_host_desc_t *first_desc;
+	struct scatterlist *sgent;
+	struct udma_desc *d;
+	unsigned int i;
+	u32 ring_id;
+
+	d = kzalloc(sizeof(*d) + sglen * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	d->hwdesc_count = sglen;
+	d->sglen = sglen;
+
+	/* Return ring: rflow ring for DEV_TO_MEM, tchan ring otherwise */
+	if (dir != DMA_DEV_TO_MEM)
+		ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+	else
+		ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+
+	for_each_sg(sgl, sgent, sglen, i) {
+		struct udma_hwdesc *hwdesc = &d->hwdesc[i];
+		struct cppi5_host_desc_t *cur_desc;
+		dma_addr_t buf_addr = sg_dma_address(sgent);
+		size_t buf_len = sg_dma_len(sgent);
+
+		hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						GFP_NOWAIT,
+						&hwdesc->cppi5_desc_paddr);
+		if (!hwdesc->cppi5_desc_vaddr) {
+			dev_err(uc->ud->dev,
+				"descriptor%d allocation failed\n", i);
+			goto err_free;
+		}
+
+		cur_desc = hwdesc->cppi5_desc_vaddr;
+		hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+		d->residue += buf_len;
+
+		if (i != 0) {
+			/* follow-up entries are plain host buffer descriptors */
+			cppi5_hdesc_reset_hbdesc(cur_desc);
+			cppi5_desc_set_retpolicy(&cur_desc->hdr, 0, 0xffff);
+		} else {
+			cppi5_hdesc_init(cur_desc, 0, 0);
+			/* Flow and Packet ID */
+			cppi5_desc_set_pktids(&cur_desc->hdr, uc->id,
+					      CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+			cppi5_desc_set_retpolicy(&cur_desc->hdr, 0, ring_id);
+		}
+
+		/* the sg buffer is both the buffer and the original buffer */
+		cppi5_hdesc_attach_buf(cur_desc, buf_addr, buf_len,
+				       buf_addr, buf_len);
+
+		/* chain to the previous descriptor (only tracked for TX) */
+		if (prev_desc)
+			cppi5_hdesc_link_hbdesc(prev_desc,
+						hwdesc->cppi5_desc_paddr);
+
+		if (dir == DMA_MEM_TO_DEV)
+			prev_desc = cur_desc;
+	}
+
+	if (d->residue >= SZ_4M) {
+		dev_err(uc->ud->dev,
+			"%s: Transfer size %u is over the supported 4M range\n",
+			__func__, d->residue);
+		goto err_free;
+	}
+
+	first_desc = d->hwdesc[0].cppi5_desc_vaddr;
+	cppi5_hdesc_set_pktlen(first_desc, d->residue);
+
+	return d;
+
+err_free:
+	udma_free_hwdesc(uc, d);
+	kfree(d);
+	return NULL;
+}
+
+/*
+ * metadata_ops .attach: attach a client provided metadata buffer to @desc.
+ *
+ * Only available for packet mode channels with a non zero metadata size.
+ * For MEM_TO_DEV descriptors @data is copied into the EPIB area of the first
+ * host descriptor. The pointer and length are remembered in the descriptor
+ * and the EPIB present flag and PS data size are updated in the host
+ * descriptor.
+ *
+ * Returns 0, -ENOTSUPP when the channel has no metadata support or -EINVAL
+ * for a NULL buffer or a length outside of the supported range.
+ */
+static int udma_attach_metadata(struct dma_async_tx_descriptor *desc,
+				void *data, size_t len)
+{
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct udma_desc *d = to_udma_desc(desc);
+	struct cppi5_host_desc_t *h_desc;
+	u32 hdesc_flags = 0;
+	u32 psd_size = len;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return -ENOTSUPP;
+
+	/* with EPIB in use the metadata has to hold at least the EPIB */
+	if (!data || len > uc->config.metadata_size ||
+	    (uc->config.needs_epib && len < CPPI5_INFO0_HDESC_EPIB_SIZE))
+		return -EINVAL;
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+	if (d->dir == DMA_MEM_TO_DEV)
+		memcpy(h_desc->epib, data, len);
+
+	d->metadata = data;
+	d->metadata_size = len;
+
+	if (uc->config.needs_epib) {
+		/* the EPIB part is not counted as PS data */
+		psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+		hdesc_flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+	}
+
+	cppi5_hdesc_update_flags(h_desc, hdesc_flags);
+	cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+	return 0;
+}
+
+/*
+ * metadata_ops .get_ptr: hand out the metadata area of the first host
+ * descriptor of @desc.
+ *
+ * @max_len is set to the channel's metadata size, @payload_len to the PS data
+ * size plus the EPIB size when the descriptor has the EPIB present flag set.
+ *
+ * Returns a pointer to the EPIB area of the descriptor, or
+ * ERR_PTR(-ENOTSUPP) when the channel has no metadata support.
+ */
+static void *udma_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+				   size_t *payload_len, size_t *max_len)
+{
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct udma_desc *d = to_udma_desc(desc);
+	struct cppi5_host_desc_t *h_desc;
+	size_t in_use;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return ERR_PTR(-ENOTSUPP);
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+	*max_len = uc->config.metadata_size;
+
+	in_use = cppi5_hdesc_get_psdata_size(h_desc);
+	if (cppi5_hdesc_epib_present(&h_desc->hdr))
+		in_use += CPPI5_INFO0_HDESC_EPIB_SIZE;
+	*payload_len = in_use;
+
+	return h_desc->epib;
+}
+
+/*
+ * metadata_ops .set_len: record how much of the metadata area of @desc is in
+ * use.
+ *
+ * Updates the EPIB present flag and the PS data size in the first host
+ * descriptor; when the channel uses EPIB its size is taken off @payload_len
+ * to get the PS data size.
+ *
+ * Returns 0, -ENOTSUPP when the channel has no metadata support or -EINVAL
+ * when @payload_len is outside of the supported range.
+ */
+static int udma_set_metadata_len(struct dma_async_tx_descriptor *desc,
+				 size_t payload_len)
+{
+	struct udma_chan *uc = to_udma_chan(desc->chan);
+	struct udma_desc *d = to_udma_desc(desc);
+	struct cppi5_host_desc_t *h_desc;
+	u32 hdesc_flags = 0;
+	u32 psd_size = payload_len;
+
+	if (!uc->config.pkt_mode || !uc->config.metadata_size)
+		return -ENOTSUPP;
+
+	/* with EPIB in use the payload has to hold at least the EPIB */
+	if (payload_len > uc->config.metadata_size ||
+	    (uc->config.needs_epib &&
+	     payload_len < CPPI5_INFO0_HDESC_EPIB_SIZE))
+		return -EINVAL;
+
+	if (uc->config.needs_epib) {
+		hdesc_flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+		psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+	}
+
+	h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+	cppi5_hdesc_update_flags(h_desc, hdesc_flags);
+	cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+	return 0;
+}
+
+/*
+ * Descriptor metadata callbacks. The prep functions install them on a
+ * descriptor only when the channel has a non zero metadata_size.
+ */
+static struct dma_descriptor_metadata_ops metadata_ops = {
+	.attach = udma_attach_metadata,
+	.get_ptr = udma_get_metadata_ptr,
+	.set_len = udma_set_metadata_len,
+};
+
+/*
+ * Prepare a slave scatter-gather transfer on @chan.
+ *
+ * @dir has to match the direction the channel was configured for. The
+ * descriptor is built by the packet or TR mode helper depending on the
+ * channel configuration, then the StaticTR for a remote PDMA is set up and
+ * the metadata ops are installed when the channel has metadata.
+ *
+ * Returns the prepared descriptor or NULL on any failure.
+ */
+static struct dma_async_tx_descriptor *
+udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		   unsigned int sglen, enum dma_transfer_direction dir,
+		   unsigned long tx_flags, void *context)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	u32 burst;
+
+	if (dir != uc->config.dir) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(dir));
+		return NULL;
+	}
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* a maxburst of 0 is treated as single element bursts */
+	if (!burst)
+		burst = 1;
+
+	if (uc->config.pkt_mode)
+		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
+					   context);
+	else
+		d = udma_prep_slave_sg_tr(uc, sgl, sglen, dir, tx_flags,
+					  context);
+
+	if (!d)
+		return NULL;
+
+	d->dir = dir;
+	d->desc_idx = 0;
+	d->tr_idx = 0;
+
+	/* static TR for remote PDMA */
+	if (udma_configure_statictr(uc, d, dev_width, burst)) {
+		/*
+		 * The limit depends on the SoC (statictr_z_mask), report the
+		 * one which is in effect instead of a fixed number.
+		 */
+		dev_err(uc->ud->dev,
+			"%s: StaticTR Z is limited to maximum %u (%u)\n",
+			__func__,
+			(unsigned int)uc->ud->match_data->statictr_z_mask,
+			d->static_tr.bstcnt);
+
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return NULL;
+	}
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+/*
+ * Build a TR mode descriptor for a cyclic transfer.
+ *
+ * One type 1 TR is set up per period (buf_len / period_len of them): icnt0
+ * is the bus width in bytes and icnt1 the number of elements in a period.
+ * Without DMA_PREP_INTERRUPT in @flags every TR gets CPPI5_TR_CSF_SUPR_EVT.
+ *
+ * Returns the new descriptor, or NULL on a bad direction, a zero period
+ * length, an undefined bus width or when the descriptor can not be
+ * allocated.
+ */
+static struct udma_desc *
+udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
+			size_t buf_len, size_t period_len,
+			enum dma_transfer_direction dir, unsigned long flags)
+{
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	size_t tr_size;
+	struct cppi5_tr_type1_t *tr_req;
+	unsigned int i;
+	unsigned int periods;
+
+	/* the burst size is not used for the cyclic TR setup */
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* both are used as divisors below */
+	if (!period_len || dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(uc->ud->dev,
+			"%s: invalid period length (%zu) or bus width (%d)\n",
+			__func__, period_len, dev_width);
+		return NULL;
+	}
+
+	periods = buf_len / period_len;
+
+	/* Now allocate and setup the descriptor. */
+	tr_size = sizeof(struct cppi5_tr_type1_t);
+	d = udma_alloc_tr_desc(uc, tr_size, periods, dir);
+	if (!d)
+		return NULL;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+	for (i = 0; i < periods; i++) {
+		cppi5_tr_init(&tr_req[i].flags, CPPI5_TR_TYPE1, false, false,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+		tr_req[i].addr = buf_addr + period_len * i;
+		tr_req[i].icnt0 = dev_width;
+		tr_req[i].icnt1 = period_len / dev_width;
+		tr_req[i].dim1 = dev_width;
+
+		if (!(flags & DMA_PREP_INTERRUPT))
+			cppi5_tr_csf_set(&tr_req[i].flags,
+					 CPPI5_TR_CSF_SUPR_EVT);
+	}
+
+	return d;
+}
+
+/*
+ * Build a packet mode descriptor for a cyclic transfer.
+ *
+ * Every period gets its own CPPI5 host descriptor from the channel's pool,
+ * set up as a packet of period_len bytes. The number of periods is limited
+ * to K3_UDMA_DEFAULT_RING_SIZE - 1 and a period has to be shorter than 4M.
+ *
+ * @flags is not used here.
+ *
+ * Returns the new descriptor or NULL on failure.
+ */
+static struct udma_desc *
+udma_prep_dma_cyclic_pkt(struct udma_chan *uc, dma_addr_t buf_addr,
+			 size_t buf_len, size_t period_len,
+			 enum dma_transfer_direction dir, unsigned long flags)
+{
+	int periods = buf_len / period_len;
+	struct udma_desc *d;
+	u32 ring_id;
+	int idx;
+
+	if (periods > (K3_UDMA_DEFAULT_RING_SIZE - 1) || period_len >= SZ_4M)
+		return NULL;
+
+	d = kzalloc(sizeof(*d) + periods * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	d->hwdesc_count = periods;
+
+	/* TODO: re-check this... */
+	if (dir != DMA_DEV_TO_MEM)
+		ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+	else
+		ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+
+	for (idx = 0; idx < periods; idx++) {
+		dma_addr_t period_addr = buf_addr + (period_len * idx);
+		struct udma_hwdesc *hwdesc = &d->hwdesc[idx];
+		struct cppi5_host_desc_t *pkt_desc;
+
+		hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+						GFP_NOWAIT,
+						&hwdesc->cppi5_desc_paddr);
+		if (!hwdesc->cppi5_desc_vaddr) {
+			dev_err(uc->ud->dev,
+				"descriptor%d allocation failed\n", idx);
+
+			/* release what has been allocated so far */
+			udma_free_hwdesc(uc, d);
+			kfree(d);
+			return NULL;
+		}
+
+		pkt_desc = hwdesc->cppi5_desc_vaddr;
+		hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+
+		cppi5_hdesc_init(pkt_desc, 0, 0);
+		cppi5_hdesc_set_pktlen(pkt_desc, period_len);
+
+		/* Flow and Packet ID */
+		cppi5_desc_set_pktids(&pkt_desc->hdr, uc->id,
+				      CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+		cppi5_desc_set_retpolicy(&pkt_desc->hdr, 0, ring_id);
+
+		/* one period per descriptor */
+		cppi5_hdesc_attach_buf(pkt_desc,
+				       period_addr, period_len,
+				       period_addr, period_len);
+	}
+
+	return d;
+}
+
+/*
+ * Prepare a cyclic transfer on @chan.
+ *
+ * @dir has to match the direction the channel was configured for and the
+ * buffer has to hold at least one period. The channel is marked cyclic, the
+ * descriptor is built by the packet or TR mode helper depending on the
+ * channel configuration, then the StaticTR for a remote PDMA is set up and
+ * the metadata ops are installed when the channel has metadata.
+ *
+ * Returns the prepared descriptor or NULL on any failure.
+ */
+static struct dma_async_tx_descriptor *
+udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		     size_t period_len, enum dma_transfer_direction dir,
+		     unsigned long flags)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct udma_desc *d;
+	u32 burst;
+
+	if (dir != uc->config.dir) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(dir));
+		return NULL;
+	}
+
+	/*
+	 * buf_len / period_len is the number of periods: a zero period length
+	 * is a division by zero here and in the helpers, and zero periods
+	 * ends up as a zero d->sglen which the cyclic StaticTR setup divides
+	 * by. Refuse both before the channel state is touched.
+	 */
+	if (!period_len || buf_len < period_len) {
+		dev_err(chan->device->dev,
+			"%s: invalid period length %zu for buffer length %zu\n",
+			__func__, period_len, buf_len);
+		return NULL;
+	}
+
+	uc->cyclic = true;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_width = uc->cfg.src_addr_width;
+		burst = uc->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_width = uc->cfg.dst_addr_width;
+		burst = uc->cfg.dst_maxburst;
+	} else {
+		dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* a maxburst of 0 is treated as single element bursts */
+	if (!burst)
+		burst = 1;
+
+	if (uc->config.pkt_mode)
+		d = udma_prep_dma_cyclic_pkt(uc, buf_addr, buf_len, period_len,
+					     dir, flags);
+	else
+		d = udma_prep_dma_cyclic_tr(uc, buf_addr, buf_len, period_len,
+					    dir, flags);
+
+	if (!d)
+		return NULL;
+
+	/* for cyclic descriptors sglen holds the number of periods */
+	d->sglen = buf_len / period_len;
+
+	d->dir = dir;
+	d->residue = buf_len;
+
+	/* static TR for remote PDMA */
+	if (udma_configure_statictr(uc, d, dev_width, burst)) {
+		/*
+		 * The limit depends on the SoC (statictr_z_mask), report the
+		 * one which is in effect instead of a fixed number.
+		 */
+		dev_err(uc->ud->dev,
+			"%s: StaticTR Z is limited to maximum %u (%u)\n",
+			__func__,
+			(unsigned int)uc->ud->match_data->statictr_z_mask,
+			d->static_tr.bstcnt);
+
+		udma_free_hwdesc(uc, d);
+		kfree(d);
+		return NULL;
+	}
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, flags);
+}
+
+/*
+ * Prepare a memory to memory copy of @len bytes from @src to @dest.
+ *
+ * Only available on channels configured for DMA_MEM_TO_MEM. Copies shorter
+ * than 64K use a single type 15 TR. Longer copies use two: TR0 moves
+ * tr0_cnt1 blocks of tr0_cnt0 bytes (64K minus the common alignment of
+ * @src and @dest, capped at 8 bytes) and TR1 moves the remaining bytes.
+ * The copy is refused when the number of blocks does not fit TR0.
+ *
+ * Returns the prepared descriptor or NULL on failure.
+ */
+static struct dma_async_tx_descriptor *
+udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		     size_t len, unsigned long tx_flags)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_desc *d;
+	struct cppi5_tr_type15_t *tr_req;
+	int num_tr;
+	size_t tr_size = sizeof(struct cppi5_tr_type15_t);
+	u16 tr0_cnt0, tr0_cnt1, tr1_cnt0 = 0;
+
+	if (uc->config.dir != DMA_MEM_TO_MEM) {
+		dev_err(chan->device->dev,
+			"%s: chan%d is for %s, not supporting %s\n",
+			__func__, uc->id,
+			dmaengine_get_direction_text(uc->config.dir),
+			dmaengine_get_direction_text(DMA_MEM_TO_MEM));
+		return NULL;
+	}
+
+	if (len < SZ_64K) {
+		num_tr = 1;
+		tr0_cnt0 = len;
+		tr0_cnt1 = 1;
+	} else {
+		unsigned long align_to = __ffs(src | dest);
+
+		if (align_to > 3)
+			align_to = 3;
+		/*
+		 * Keep simple: tr0: SZ_64K-alignment blocks,
+		 *		tr1: the remaining
+		 */
+		num_tr = 2;
+		tr0_cnt0 = (SZ_64K - BIT(align_to));
+		if (len / tr0_cnt0 >= SZ_64K) {
+			dev_err(uc->ud->dev, "size %zu is not supported\n",
+				len);
+			return NULL;
+		}
+
+		tr0_cnt1 = len / tr0_cnt0;
+		tr1_cnt0 = len % tr0_cnt0;
+	}
+
+	d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
+	if (!d)
+		return NULL;
+
+	d->dir = DMA_MEM_TO_MEM;
+	d->desc_idx = 0;
+	d->tr_idx = 0;
+	d->residue = len;
+
+	tr_req = d->hwdesc[0].tr_req_base;
+
+	cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true,
+		      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+	cppi5_tr_csf_set(&tr_req[0].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+	tr_req[0].addr = src;
+	tr_req[0].icnt0 = tr0_cnt0;
+	tr_req[0].icnt1 = tr0_cnt1;
+	tr_req[0].icnt2 = 1;
+	tr_req[0].icnt3 = 1;
+	tr_req[0].dim1 = tr0_cnt0;
+
+	tr_req[0].daddr = dest;
+	tr_req[0].dicnt0 = tr0_cnt0;
+	tr_req[0].dicnt1 = tr0_cnt1;
+	tr_req[0].dicnt2 = 1;
+	tr_req[0].dicnt3 = 1;
+	tr_req[0].ddim1 = tr0_cnt0;
+
+	if (num_tr == 2) {
+		/*
+		 * Bytes covered by TR0. The counters are u16, multiplying
+		 * them directly is done as int and overflows once TR0 covers
+		 * 2G or more, so widen before the multiplication.
+		 */
+		size_t tr0_bytes = (size_t)tr0_cnt1 * tr0_cnt0;
+
+		cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true,
+			      CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+		cppi5_tr_csf_set(&tr_req[1].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+		tr_req[1].addr = src + tr0_bytes;
+		tr_req[1].icnt0 = tr1_cnt0;
+		tr_req[1].icnt1 = 1;
+		tr_req[1].icnt2 = 1;
+		tr_req[1].icnt3 = 1;
+
+		tr_req[1].daddr = dest + tr0_bytes;
+		tr_req[1].dicnt0 = tr1_cnt0;
+		tr_req[1].dicnt1 = 1;
+		tr_req[1].dicnt2 = 1;
+		tr_req[1].dicnt3 = 1;
+	}
+
+	cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, CPPI5_TR_CSF_EOP);
+
+	if (uc->config.metadata_size)
+		d->vd.tx.metadata_ops = &metadata_ops;
+
+	return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+/*
+ * Move the submitted descriptors to the issued list and, when nothing is
+ * active on the channel, start the next transfer. All done under the vchan
+ * lock.
+ */
+static void udma_issue_pending(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&uc->vc.lock, irq_flags);
+
+	/* Nothing to do without pending work or with a descriptor in flight */
+	if (!vchan_issue_pending(&uc->vc) || uc->desc)
+		goto unlock;
+
+	/*
+	 * Hold back only while the channel is marked as terminating _and_ it
+	 * is still running (teardown has not completed yet).
+	 */
+	if (uc->state != UDMA_CHAN_IS_TERMINATING || !udma_is_chan_running(uc))
+		udma_start(uc);
+
+unlock:
+	spin_unlock_irqrestore(&uc->vc.lock, irq_flags);
+}
+
+/*
+ * Report the status of the transfer identified by @cookie.
+ *
+ * Starts from dma_cookie_status() and turns DMA_IN_PROGRESS into DMA_PAUSED
+ * when the channel is paused. When @txstate is provided and @cookie belongs
+ * to the active descriptor, the residue and the in flight byte count are
+ * derived from the channel's realtime byte counters; a cookie which is not
+ * the active descriptor is reported as DMA_COMPLETE.
+ */
+static enum dma_status udma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_status ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
+		ret = DMA_PAUSED;
+
+	/* nothing more to report when done or when no state was asked for */
+	if (ret == DMA_COMPLETE || !txstate)
+		goto out;
+
+	if (uc->desc && uc->desc->vd.tx.cookie == cookie) {
+		u32 peer_bcnt = 0;
+		u32 bcnt = 0;
+		u32 residue = uc->desc->residue;
+		u32 delay = 0;
+
+		if (uc->desc->dir == DMA_MEM_TO_DEV) {
+			bcnt = udma_tchanrt_read(uc->tchan,
+						 UDMA_TCHAN_RT_SBCNT_REG);
+
+			/* non native endpoints also have a peer byte counter */
+			if (uc->config.ep_type != PSIL_EP_NATIVE) {
+				peer_bcnt = udma_tchanrt_read(uc->tchan,
+						UDMA_TCHAN_RT_PEER_BCNT_REG);
+
+				/* bytes the channel is ahead of the peer */
+				if (bcnt > peer_bcnt)
+					delay = bcnt - peer_bcnt;
+			}
+		} else if (uc->desc->dir == DMA_DEV_TO_MEM) {
+			bcnt = udma_rchanrt_read(uc->rchan,
+						 UDMA_RCHAN_RT_BCNT_REG);
+
+			if (uc->config.ep_type != PSIL_EP_NATIVE) {
+				peer_bcnt = udma_rchanrt_read(uc->rchan,
+						UDMA_RCHAN_RT_PEER_BCNT_REG);
+
+				/* bytes the peer is ahead of the channel */
+				if (peer_bcnt > bcnt)
+					delay = peer_bcnt - bcnt;
+			}
+		} else {
+			bcnt = udma_tchanrt_read(uc->tchan,
+						 UDMA_TCHAN_RT_BCNT_REG);
+		}
+
+		/*
+		 * NOTE(review): uc->bcnt is presumably the counter value
+		 * accounted to earlier descriptors - confirm where it is
+		 * updated.
+		 */
+		bcnt -= uc->bcnt;
+		/* a non zero multiple of the descriptor size: all of it moved */
+		if (bcnt && !(bcnt % uc->desc->residue))
+			residue = 0;
+		else
+			residue -= bcnt % uc->desc->residue;
+
+		if (!residue && (uc->config.dir == DMA_DEV_TO_MEM || !delay)) {
+			ret = DMA_COMPLETE;
+			delay = 0;
+		}
+
+		dma_set_residue(txstate, residue);
+		dma_set_in_flight_bytes(txstate, delay);
+
+	} else {
+		ret = DMA_COMPLETE;
+	}
+
+out:
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+	return ret;
+}
+
+/*
+ * Pause the transfer of the active descriptor by setting the pause bit
+ * which belongs to its direction.
+ *
+ * Returns 0, or -EINVAL without an active descriptor or for an unknown
+ * direction.
+ */
+static int udma_pause(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_transfer_direction dir;
+
+	if (!uc->desc)
+		return -EINVAL;
+
+	dir = uc->desc->dir;
+
+	/* slave directions pause via the peer, memcpy via the channel itself */
+	if (dir == DMA_DEV_TO_MEM)
+		udma_rchanrt_update_bits(uc->rchan,
+					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE,
+					 UDMA_PEER_RT_EN_PAUSE);
+	else if (dir == DMA_MEM_TO_DEV)
+		udma_tchanrt_update_bits(uc->tchan,
+					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE,
+					 UDMA_PEER_RT_EN_PAUSE);
+	else if (dir == DMA_MEM_TO_MEM)
+		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+					 UDMA_CHAN_RT_CTL_PAUSE,
+					 UDMA_CHAN_RT_CTL_PAUSE);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Resume the transfer of the active descriptor by clearing the pause bit
+ * which belongs to its direction.
+ *
+ * Returns 0, or -EINVAL without an active descriptor or for an unknown
+ * direction.
+ */
+static int udma_resume(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	enum dma_transfer_direction dir;
+
+	if (!uc->desc)
+		return -EINVAL;
+
+	dir = uc->desc->dir;
+
+	/* the same bits udma_pause() sets are cleared here */
+	if (dir == DMA_DEV_TO_MEM)
+		udma_rchanrt_update_bits(uc->rchan,
+					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE, 0);
+	else if (dir == DMA_MEM_TO_DEV)
+		udma_tchanrt_update_bits(uc->tchan,
+					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+					 UDMA_PEER_RT_EN_PAUSE, 0);
+	else if (dir == DMA_MEM_TO_MEM)
+		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+					 UDMA_CHAN_RT_CTL_PAUSE, 0);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Stop the channel when it is running and drop everything queued on it.
+ *
+ * The active descriptor is not freed here: it is parked in
+ * uc->terminated_desc and flagged as terminated. All other descriptors are
+ * collected under the vchan lock and freed after the lock is released.
+ *
+ * Always returns 0.
+ */
+static int udma_terminate_all(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&uc->vc.lock, flags);
+
+	if (udma_is_chan_running(uc))
+		udma_stop(uc);
+
+	if (uc->desc) {
+		uc->terminated_desc = uc->desc;
+		uc->desc = NULL;
+		uc->terminated_desc->terminated = true;
+		/* non-sync variant: called with the vchan lock held */
+		cancel_delayed_work(&uc->tx_drain.work);
+	}
+
+	uc->paused = false;
+
+	vchan_get_all_descriptors(&uc->vc, &head);
+	spin_unlock_irqrestore(&uc->vc.lock, flags);
+	vchan_dma_desc_free_list(&uc->vc, &head);
+
+	return 0;
+}
+
+/*
+ * Wait for the channel to become quiet after a terminate.
+ *
+ * After synchronizing the vchan, a channel which is still terminating gets
+ * up to one second for its teardown to complete; on timeout the channel
+ * state is dumped and the channel is reset with the hard flag set. The
+ * channel is then reset (again), the tx drain work is cancelled and the
+ * rings are reset.
+ */
+static void udma_synchronize(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+
+	vchan_synchronize(&uc->vc);
+
+	if (uc->state == UDMA_CHAN_IS_TERMINATING &&
+	    !wait_for_completion_timeout(&uc->teardown_completed,
+					 msecs_to_jiffies(1000))) {
+		dev_warn(uc->ud->dev, "chan%d teardown timeout!\n", uc->id);
+		udma_dump_chan_stdata(uc);
+		udma_reset_chan(uc, true);
+	}
+
+	udma_reset_chan(uc, false);
+
+	if (udma_is_chan_running(uc))
+		dev_warn(uc->ud->dev, "chan%d refused to stop!\n", uc->id);
+
+	cancel_delayed_work_sync(&uc->tx_drain.work);
+	udma_reset_rings(uc);
+}
+
+/*
+ * Work to be done on a descriptor right before its callback is invoked:
+ * fetch the EPIB when metadata is attached and, when @result is provided,
+ * fill in the residue and result code for the client.
+ *
+ * For host (packet) descriptors the residue is the prepared length minus the
+ * packet length found in the descriptor and a non zero residue is reported
+ * as DMA_TRANS_ABORTED. Other descriptor types always report no residue and
+ * no error.
+ */
+static void udma_desc_pre_callback(struct virt_dma_chan *vc,
+				   struct virt_dma_desc *vd,
+				   struct dmaengine_result *result)
+{
+	struct udma_chan *uc = to_udma_chan(&vc->chan);
+	struct udma_desc *d;
+	void *desc_vaddr;
+
+	if (!vd)
+		return;
+
+	d = to_udma_desc(&vd->tx);
+
+	if (d->metadata_size)
+		udma_fetch_epib(uc, d);
+
+	/* The client did not ask for residue information */
+	if (!result)
+		return;
+
+	desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx);
+
+	if (cppi5_desc_get_type(desc_vaddr) != CPPI5_INFO0_DESC_TYPE_VAL_HOST) {
+		result->residue = 0;
+		result->result = DMA_TRANS_NOERROR;
+		return;
+	}
+
+	result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr);
+	if (!result->residue)
+		result->result = DMA_TRANS_NOERROR;
+	else
+		result->result = DMA_TRANS_ABORTED;
+}
+
+/*
+ * This tasklet handles the completion of a DMA descriptor by
+ * calling its callback and freeing it.
+ *
+ * The completed list and the cyclic descriptor (if any) are taken over under
+ * the vchan lock; the callbacks are invoked after the lock is dropped. The
+ * cyclic callback is invoked without result information, the callbacks of the
+ * completed descriptors get a dmaengine_result filled in by
+ * udma_desc_pre_callback().
+ */
+static void udma_vchan_complete(unsigned long arg)
+{
+	struct virt_dma_chan *vc = (struct virt_dma_chan *)arg;
+	struct virt_dma_desc *vd, *_vd;
+	struct dmaengine_desc_callback cb;
+	LIST_HEAD(head);
+
+	spin_lock_irq(&vc->lock);
+	list_splice_tail_init(&vc->desc_completed, &head);
+	vd = vc->cyclic;
+	if (vd) {
+		vc->cyclic = NULL;
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+	} else {
+		/* no cyclic descriptor: make the invoke below a no-op */
+		memset(&cb, 0, sizeof(cb));
+	}
+	spin_unlock_irq(&vc->lock);
+
+	/* vd is NULL here without a cyclic descriptor, the helper copes */
+	udma_desc_pre_callback(vc, vd, NULL);
+	dmaengine_desc_callback_invoke(&cb, NULL);
+
+	list_for_each_entry_safe(vd, _vd, &head, node) {
+		struct dmaengine_result result;
+
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+
+		list_del(&vd->node);
+
+		udma_desc_pre_callback(vc, vd, &result);
+		dmaengine_desc_callback_invoke(&cb, &result);
+
+		vchan_vdesc_fini(vd);
+	}
+}
+
+/*
+ * Release everything the channel holds: terminate outstanding work, reset
+ * the channel and rings when a terminated descriptor is still around, free
+ * the ring and UDMA interrupts, undo the PSI-L pairing, free the vchan
+ * resources, the TX/RX resources and finally the descriptor pool.
+ */
+static void udma_free_chan_resources(struct dma_chan *chan)
+{
+	struct udma_chan *uc = to_udma_chan(chan);
+	struct udma_dev *ud = to_udma_dev(chan->device);
+
+	udma_terminate_all(chan);
+	if (uc->terminated_desc) {
+		udma_reset_chan(uc, false);
+		udma_reset_rings(uc);
+	}
+
+	cancel_delayed_work_sync(&uc->tx_drain.work);
+	/*
+	 * NOTE(review): tx_drain.work is a member of the channel, not an on
+	 * stack work item - confirm the _on_stack variant is intended.
+	 */
+	destroy_delayed_work_on_stack(&uc->tx_drain.work);
+
+	/* an irq number of 0 is used as the "not requested" marker */
+	if (uc->irq_num_ring > 0) {
+		free_irq(uc->irq_num_ring, uc);
+
+		uc->irq_num_ring = 0;
+	}
+	if (uc->irq_num_udma > 0) {
+		free_irq(uc->irq_num_udma, uc);
+
+		uc->irq_num_udma = 0;
+	}
+
+	/* Release PSI-L pairing */
+	if (uc->psil_paired) {
+		navss_psil_unpair(ud, uc->config.src_thread,
+				  uc->config.dst_thread);
+		uc->psil_paired = false;
+	}
+
+	vchan_free_chan_resources(&uc->vc);
+	tasklet_kill(&uc->vc.task);
+
+	udma_free_tx_resources(uc);
+	udma_free_rx_resources(uc);
+	udma_reset_uchan(uc);
+
+	if (uc->use_dma_pool) {
+		dma_pool_destroy(uc->hdesc_pool);
+		uc->use_dma_pool = false;
+	}
+}
+
+/* Forward declaration: udma_dma_filter_fn() compares against its driver */
+static struct platform_driver udma_driver;
+
+/*
+ * Channel filter used by udma_of_xlate().
+ *
+ * @param points to the DT cells of the request; the first cell is the remote
+ * PSI-L thread ID. Channels which do not belong to this driver are refused.
+ * Otherwise the channel configuration is filled in from the PSI-L endpoint
+ * configuration of the thread: direction, packet mode, throughput level,
+ * endpoint type, PDMA options and metadata/descriptor sizes.
+ *
+ * Returns true when the channel has been configured for the thread, false
+ * when the channel is foreign or no endpoint configuration exists (the
+ * channel direction is then set to DMA_MEM_TO_MEM).
+ */
+static bool udma_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct udma_chan_config *ucc;
+	struct psil_endpoint_config *ep_config;
+	struct udma_chan *uc;
+	struct udma_dev *ud;
+	u32 *args;
+
+	if (chan->device->dev->driver != &udma_driver.driver)
+		return false;
+
+	uc = to_udma_chan(chan);
+	ucc = &uc->config;
+	ud = uc->ud;
+	args = param;
+
+	ucc->remote_thread_id = args[0];
+
+	/* the destination thread offset bit tells the direction */
+	if (ucc->remote_thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)
+		ucc->dir = DMA_MEM_TO_DEV;
+	else
+		ucc->dir = DMA_DEV_TO_MEM;
+
+	ep_config = psil_get_ep_config(ucc->remote_thread_id);
+	if (IS_ERR(ep_config)) {
+		dev_err(ud->dev, "No configuration for psi-l thread 0x%04x\n",
+			ucc->remote_thread_id);
+		/* undo the direction and thread ID set above */
+		ucc->dir = DMA_MEM_TO_MEM;
+		ucc->remote_thread_id = -1;
+		return false;
+	}
+
+	ucc->pkt_mode = ep_config->pkt_mode;
+	ucc->channel_tpl = ep_config->channel_tpl;
+	ucc->notdpkt = ep_config->notdpkt;
+	ucc->ep_type = ep_config->ep_type;
+
+	/* PDMA options apply to non native endpoints on SoCs flagging them */
+	if (ucc->ep_type != PSIL_EP_NATIVE) {
+		const struct udma_match_data *match_data = ud->match_data;
+
+		if (match_data->flags & UDMA_FLAG_PDMA_ACC32)
+			ucc->enable_acc32 = ep_config->pdma_acc32;
+		if (match_data->flags & UDMA_FLAG_PDMA_BURST)
+			ucc->enable_burst = ep_config->pdma_burst;
+	}
+
+	ucc->needs_epib = ep_config->needs_epib;
+	ucc->psd_size = ep_config->psd_size;
+	/* metadata: optional EPIB followed by the PS data */
+	ucc->metadata_size =
+			(ucc->needs_epib ? CPPI5_INFO0_HDESC_EPIB_SIZE : 0) +
+			ucc->psd_size;
+
+	/* host descriptors carry the metadata, sized to the alignment */
+	if (ucc->pkt_mode)
+		ucc->hdesc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+				 ucc->metadata_size, ud->desc_align);
+
+	dev_dbg(ud->dev, "chan%d: Remote thread: 0x%04x (%s)\n", uc->id,
+		ucc->remote_thread_id, dmaengine_get_direction_text(ucc->dir));
+
+	return true;
+}
+
+/*
+ * Translate a DT DMA specifier to a channel.
+ *
+ * The specifier has to carry exactly one cell, which is handed to
+ * udma_dma_filter_fn() when requesting the channel.
+ *
+ * Returns the channel, NULL for a specifier with the wrong number of cells
+ * or ERR_PTR(-EINVAL) when no channel could be requested.
+ */
+static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct udma_dev *ud = ofdma->of_dma_data;
+	dma_cap_mask_t mask = ud->ddev.cap_mask;
+	struct dma_chan *chan;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	chan = __dma_request_channel(&mask, udma_dma_filter_fn,
+				     &dma_spec->args[0], ofdma->of_node);
+	if (chan)
+		return chan;
+
+	dev_err(ud->dev, "get channel fail in %s.\n", __func__);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Match data for "ti,am654-navss-main-udmap".
+ * statictr_z_mask is the upper limit checked for the StaticTR burst count,
+ * rchan_oes_offset is added to the rchan ranges when the interrupts are
+ * allocated.
+ */
+static struct udma_match_data am654_main_data = {
+	.psil_base = 0x1000,
+	.enable_memcpy_support = true,
+	.statictr_z_mask = GENMASK(11, 0),
+	.rchan_oes_offset = 0x2000,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 8, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+/* Match data for "ti,am654-navss-mcu-udmap"; memcpy support is disabled */
+static struct udma_match_data am654_mcu_data = {
+	.psil_base = 0x6000,
+	.enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
+	.statictr_z_mask = GENMASK(11, 0),
+	.rchan_oes_offset = 0x2000,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 2, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+/*
+ * Match data for "ti,j721e-navss-main-udmap".
+ * Compared to the AM654 data it sets the PDMA ACC32 and BURST flags, has a
+ * 24 bit wide StaticTR Z limit and a third throughput level.
+ */
+static struct udma_match_data j721e_main_data = {
+	.psil_base = 0x1000,
+	.enable_memcpy_support = true,
+	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
+	.statictr_z_mask = GENMASK(23, 0),
+	.rchan_oes_offset = 0x400,
+	.tpl_levels = 3,
+	.level_start_idx = {
+		[0] = 16, /* Normal channels */
+		[1] = 4, /* High Throughput channels */
+		[2] = 0, /* Ultra High Throughput channels */
+	},
+};
+
+/* Match data for "ti,j721e-navss-mcu-udmap"; memcpy support is disabled */
+static struct udma_match_data j721e_mcu_data = {
+	.psil_base = 0x6000,
+	.enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
+	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
+	.statictr_z_mask = GENMASK(23, 0),
+	.rchan_oes_offset = 0x400,
+	.tpl_levels = 2,
+	.level_start_idx = {
+		[0] = 2, /* Normal channels */
+		[1] = 0, /* High Throughput channels */
+	},
+};
+
+/* Supported UDMAP instances and the match data which describes each */
+static const struct of_device_id udma_of_match[] = {
+	{
+		.compatible = "ti,am654-navss-main-udmap",
+		.data = &am654_main_data,
+	}, {
+		.compatible = "ti,am654-navss-mcu-udmap",
+		.data = &am654_mcu_data,
+	}, {
+		.compatible = "ti,j721e-navss-main-udmap",
+		.data = &j721e_main_data,
+	}, {
+		.compatible = "ti,j721e-navss-mcu-udmap",
+		.data = &j721e_mcu_data,
+	},
+	{ /* Sentinel */ },
+};
+
+/*
+ * Map every register region listed in mmr_names[] and store the mappings in
+ * ud->mmrs[].
+ *
+ * Returns 0 or the error of the first region which could not be mapped.
+ */
+static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud)
+{
+	struct device *dev = &pdev->dev;
+	int idx;
+
+	for (idx = 0; idx < MMR_LAST; idx++) {
+		struct resource *mem;
+		void __iomem *base;
+
+		mem = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   mmr_names[idx]);
+		base = devm_ioremap_resource(dev, mem);
+		ud->mmrs[idx] = base;
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+	}
+
+	return 0;
+}
+
+/*
+ * Size the channel and flow bookkeeping from the GCFG capability registers,
+ * mark which tchans, rchans and general purpose rflows can be used according
+ * to the TI-SCI resource ranges and allocate the MSI interrupts for the
+ * usable tchans and rchans.
+ *
+ * In the tchan, rchan and rflow_gp maps a set bit marks a resource which is
+ * not available. A TI-SCI range which could not be obtained (ERR_PTR) means
+ * that all resources of that type are available.
+ *
+ * Returns the number of usable channels (> 0) or a negative error code.
+ */
+static int udma_setup_resources(struct udma_dev *ud)
+{
+	struct device *dev = ud->dev;
+	int ch_count, ret, i, j;
+	u32 cap2, cap3;
+	struct ti_sci_resource_desc *rm_desc;
+	struct ti_sci_resource *rm_res, irq_res;
+	struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+	static const char * const range_names[] = { "ti,sci-rm-range-tchan",
+						    "ti,sci-rm-range-rchan",
+						    "ti,sci-rm-range-rflow" };
+
+	cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28);
+	cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+
+	ud->rflow_cnt = cap3 & 0x3fff;
+	ud->tchan_cnt = cap2 & 0x1ff;
+	ud->echan_cnt = (cap2 >> 9) & 0x1ff;
+	ud->rchan_cnt = (cap2 >> 18) & 0x1ff;
+	ch_count  = ud->tchan_cnt + ud->rchan_cnt;
+
+	ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
+					   sizeof(unsigned long), GFP_KERNEL);
+	ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
+				  GFP_KERNEL);
+	ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt),
+					   sizeof(unsigned long), GFP_KERNEL);
+	ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans),
+				  GFP_KERNEL);
+	ud->rflow_gp_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rflow_cnt),
+					      sizeof(unsigned long),
+					      GFP_KERNEL);
+	ud->rflow_gp_map_allocated = devm_kcalloc(dev,
+						  BITS_TO_LONGS(ud->rflow_cnt),
+						  sizeof(unsigned long),
+						  GFP_KERNEL);
+	ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt),
+					sizeof(unsigned long),
+					GFP_KERNEL);
+	ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows),
+				  GFP_KERNEL);
+
+	/* devm allocations: one check for all, nothing to unwind by hand */
+	if (!ud->tchan_map || !ud->rchan_map || !ud->rflow_gp_map ||
+	    !ud->rflow_gp_map_allocated || !ud->tchans || !ud->rchans ||
+	    !ud->rflows || !ud->rflow_in_use)
+		return -ENOMEM;
+
+	/*
+	 * RX flows with the same Ids as RX channels are reserved to be used
+	 * as default flows if remote HW can't generate flow_ids. Those
+	 * RX flows can be requested only explicitly by id.
+	 */
+	bitmap_set(ud->rflow_gp_map_allocated, 0, ud->rchan_cnt);
+
+	/* by default no GP rflows are assigned to Linux */
+	bitmap_set(ud->rflow_gp_map, 0, ud->rflow_cnt);
+
+	/* Get resource ranges from tisci */
+	for (i = 0; i < RM_RANGE_LAST; i++)
+		tisci_rm->rm_ranges[i] =
+			devm_ti_sci_get_of_resource(tisci_rm->tisci, dev,
+						    tisci_rm->tisci_dev_id,
+						    (char *)range_names[i]);
+
+	/* tchan ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+	if (IS_ERR(rm_res)) {
+		bitmap_zero(ud->tchan_map, ud->tchan_cnt);
+		/* no range: a single interrupt set covers all tchans */
+		irq_res.sets = 1;
+	} else {
+		bitmap_fill(ud->tchan_map, ud->tchan_cnt);
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->tchan_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: tchan: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+		irq_res.sets = rm_res->sets;
+	}
+
+	/* rchan and matching default flow ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+	if (IS_ERR(rm_res)) {
+		bitmap_zero(ud->rchan_map, ud->rchan_cnt);
+		/* no range: a single interrupt set covers all rchans */
+		irq_res.sets++;
+	} else {
+		bitmap_fill(ud->rchan_map, ud->rchan_cnt);
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->rchan_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: rchan: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+		irq_res.sets += rm_res->sets;
+	}
+
+	/* Temporary list of the interrupt ranges: tchans first, then rchans */
+	irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL);
+	if (!irq_res.desc)
+		return -ENOMEM;
+
+	/*
+	 * The ranges may be ERR_PTRs, they must not be dereferenced without
+	 * checking. Without a range the whole channel space is used, which
+	 * matches the zeroed (all available) maps set up above.
+	 */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+	if (IS_ERR(rm_res)) {
+		irq_res.desc[0].start = 0;
+		irq_res.desc[0].num = ud->tchan_cnt;
+		i = 1;
+	} else {
+		for (i = 0; i < rm_res->sets; i++) {
+			irq_res.desc[i].start = rm_res->desc[i].start;
+			irq_res.desc[i].num = rm_res->desc[i].num;
+		}
+	}
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+	if (IS_ERR(rm_res)) {
+		/* rchan events are offset, same as for explicit ranges */
+		irq_res.desc[i].start = ud->match_data->rchan_oes_offset;
+		irq_res.desc[i].num = ud->rchan_cnt;
+	} else {
+		for (j = 0; j < rm_res->sets; j++, i++) {
+			irq_res.desc[i].start = rm_res->desc[j].start +
+					ud->match_data->rchan_oes_offset;
+			irq_res.desc[i].num = rm_res->desc[j].num;
+		}
+	}
+	ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res);
+	kfree(irq_res.desc);
+	if (ret) {
+		dev_err(ud->dev, "Failed to allocate MSI interrupts\n");
+		return ret;
+	}
+
+	/* GP rflow ranges */
+	rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW];
+	if (IS_ERR(rm_res)) {
+		/* all gp flows are assigned exclusively to Linux */
+		bitmap_clear(ud->rflow_gp_map, ud->rchan_cnt,
+			     ud->rflow_cnt - ud->rchan_cnt);
+	} else {
+		for (i = 0; i < rm_res->sets; i++) {
+			rm_desc = &rm_res->desc[i];
+			bitmap_clear(ud->rflow_gp_map, rm_desc->start,
+				     rm_desc->num);
+			dev_dbg(dev, "ti-sci-res: rflow: %d:%d\n",
+				rm_desc->start, rm_desc->num);
+		}
+	}
+
+	/* set bits are unavailable channels, take them off the total */
+	ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt);
+	ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt);
+	if (!ch_count)
+		return -ENODEV;
+
+	ud->channels = devm_kcalloc(dev, ch_count, sizeof(*ud->channels),
+				    GFP_KERNEL);
+	if (!ud->channels)
+		return -ENOMEM;
+
+	dev_info(dev, "Channels: %d (tchan: %u, rchan: %u, gp-rflow: %u)\n",
+		 ch_count,
+		 ud->tchan_cnt - bitmap_weight(ud->tchan_map, ud->tchan_cnt),
+		 ud->rchan_cnt - bitmap_weight(ud->rchan_map, ud->rchan_cnt),
+		 ud->rflow_cnt - bitmap_weight(ud->rflow_gp_map,
+					       ud->rflow_cnt));
+
+	return ch_count;
+}
+
+/*
+ * Slave bus widths (1, 2, 3, 4 and 8 bytes) that udma_probe() advertises in
+ * both src_addr_widths and dst_addr_widths of the DMA device.
+ */
+#define TI_UDMAC_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+/*
+ * Probe one UDMA instance.
+ *
+ * Fetches the MMIO regions, the TISCI handle and device ids, the ring
+ * accelerator and the MSI domain, fills in the dmaengine callbacks and
+ * capabilities, sets up the channel resources and finally registers the DMA
+ * device together with its OF DMA translation hook.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int udma_probe(struct platform_device *pdev)
+{
+	struct device_node *navss_node = pdev->dev.parent->of_node;
+	struct device *dev = &pdev->dev;
+	struct udma_dev *ud;
+	const struct of_device_id *match;
+	int i, ret;
+	int ch_count;
+
+	/* A failure to set the DMA mask is only logged; probing continues */
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret)
+		dev_err(dev, "failed to set dma mask stuff\n");
+
+	ud = devm_kzalloc(dev, sizeof(*ud), GFP_KERNEL);
+	if (!ud)
+		return -ENOMEM;
+
+	ret = udma_get_mmrs(pdev, ud);
+	if (ret)
+		return ret;
+
+	ud->tisci_rm.tisci = ti_sci_get_by_phandle(dev->of_node, "ti,sci");
+	if (IS_ERR(ud->tisci_rm.tisci))
+		return PTR_ERR(ud->tisci_rm.tisci);
+
+	/* TISCI device id of this UDMA instance; also used as platform id */
+	ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id",
+				   &ud->tisci_rm.tisci_dev_id);
+	if (ret) {
+		dev_err(dev, "ti,sci-dev-id read failure %d\n", ret);
+		return ret;
+	}
+	pdev->id = ud->tisci_rm.tisci_dev_id;
+
+	/* TISCI device id of the parent (NAVSS) node */
+	ret = of_property_read_u32(navss_node, "ti,sci-dev-id",
+				   &ud->tisci_rm.tisci_navss_dev_id);
+	if (ret) {
+		dev_err(dev, "NAVSS ti,sci-dev-id read failure %d\n", ret);
+		return ret;
+	}
+
+	ud->tisci_rm.tisci_udmap_ops = &ud->tisci_rm.tisci->ops.rm_udmap_ops;
+	ud->tisci_rm.tisci_psil_ops = &ud->tisci_rm.tisci->ops.rm_psil_ops;
+
+	ud->ringacc = of_k3_ringacc_get_by_phandle(dev->of_node, "ti,ringacc");
+	if (IS_ERR(ud->ringacc))
+		return PTR_ERR(ud->ringacc);
+
+	/* A missing MSI domain defers the probe */
+	dev->msi_domain = of_msi_get_domain(dev, dev->of_node,
+					    DOMAIN_BUS_TI_SCI_INTA_MSI);
+	if (!dev->msi_domain) {
+		dev_err(dev, "Failed to get MSI domain\n");
+		return -EPROBE_DEFER;
+	}
+
+	match = of_match_node(udma_of_match, dev->of_node);
+	if (!match) {
+		dev_err(dev, "No compatible match found\n");
+		return -ENODEV;
+	}
+	ud->match_data = match->data;
+
+	dma_cap_set(DMA_SLAVE, ud->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, ud->ddev.cap_mask);
+
+	ud->ddev.device_alloc_chan_resources = udma_alloc_chan_resources;
+	ud->ddev.device_config = udma_slave_config;
+	ud->ddev.device_prep_slave_sg = udma_prep_slave_sg;
+	ud->ddev.device_prep_dma_cyclic = udma_prep_dma_cyclic;
+	ud->ddev.device_issue_pending = udma_issue_pending;
+	ud->ddev.device_tx_status = udma_tx_status;
+	ud->ddev.device_pause = udma_pause;
+	ud->ddev.device_resume = udma_resume;
+	ud->ddev.device_terminate_all = udma_terminate_all;
+	ud->ddev.device_synchronize = udma_synchronize;
+
+	ud->ddev.device_free_chan_resources = udma_free_chan_resources;
+	ud->ddev.src_addr_widths = TI_UDMAC_BUSWIDTHS;
+	ud->ddev.dst_addr_widths = TI_UDMAC_BUSWIDTHS;
+	ud->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	ud->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	ud->ddev.copy_align = DMAENGINE_ALIGN_8_BYTES;
+	ud->ddev.desc_metadata_modes = DESC_METADATA_CLIENT |
+				       DESC_METADATA_ENGINE;
+	/* memcpy is only offered when the matched variant enables it */
+	if (ud->match_data->enable_memcpy_support) {
+		dma_cap_set(DMA_MEMCPY, ud->ddev.cap_mask);
+		ud->ddev.device_prep_dma_memcpy = udma_prep_dma_memcpy;
+		ud->ddev.directions |= BIT(DMA_MEM_TO_MEM);
+	}
+
+	ud->ddev.dev = dev;
+	ud->dev = dev;
+	ud->psil_base = ud->match_data->psil_base;
+
+	INIT_LIST_HEAD(&ud->ddev.channels);
+	INIT_LIST_HEAD(&ud->desc_to_purge);
+
+	/* Returns the number of usable channels or a negative error code */
+	ch_count = udma_setup_resources(ud);
+	if (ch_count <= 0)
+		return ch_count;
+
+	spin_lock_init(&ud->lock);
+	INIT_WORK(&ud->purge_work, udma_purge_desc_work);
+
+	/* Descriptor alignment: at least 64, raised to the DMA cache alignment */
+	ud->desc_align = 64;
+	if (ud->desc_align < dma_get_cache_alignment())
+		ud->desc_align = dma_get_cache_alignment();
+
+	/* Each channel's RT register window is 0x1000 bytes apart */
+	for (i = 0; i < ud->tchan_cnt; i++) {
+		struct udma_tchan *tchan = &ud->tchans[i];
+
+		tchan->id = i;
+		tchan->reg_rt = ud->mmrs[MMR_TCHANRT] + i * 0x1000;
+	}
+
+	for (i = 0; i < ud->rchan_cnt; i++) {
+		struct udma_rchan *rchan = &ud->rchans[i];
+
+		rchan->id = i;
+		rchan->reg_rt = ud->mmrs[MMR_RCHANRT] + i * 0x1000;
+	}
+
+	for (i = 0; i < ud->rflow_cnt; i++) {
+		struct udma_rflow *rflow = &ud->rflows[i];
+
+		rflow->id = i;
+	}
+
+	for (i = 0; i < ch_count; i++) {
+		struct udma_chan *uc = &ud->channels[i];
+
+		uc->ud = ud;
+		uc->vc.desc_free = udma_desc_free;
+		uc->id = i;
+		uc->tchan = NULL;
+		uc->rchan = NULL;
+		uc->config.remote_thread_id = -1;
+		uc->config.dir = DMA_MEM_TO_MEM;
+		uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d",
+					  dev_name(dev), i);
+		/* devm_kasprintf() returns NULL when the allocation fails */
+		if (!uc->name)
+			return -ENOMEM;
+
+		vchan_init(&uc->vc, &ud->ddev);
+		/* Use custom vchan completion handling */
+		tasklet_init(&uc->vc.task, udma_vchan_complete,
+			     (unsigned long)&uc->vc);
+		init_completion(&uc->teardown_completed);
+	}
+
+	ret = dma_async_device_register(&ud->ddev);
+	if (ret) {
+		dev_err(dev, "failed to register slave DMA engine: %d\n", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ud);
+
+	/* Undo the dmaengine registration if the OF hook cannot be added */
+	ret = of_dma_controller_register(dev->of_node, udma_of_xlate, ud);
+	if (ret) {
+		dev_err(dev, "failed to register of_dma controller\n");
+		dma_async_device_unregister(&ud->ddev);
+	}
+
+	return ret;
+}
+
+/*
+ * Platform driver for the UDMA. Only a probe callback is provided (no
+ * .remove), bind/unbind attributes are suppressed, and the driver is
+ * registered through builtin_platform_driver().
+ */
+static struct platform_driver udma_driver = {
+	.driver = {
+		.name	= "ti-udma",
+		.of_match_table = udma_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe		= udma_probe,
+};
+builtin_platform_driver(udma_driver);
diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h
new file mode 100644
index 000000000000..ff428f823e67
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_UDMA_H_
+#define K3_UDMA_H_
+
+#include <linux/soc/ti/ti_sci_protocol.h>
+
+/* Global registers */
+#define UDMA_REV_REG			0x0
+#define UDMA_PERF_CTL_REG		0x4
+#define UDMA_EMU_CTL_REG		0x8
+#define UDMA_PSIL_TO_REG		0x10
+#define UDMA_UTC_CTL_REG		0x1c
+/* Register i of this bank: 4-byte stride starting at offset 0x20 */
+#define UDMA_CAP_REG(i)			(0x20 + ((i) * 4))
+#define UDMA_RX_FLOW_ID_FW_OES_REG	0x80
+#define UDMA_RX_FLOW_ID_FW_STATUS_REG	0x88
+
+/* TX chan RT regs */
+#define UDMA_TCHAN_RT_CTL_REG		0x0
+#define UDMA_TCHAN_RT_SWTRIG_REG	0x8
+#define UDMA_TCHAN_RT_STDATA_REG	0x80
+
+/*
+ * Peer register i sits at offset 0x200 + 4 * i; the "PSI-L" note next to
+ * each user below is 0x400 + i (presumably the register's address on the
+ * PSI-L side -- confirm against the TRM).
+ */
+#define UDMA_TCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
+#define UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG	\
+	UDMA_TCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
+#define UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG	\
+	UDMA_TCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
+#define UDMA_TCHAN_RT_PEER_BCNT_REG		\
+	UDMA_TCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
+#define UDMA_TCHAN_RT_PEER_RT_EN_REG		\
+	UDMA_TCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
+
+#define UDMA_TCHAN_RT_PCNT_REG		0x400
+#define UDMA_TCHAN_RT_BCNT_REG		0x408
+#define UDMA_TCHAN_RT_SBCNT_REG		0x410
+
+/* RX chan RT regs (same offsets as the TX chan RT regs above) */
+#define UDMA_RCHAN_RT_CTL_REG		0x0
+#define UDMA_RCHAN_RT_SWTRIG_REG	0x8
+#define UDMA_RCHAN_RT_STDATA_REG	0x80
+
+#define UDMA_RCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
+#define UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG	\
+	UDMA_RCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
+#define UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG	\
+	UDMA_RCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
+#define UDMA_RCHAN_RT_PEER_BCNT_REG		\
+	UDMA_RCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
+#define UDMA_RCHAN_RT_PEER_RT_EN_REG		\
+	UDMA_RCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
+
+#define UDMA_RCHAN_RT_PCNT_REG		0x400
+#define UDMA_RCHAN_RT_BCNT_REG		0x408
+#define UDMA_RCHAN_RT_SBCNT_REG		0x410
+
+/* UDMA_TCHAN_RT_CTL_REG/UDMA_RCHAN_RT_CTL_REG */
+#define UDMA_CHAN_RT_CTL_EN		BIT(31)
+#define UDMA_CHAN_RT_CTL_TDOWN		BIT(30)
+#define UDMA_CHAN_RT_CTL_PAUSE		BIT(29)
+#define UDMA_CHAN_RT_CTL_FTDOWN		BIT(28)
+#define UDMA_CHAN_RT_CTL_ERROR		BIT(0)
+
+/* UDMA_TCHAN_RT_PEER_RT_EN_REG/UDMA_RCHAN_RT_PEER_RT_EN_REG (PSI-L: 0x408) */
+#define UDMA_PEER_RT_EN_ENABLE		BIT(31)
+#define UDMA_PEER_RT_EN_TEARDOWN	BIT(30)
+#define UDMA_PEER_RT_EN_PAUSE		BIT(29)
+#define UDMA_PEER_RT_EN_FLUSH		BIT(28)
+#define UDMA_PEER_RT_EN_IDLE		BIT(1)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG
+ *
+ * X occupies bits 26:24 and Y bits 11:0 of the register value.
+ */
+#define PDMA_STATIC_TR_X_MASK		GENMASK(26, 24)
+#define PDMA_STATIC_TR_X_SHIFT		(24)
+#define PDMA_STATIC_TR_Y_MASK		GENMASK(11, 0)
+#define PDMA_STATIC_TR_Y_SHIFT		(0)
+
+/* Shift x into its field, then mask: bits that do not fit are dropped */
+#define PDMA_STATIC_TR_Y(x)	\
+	(((x) << PDMA_STATIC_TR_Y_SHIFT) & PDMA_STATIC_TR_Y_MASK)
+#define PDMA_STATIC_TR_X(x)	\
+	(((x) << PDMA_STATIC_TR_X_SHIFT) & PDMA_STATIC_TR_X_MASK)
+
+#define PDMA_STATIC_TR_XY_ACC32		BIT(30)
+#define PDMA_STATIC_TR_XY_BURST		BIT(31)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG
+ *
+ * The field mask is supplied by the caller; no shift is applied.
+ */
+#define PDMA_STATIC_TR_Z(x, mask)	((x) & (mask))
+
+struct udma_dev;
+struct udma_tchan;
+struct udma_rchan;
+struct udma_rflow;
+
+/*
+ * Index into udma_tisci_rm.rm_ranges[]: one resource range each for TX
+ * channels, RX channels and RX flows. RM_RANGE_LAST is the array size.
+ */
+enum udma_rm_range {
+	RM_RANGE_TCHAN = 0,
+	RM_RANGE_RCHAN,
+	RM_RANGE_RFLOW,
+	RM_RANGE_LAST,
+};
+
+/*
+ * TISCI resource-management state of one UDMA instance, filled in by
+ * udma_probe():
+ *  - tisci: handle obtained through the "ti,sci" phandle
+ *  - tisci_udmap_ops / tisci_psil_ops: point into tisci->ops
+ *  - tisci_dev_id: "ti,sci-dev-id" of the UDMA node
+ *  - tisci_navss_dev_id: "ti,sci-dev-id" of the parent (NAVSS) node
+ *  - rm_ranges: resource ranges, indexed by enum udma_rm_range
+ */
+struct udma_tisci_rm {
+	const struct ti_sci_handle *tisci;
+	const struct ti_sci_rm_udmap_ops *tisci_udmap_ops;
+	u32  tisci_dev_id;
+
+	/* tisci information for PSI-L thread pairing/unpairing */
+	const struct ti_sci_rm_psil_ops *tisci_psil_ops;
+	u32  tisci_navss_dev_id;
+
+	struct ti_sci_resource *rm_ranges[RM_RANGE_LAST];
+};
+
+#endif /* K3_UDMA_H_ */
-- 
cgit 


From d702419134133db1eab2067dc6ea5723467fd917 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Mon, 23 Dec 2019 13:04:51 +0200
Subject: dmaengine: ti: k3-udma: Add glue layer for non DMAengine users

Certain users cannot use the DMAengine API right now because of features
missing from the core. The prime example is networking.

These users can use the glue layer interface to avoid misuse of DMAengine
API and when the core gains the needed features they can be converted to
use generic API.

The most prominent features the glue layer clients are depending on:

- most PSI-L native peripherals use extra rflow ranges on a receive channel
  and, depending on the peripheral's configuration, packets from a single
  free descriptor ring are going to be received into different receive rings
  - it is also possible to have different free descriptor rings per rflow,
    and an rflow can also support 4 additional free descriptor rings based
    on the size of the incoming packet
- out of order completion of descriptors on a channel
 - when we have several queues to handle different priority packets the
   descriptors will be completed 'out-of-order'
- the notion of prep_slave_sg is not matching with what the streaming type
   of operation is demanding for networking
- Streaming type of operation
 - Ability to fill the free descriptor ring with descriptors in
   anticipation of incoming traffic; when a packet arrives, UDMAP will
   form a packet and give it to the client driver
 - the descriptors are not backed with exact size data buffers as we don't
   know the size of the packet we will receive, but as a generic pool of
   buffers to be used by the receive channel
- NAPI type of operation (polling instead of interrupt driven transfer)
 - without this we can not sustain gigabit speeds and we need to support NAPI
 - not to limit this to networking, but other high performance operations

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Link: https://lore.kernel.org/r/20191223110458.30766-12-peter.ujfalusi@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/Kconfig           |    8 +
 drivers/dma/ti/Makefile          |    1 +
 drivers/dma/ti/k3-udma-glue.c    | 1198 ++++++++++++++++++++++++++++++++++++++
 drivers/dma/ti/k3-udma-private.c |  133 +++++
 drivers/dma/ti/k3-udma.c         |   63 +-
 drivers/dma/ti/k3-udma.h         |   31 +
 6 files changed, 1433 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma/ti/k3-udma-glue.c
 create mode 100644 drivers/dma/ti/k3-udma-private.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index cca6708bd77c..f76e06651f80 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -47,6 +47,14 @@ config TI_K3_UDMA
 	  Enable support for the TI UDMA (Unified DMA) controller. This
 	  DMA engine is used in AM65x and j721e.
 
+config TI_K3_UDMA_GLUE_LAYER
+	bool "Texas Instruments UDMA Glue layer for non DMAengine users"
+	depends on ARCH_K3 || COMPILE_TEST
+	depends on TI_K3_UDMA
+	help
+	  Say Y here to support the K3 NAVSS DMA glue interface.
+	  If unsure, say N.
+
 config TI_K3_PSIL
 	bool
 
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index 9d787f009195..9a29a107e374 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -3,5 +3,6 @@ obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o
+obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o
 obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o k3-psil-am654.o k3-psil-j721e.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
new file mode 100644
index 000000000000..c1511298ece2
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -0,0 +1,1198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 NAVSS DMA glue interface
+ *
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/dma/ti-cppi5.h>
+#include <linux/dma/k3-udma-glue.h>
+
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+/*
+ * State shared by glue TX and RX channels:
+ *  - dev: client device the channel was requested for
+ *  - udmax, tisci_rm, ringacc: UDMA device, its TISCI resource data and the
+ *    ring accelerator, resolved by of_k3_udma_glue_parse()
+ *  - src_thread / dst_thread: PSI-L thread ids of the two ends
+ *  - epib, psdata_size: taken from the PSI-L endpoint configuration
+ *  - swdata_size: taken from the client's channel configuration
+ *  - hdesc_size: result of cppi5_hdesc_calc_size() on the three values above
+ */
+struct k3_udma_glue_common {
+	struct device *dev;
+	struct udma_dev *udmax;
+	const struct udma_tisci_rm *tisci_rm;
+	struct k3_ringacc *ringacc;
+	u32 src_thread;
+	u32 dst_thread;
+
+	u32  hdesc_size;
+	bool epib;
+	u32  psdata_size;
+	u32  swdata_size;
+};
+
+/*
+ * Glue-layer TX channel:
+ *  - udma_tchanx / udma_tchan_id: UDMA TX channel and its id
+ *  - ringtx: ring descriptors are pushed to; ringtxcq: completion ring
+ *    descriptors are popped from
+ *  - psil_paired: set once the PSI-L threads were paired, so release knows
+ *    to unpair them
+ *  - virq: last value returned by k3_udma_glue_tx_get_irq()
+ *  - free_pkts: starts at the TXCQ ring size, decremented on push and
+ *    incremented on a successful pop
+ *  - tx_*: client options forwarded to the TISCI TX channel configuration
+ */
+struct k3_udma_glue_tx_channel {
+	struct k3_udma_glue_common common;
+
+	struct udma_tchan *udma_tchanx;
+	int udma_tchan_id;
+
+	struct k3_ring *ringtx;
+	struct k3_ring *ringtxcq;
+
+	bool psil_paired;
+
+	int virq;
+
+	atomic_t free_pkts;
+	bool tx_pause_on_err;
+	bool tx_filt_einfo;
+	bool tx_filt_pswords;
+	bool tx_supr_tdpkt;
+};
+
+/*
+ * One RX flow of a glue-layer RX channel:
+ *  - udma_rflow / udma_rflow_id: UDMA rflow and the id it must have
+ *  - ringrx: ring configured as the flow's destination queue
+ *  - ringrxfdq: ring configured as all four free descriptor queues
+ *  - virq: NOTE(review): not written in the code visible here
+ */
+struct k3_udma_glue_rx_flow {
+	struct udma_rflow *udma_rflow;
+	int udma_rflow_id;
+	struct k3_ring *ringrx;
+	struct k3_ring *ringrxfdq;
+
+	int virq;
+};
+
+/*
+ * Glue-layer RX channel:
+ *  - udma_rchanx / udma_rchan_id: UDMA RX channel and its id
+ *  - remote: when set, flows are configured with TI_SCI_RESOURCE_NULL
+ *    instead of the local ring ids
+ *  - psil_paired: presumably mirrors the TX channel's flag -- confirm
+ *  - swdata_size: NOTE(review): separate from common.swdata_size; its use
+ *    is not visible here
+ *  - flow_id_base, flow_num: first rflow id and number of rflows; a base of
+ *    -1 is treated specially when allocating GP rflows
+ *  - flows: per-flow state, indexed by flow number
+ *  - flows_ready: incremented per configured flow, decremented on release
+ */
+struct k3_udma_glue_rx_channel {
+	struct k3_udma_glue_common common;
+
+	struct udma_rchan *udma_rchanx;
+	int udma_rchan_id;
+	bool remote;
+
+	bool psil_paired;
+
+	u32  swdata_size;
+	int  flow_id_base;
+
+	struct k3_udma_glue_rx_flow *flows;
+	u32 flow_num;
+	u32 flows_ready;
+};
+
+/* Upper bound on the 1us polling steps while waiting for a TX teardown */
+#define K3_UDMAX_TDOWN_TIMEOUT_US 1000
+
+/*
+ * Look up the ring accelerator and the UDMA device behind @udmax_np and
+ * store them, plus the UDMA's TISCI resource data, in @common.
+ *
+ * Returns 0 on success or the error carried by the failing lookup.
+ */
+static int of_k3_udma_glue_parse(struct device_node *udmax_np,
+				 struct k3_udma_glue_common *common)
+{
+	struct k3_ringacc *ringacc;
+	struct udma_dev *udmax;
+
+	ringacc = of_k3_ringacc_get_by_phandle(udmax_np, "ti,ringacc");
+	common->ringacc = ringacc;
+	if (IS_ERR(ringacc))
+		return PTR_ERR(ringacc);
+
+	udmax = of_xudma_dev_get(udmax_np, NULL);
+	common->udmax = udmax;
+	if (IS_ERR(udmax))
+		return PTR_ERR(udmax);
+
+	common->tisci_rm = xudma_dev_get_tisci_rm(udmax);
+
+	return 0;
+}
+
+/*
+ * Parse the "dmas" entry called @name of @chn_np.
+ *
+ * The first specifier cell is the PSI-L thread id. It must have
+ * K3_PSIL_DST_THREAD_ID_OFFSET set for a TX channel and clear for an RX
+ * channel. The endpoint configuration for that thread supplies epib and
+ * psdata_size; the thread id itself is stored as dst_thread (TX) or
+ * src_thread (RX). Finally the UDMA node referenced by the specifier is
+ * parsed by of_k3_udma_glue_parse().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int of_k3_udma_glue_parse_chn(struct device_node *chn_np,
+		const char *name, struct k3_udma_glue_common *common,
+		bool tx_chn)
+{
+	struct psil_endpoint_config *ep_config;
+	struct of_phandle_args dma_spec;
+	u32 thread_id;
+	int ret = 0;
+	int index;
+
+	if (unlikely(!name))
+		return -EINVAL;
+
+	index = of_property_match_string(chn_np, "dma-names", name);
+	if (index < 0)
+		return index;
+
+	if (of_parse_phandle_with_args(chn_np, "dmas", "#dma-cells", index,
+				       &dma_spec))
+		return -ENOENT;
+
+	/* args[0] is only filled in when the specifier has at least one cell */
+	if (dma_spec.args_count < 1) {
+		ret = -EINVAL;
+		goto out_put_spec;
+	}
+
+	thread_id = dma_spec.args[0];
+
+	/* TX requires a destination thread id, RX a source thread id */
+	if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+		ret = -EINVAL;
+		goto out_put_spec;
+	}
+
+	if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+		ret = -EINVAL;
+		goto out_put_spec;
+	}
+
+	/* get psil endpoint config */
+	ep_config = psil_get_ep_config(thread_id);
+	if (IS_ERR(ep_config)) {
+		dev_err(common->dev,
+			"No configuration for psi-l thread 0x%04x\n",
+			thread_id);
+		ret = PTR_ERR(ep_config);
+		goto out_put_spec;
+	}
+
+	common->epib = ep_config->needs_epib;
+	common->psdata_size = ep_config->psd_size;
+
+	if (tx_chn)
+		common->dst_thread = thread_id;
+	else
+		common->src_thread = thread_id;
+
+	ret = of_k3_udma_glue_parse(dma_spec.np, common);
+
+out_put_spec:
+	/* Drop the node reference taken by of_parse_phandle_with_args() */
+	of_node_put(dma_spec.np);
+	return ret;
+}
+
+/* Debug-print the channel id and both PSI-L thread ids of a TX channel */
+static void k3_udma_glue_dump_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	const struct k3_udma_glue_common *common = &tx_chn->common;
+
+	dev_dbg(common->dev,
+		"dump_tx_chn:\n"
+		"udma_tchan_id: %d\n"
+		"src_thread: %08x\n"
+		"dst_thread: %08x\n",
+		tx_chn->udma_tchan_id, common->src_thread, common->dst_thread);
+}
+
+/*
+ * Debug-print a marker line followed by offset and current value of the TX
+ * channel's CTL, PEER_RT_EN, PCNT, BCNT and SBCNT real-time registers.
+ */
+static void k3_udma_glue_dump_tx_rt_chn(struct k3_udma_glue_tx_channel *chn,
+					char *mark)
+{
+	static const int rt_regs[] = {
+		UDMA_TCHAN_RT_CTL_REG,
+		UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		UDMA_TCHAN_RT_PCNT_REG,
+		UDMA_TCHAN_RT_BCNT_REG,
+		UDMA_TCHAN_RT_SBCNT_REG,
+	};
+	struct device *dev = chn->common.dev;
+	unsigned int n;
+
+	dev_dbg(dev, "=== dump ===> %s\n", mark);
+
+	/* The read stays inside dev_dbg() so it is skipped with debug off */
+	for (n = 0; n < ARRAY_SIZE(rt_regs); n++)
+		dev_dbg(dev, "0x%08X: %08X\n", rt_regs[n],
+			xudma_tchanrt_read(chn->udma_tchanx, rt_regs[n]));
+}
+
+/*
+ * Build the TISCI TX channel configuration request from the channel's
+ * settings and send it. Returns the result of the tx_ch_cfg operation.
+ */
+static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	const struct udma_tisci_rm *rm = tx_chn->common.tisci_rm;
+	struct ti_sci_msg_rm_udmap_tx_ch_cfg cfg_req;
+
+	memset(&cfg_req, 0, sizeof(cfg_req));
+
+	cfg_req.nav_id = rm->tisci_dev_id;
+	cfg_req.index = tx_chn->udma_tchan_id;
+	cfg_req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID;
+
+	/* Client options become 0/1 request fields */
+	cfg_req.tx_pause_on_err = tx_chn->tx_pause_on_err ? 1 : 0;
+	cfg_req.tx_filt_einfo = tx_chn->tx_filt_einfo ? 1 : 0;
+	cfg_req.tx_filt_pswords = tx_chn->tx_filt_pswords ? 1 : 0;
+	cfg_req.tx_supr_tdpkt = tx_chn->tx_supr_tdpkt ? 1 : 0;
+
+	cfg_req.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+	/* The fetch size is the host descriptor size divided by four */
+	cfg_req.tx_fetch_size = tx_chn->common.hdesc_size >> 2;
+	cfg_req.txcq_qnum = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+
+	return rm->tisci_udmap_ops->tx_ch_cfg(rm->tisci, &cfg_req);
+}
+
+/*
+ * Request and set up a glue-layer TX channel for @dev.
+ *
+ * @dev:  client device; its OF node holds the "dmas"/"dma-names" entries
+ * @name: entry in "dma-names" that selects the channel
+ * @cfg:  ring configurations and TX options chosen by the client
+ *
+ * Parses the DMA specifier, takes a UDMA TX channel, requests and configures
+ * the TX and TX completion rings, configures the channel through TISCI,
+ * pairs the PSI-L threads and leaves the channel disabled.
+ *
+ * Returns the channel on success or an ERR_PTR() value. On failure
+ * everything acquired so far is handed back via
+ * k3_udma_glue_release_tx_chn(); the channel structure itself is
+ * device-managed.
+ */
+struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
+		const char *name, struct k3_udma_glue_tx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_tx_channel *tx_chn;
+	int ret;
+
+	tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
+	if (!tx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	tx_chn->common.dev = dev;
+	tx_chn->common.swdata_size = cfg->swdata_size;
+	tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
+	tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
+	tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
+	tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&tx_chn->common, true);
+	if (ret)
+		goto err;
+
+	tx_chn->common.hdesc_size = cppi5_hdesc_calc_size(tx_chn->common.epib,
+						tx_chn->common.psdata_size,
+						tx_chn->common.swdata_size);
+
+	/* request and cfg UDMAP TX channel */
+	tx_chn->udma_tchanx = xudma_tchan_get(tx_chn->common.udmax, -1);
+	if (IS_ERR(tx_chn->udma_tchanx)) {
+		ret = PTR_ERR(tx_chn->udma_tchanx);
+		dev_err(dev, "UDMAX tchanx get err %d\n", ret);
+		goto err;
+	}
+	tx_chn->udma_tchan_id = xudma_tchan_get_id(tx_chn->udma_tchanx);
+
+	/* One push credit per entry of the completion ring */
+	atomic_set(&tx_chn->free_pkts, cfg->txcq_cfg.size);
+
+	/* request and cfg rings */
+	tx_chn->ringtx = k3_ringacc_request_ring(tx_chn->common.ringacc,
+						 tx_chn->udma_tchan_id, 0);
+	if (!tx_chn->ringtx) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get TX ring %u\n",
+			tx_chn->udma_tchan_id);
+		goto err;
+	}
+
+	tx_chn->ringtxcq = k3_ringacc_request_ring(tx_chn->common.ringacc,
+						   -1, 0);
+	if (!tx_chn->ringtxcq) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get TXCQ ring\n");
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(tx_chn->ringtx, &cfg->tx_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringtx %d\n", ret);
+		goto err;
+	}
+
+	ret = k3_ringacc_ring_cfg(tx_chn->ringtxcq, &cfg->txcq_cfg);
+	if (ret) {
+		/* Name the completion ring so the two failures can be told apart */
+		dev_err(dev, "Failed to cfg ringtxcq %d\n", ret);
+		goto err;
+	}
+
+	/* request and cfg psi-l */
+	tx_chn->common.src_thread =
+			xudma_dev_get_psil_base(tx_chn->common.udmax) +
+			tx_chn->udma_tchan_id;
+
+	ret = k3_udma_glue_cfg_tx_chn(tx_chn);
+	if (ret) {
+		dev_err(dev, "Failed to cfg tchan %d\n", ret);
+		goto err;
+	}
+
+	ret = xudma_navss_psil_pair(tx_chn->common.udmax,
+				    tx_chn->common.src_thread,
+				    tx_chn->common.dst_thread);
+	if (ret) {
+		dev_err(dev, "PSI-L request err %d\n", ret);
+		goto err;
+	}
+
+	tx_chn->psil_paired = true;
+
+	/* reset TX RT registers */
+	k3_udma_glue_disable_tx_chn(tx_chn);
+
+	k3_udma_glue_dump_tx_chn(tx_chn);
+
+	return tx_chn;
+
+err:
+	k3_udma_glue_release_tx_chn(tx_chn);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn);
+
+/*
+ * Give back whatever a TX channel currently holds: unpair the PSI-L threads
+ * if they were paired, put the UDMA TX channel if one was obtained, then
+ * free the completion ring and the TX ring if present.
+ */
+void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	struct k3_udma_glue_common *common = &tx_chn->common;
+
+	if (tx_chn->psil_paired) {
+		xudma_navss_psil_unpair(common->udmax, common->src_thread,
+					common->dst_thread);
+		tx_chn->psil_paired = false;
+	}
+
+	if (!IS_ERR_OR_NULL(tx_chn->udma_tchanx))
+		xudma_tchan_put(common->udmax, tx_chn->udma_tchanx);
+
+	if (tx_chn->ringtxcq)
+		k3_ringacc_ring_free(tx_chn->ringtxcq);
+
+	if (tx_chn->ringtx)
+		k3_ringacc_ring_free(tx_chn->ringtx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_tx_chn);
+
+/*
+ * Queue one host descriptor on the TX ring.
+ *
+ * @tx_chn:   TX channel
+ * @desc_tx:  descriptor; its return policy is pointed at the TXCQ ring
+ * @desc_dma: DMA address of the descriptor, pushed to the TX ring
+ *
+ * Takes one free_pkts credit. Returns -ENOMEM when no credit is left,
+ * otherwise the result of the ring push. The credit is handed back when the
+ * push fails, because such a descriptor can never show up on the completion
+ * ring to be returned by k3_udma_glue_pop_tx_chn().
+ */
+int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			     struct cppi5_host_desc_t *desc_tx,
+			     dma_addr_t desc_dma)
+{
+	u32 ringtxcq_id;
+	int ret;
+
+	if (!atomic_add_unless(&tx_chn->free_pkts, -1, 0))
+		return -ENOMEM;
+
+	ringtxcq_id = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+	cppi5_desc_set_retpolicy(&desc_tx->hdr, 0, ringtxcq_id);
+
+	ret = k3_ringacc_ring_push(tx_chn->ringtx, &desc_dma);
+	if (ret)
+		atomic_inc(&tx_chn->free_pkts);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_tx_chn);
+
+/*
+ * Pop one completed descriptor address from the TXCQ ring into @desc_dma.
+ * A successful pop returns one free_pkts credit. Returns the ring pop result.
+ */
+int k3_udma_glue_pop_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			    dma_addr_t *desc_dma)
+{
+	int err = k3_ringacc_ring_pop(tx_chn->ringtxcq, desc_dma);
+
+	if (err)
+		return err;
+
+	atomic_inc(&tx_chn->free_pkts);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_tx_chn);
+
+/*
+ * Enable a TX channel: write the enable bit to the peer RT enable register,
+ * then set the enable bit in the channel's RT control register.
+ * Always returns 0.
+ */
+int k3_udma_glue_enable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	struct udma_tchan *tchan = tx_chn->udma_tchanx;
+	u32 ctl;
+
+	xudma_tchanrt_write(tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+			    UDMA_PEER_RT_EN_ENABLE);
+
+	/* Read-modify-write keeps the other control bits as they are */
+	ctl = xudma_tchanrt_read(tchan, UDMA_TCHAN_RT_CTL_REG);
+	ctl |= UDMA_CHAN_RT_CTL_EN;
+	xudma_tchanrt_write(tchan, UDMA_TCHAN_RT_CTL_REG, ctl);
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_tx_chn);
+
+/*
+ * Disable a TX channel by clearing its RT control register and then the
+ * peer RT enable register; the RT registers are dumped before and after.
+ */
+void k3_udma_glue_disable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	struct udma_tchan *tchan = tx_chn->udma_tchanx;
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1");
+
+	xudma_tchanrt_write(tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+	xudma_tchanrt_write(tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn);
+
+/*
+ * Request teardown of a TX channel.
+ *
+ * Writes enable + teardown to the RT control register. With @sync set, the
+ * register is polled in 1us steps until the enable bit clears or the
+ * K3_UDMAX_TDOWN_TIMEOUT_US limit is passed, and an error is logged if the
+ * peer still reports itself enabled afterwards.
+ */
+void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			       bool sync)
+{
+	struct udma_tchan *tchan = tx_chn->udma_tchanx;
+	u32 ctl, peer;
+	int polls;
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1");
+
+	xudma_tchanrt_write(tchan, UDMA_TCHAN_RT_CTL_REG,
+			    UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN);
+
+	ctl = xudma_tchanrt_read(tchan, UDMA_TCHAN_RT_CTL_REG);
+
+	for (polls = 0; sync && (ctl & UDMA_CHAN_RT_CTL_EN); polls++) {
+		ctl = xudma_tchanrt_read(tchan, UDMA_TCHAN_RT_CTL_REG);
+		udelay(1);
+		if (polls > K3_UDMAX_TDOWN_TIMEOUT_US) {
+			dev_err(tx_chn->common.dev, "TX tdown timeout\n");
+			break;
+		}
+	}
+
+	peer = xudma_tchanrt_read(tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG);
+	if (sync && (peer & UDMA_PEER_RT_EN_ENABLE))
+		dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n");
+
+	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_tx_chn);
+
+/*
+ * Reset both rings of a TX channel.
+ *
+ * @tx_chn:  TX channel
+ * @data:    opaque pointer handed to @cleanup
+ * @cleanup: called once for every descriptor address drained from the TX ring
+ */
+void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+			       void *data,
+			       void (*cleanup)(void *data, dma_addr_t desc_dma))
+{
+	struct device *dev = tx_chn->common.dev;
+	dma_addr_t dma;
+	int pending, n, err;
+
+	/* TXCQ is not an input of the UDMA and should be empty: plain reset */
+	if (tx_chn->ringtxcq)
+		k3_ringacc_ring_reset(tx_chn->ringtxcq);
+
+	/*
+	 * The TX ring is an input of the UDMA, which caches its state, so:
+	 * remember the occupancy, drain the ring while handing each
+	 * descriptor to .cleanup(), then reset it with that occupancy.
+	 */
+	pending = k3_ringacc_ring_get_occ(tx_chn->ringtx);
+	dev_dbg(dev, "TX reset occ_tx %u\n", pending);
+
+	n = 0;
+	while (n < pending) {
+		err = k3_ringacc_ring_pop(tx_chn->ringtx, &dma);
+		if (err) {
+			dev_err(dev, "TX reset pop %d\n", err);
+			break;
+		}
+		cleanup(data, dma);
+		n++;
+	}
+
+	k3_ringacc_ring_reset_dma(tx_chn->ringtx, pending);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_tx_chn);
+
+/* Return the host descriptor size computed when the channel was requested */
+u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	const struct k3_udma_glue_common *common = &tx_chn->common;
+
+	return common->hdesc_size;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_hdesc_size);
+
+/* Return the ring id of the channel's TX completion ring */
+u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	struct k3_ring *txcq = tx_chn->ringtxcq;
+
+	return k3_ringacc_get_ring_id(txcq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_txcq_id);
+
+/*
+ * Look up the interrupt number of the TX completion ring, cache it in the
+ * channel and return it.
+ */
+int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn)
+{
+	int irq = k3_ringacc_get_ring_irq_num(tx_chn->ringtxcq);
+
+	tx_chn->virq = irq;
+	return irq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_irq);
+
+/*
+ * Build the TISCI RX channel configuration request and send it.
+ * Returns the result of the rx_ch_cfg operation; a failure is logged.
+ */
+static int k3_udma_glue_cfg_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	const struct udma_tisci_rm *rm = rx_chn->common.tisci_rm;
+	struct ti_sci_msg_rm_udmap_rx_ch_cfg cfg_req;
+	int err;
+
+	memset(&cfg_req, 0, sizeof(cfg_req));
+
+	cfg_req.nav_id = rm->tisci_dev_id;
+	cfg_req.index = rx_chn->udma_rchan_id;
+	cfg_req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID;
+
+	/* The fetch size is the host descriptor size divided by four */
+	cfg_req.rx_fetch_size = rx_chn->common.hdesc_size >> 2;
+	cfg_req.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+
+	/*
+	 * TODO: rxcq_qnum/RCHAN[a]_RCQ cannot be supported with the current
+	 * sysfw and udmax implementation, so an invalid value is configured
+	 * instead of k3_ringacc_get_ring_id(rx_chn->flows[0].ringrx).
+	 */
+	cfg_req.rxcq_qnum = 0xFFFF;
+
+	/* Default flow + extra ones */
+	if (rx_chn->flow_num &&
+	    rx_chn->flow_id_base != rx_chn->udma_rchan_id) {
+		cfg_req.flowid_start = rx_chn->flow_id_base;
+		cfg_req.flowid_cnt = rx_chn->flow_num;
+	}
+
+	err = rm->tisci_udmap_ops->rx_ch_cfg(rm->tisci, &cfg_req);
+	if (!err)
+		return 0;
+
+	dev_err(rx_chn->common.dev, "rchan%d cfg failed %d\n",
+		rx_chn->udma_rchan_id, err);
+	return err;
+}
+
+/*
+ * Release flow @flow_num of @rx_chn: free its rings, put the rflow and drop
+ * the ready-flow count. Does nothing when the flow holds no valid rflow.
+ */
+static void k3_udma_glue_release_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+					 u32 flow_num)
+{
+	struct k3_udma_glue_rx_flow *rflow = rx_chn->flows + flow_num;
+
+	if (!IS_ERR_OR_NULL(rflow->udma_rflow)) {
+		if (rflow->ringrxfdq)
+			k3_ringacc_ring_free(rflow->ringrxfdq);
+
+		if (rflow->ringrx)
+			k3_ringacc_ring_free(rflow->ringrx);
+
+		xudma_rflow_put(rx_chn->common.udmax, rflow->udma_rflow);
+		rflow->udma_rflow = NULL;
+		rx_chn->flows_ready--;
+	}
+}
+
+/*
+ * Acquire and configure RX flow @flow_idx of @rx_chn.
+ *
+ * Takes the rflow with the id stored in the flow, requests and configures
+ * the RX ring and the free descriptor ring named in @flow_cfg, then sends
+ * the flow configuration through TISCI. For a remote channel the ring ids
+ * in that request are TI_SCI_RESOURCE_NULL.
+ *
+ * On success flows_ready is incremented and 0 is returned. On failure a
+ * negative error code is returned and exactly what was acquired here is
+ * undone. flows_ready is left alone because it was not incremented yet, and
+ * the flow's pointers are cleared so a later release cannot free them again.
+ */
+static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+				    u32 flow_idx,
+				    struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int rx_ring_id;
+	int rx_ringfdq_id;
+	int ret = 0;
+
+	flow->udma_rflow = xudma_rflow_get(rx_chn->common.udmax,
+					   flow->udma_rflow_id);
+	if (IS_ERR(flow->udma_rflow)) {
+		ret = PTR_ERR(flow->udma_rflow);
+		dev_err(dev, "UDMAX rflow get err %d\n", ret);
+		return ret;
+	}
+
+	/* The rflow handed out must be the one that was asked for */
+	if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
+		ret = -ENODEV;
+		goto err_rflow_put;
+	}
+
+	/* request and cfg rings */
+	flow->ringrx = k3_ringacc_request_ring(rx_chn->common.ringacc,
+					       flow_cfg->ring_rxq_id, 0);
+	if (!flow->ringrx) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get RX ring\n");
+		goto err_rflow_put;
+	}
+
+	flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc,
+						  flow_cfg->ring_rxfdq0_id, 0);
+	if (!flow->ringrxfdq) {
+		ret = -ENODEV;
+		dev_err(dev, "Failed to get RXFDQ ring\n");
+		goto err_ringrx_free;
+	}
+
+	ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringrx %d\n", ret);
+		goto err_ringrxfdq_free;
+	}
+
+	ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
+	if (ret) {
+		dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
+		goto err_ringrxfdq_free;
+	}
+
+	if (rx_chn->remote) {
+		rx_ring_id = TI_SCI_RESOURCE_NULL;
+		rx_ringfdq_id = TI_SCI_RESOURCE_NULL;
+	} else {
+		rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+		rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+	}
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	if (rx_chn->common.epib)
+		req.rx_einfo_present = 1;
+	if (rx_chn->common.psdata_size)
+		req.rx_psinfo_present = 1;
+	if (flow_cfg->rx_error_handling)
+		req.rx_error_handling = 1;
+	req.rx_desc_type = 0;
+	req.rx_dest_qnum = rx_ring_id;
+	req.rx_src_tag_hi_sel = 0;
+	req.rx_src_tag_lo_sel = flow_cfg->src_tag_lo_sel;
+	req.rx_dest_tag_hi_sel = 0;
+	req.rx_dest_tag_lo_sel = 0;
+	/* The same free descriptor ring serves all four FDQ slots */
+	req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+	req.rx_fdq1_qnum = rx_ringfdq_id;
+	req.rx_fdq2_qnum = rx_ringfdq_id;
+	req.rx_fdq3_qnum = rx_ringfdq_id;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
+			ret);
+		goto err_ringrxfdq_free;
+	}
+
+	rx_chn->flows_ready++;
+	dev_dbg(dev, "flow%d config done. ready:%d\n",
+		flow->udma_rflow_id, rx_chn->flows_ready);
+
+	return 0;
+
+err_ringrxfdq_free:
+	k3_ringacc_ring_free(flow->ringrxfdq);
+	flow->ringrxfdq = NULL;
+err_ringrx_free:
+	k3_ringacc_ring_free(flow->ringrx);
+	flow->ringrx = NULL;
+err_rflow_put:
+	xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+	flow->udma_rflow = NULL;
+	return ret;
+}
+
+/* Debug-print the static configuration of an RX channel */
+static void k3_udma_glue_dump_rx_chn(struct k3_udma_glue_rx_channel *chn)
+{
+	const struct k3_udma_glue_common *common = &chn->common;
+
+	dev_dbg(common->dev,
+		"dump_rx_chn:\n"
+		"udma_rchan_id: %d\n"
+		"src_thread: %08x\n"
+		"dst_thread: %08x\n"
+		"epib: %d\n"
+		"hdesc_size: %u\n"
+		"psdata_size: %u\n"
+		"swdata_size: %u\n"
+		"flow_id_base: %d\n"
+		"flow_num: %d\n",
+		chn->udma_rchan_id,
+		common->src_thread, common->dst_thread,
+		common->epib,
+		common->hdesc_size, common->psdata_size, common->swdata_size,
+		chn->flow_id_base, chn->flow_num);
+}
+
+static void k3_udma_glue_dump_rx_rt_chn(struct k3_udma_glue_rx_channel *chn,
+					char *mark)
+{
+	struct device *dev = chn->common.dev;
+
+	dev_dbg(dev, "=== dump ===> %s\n", mark);
+
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_CTL_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		xudma_rchanrt_read(chn->udma_rchanx,
+				   UDMA_RCHAN_RT_PEER_RT_EN_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_PCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_BCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_BCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_SBCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_SBCNT_REG));
+}
+
+static int
+k3_udma_glue_allocate_rx_flows(struct k3_udma_glue_rx_channel *rx_chn,
+			       struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	int ret;
+
+	/* default rflow */
+	if (cfg->flow_id_use_rxchan_id)
+		return 0;
+
+	/* not a GP rflows */
+	if (rx_chn->flow_id_base != -1 &&
+	    !xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+		return 0;
+
+	/* Allocate range of GP rflows */
+	ret = xudma_alloc_gp_rflow_range(rx_chn->common.udmax,
+					 rx_chn->flow_id_base,
+					 rx_chn->flow_num);
+	if (ret < 0) {
+		dev_err(rx_chn->common.dev, "UDMAX reserve_rflow %d cnt:%d err: %d\n",
+			rx_chn->flow_id_base, rx_chn->flow_num, ret);
+		return ret;
+	}
+	rx_chn->flow_id_base = ret;
+
+	return 0;
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name,
+				 struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_rx_channel *rx_chn;
+	int ret, i;
+
+	if (cfg->flow_id_num <= 0)
+		return ERR_PTR(-EINVAL);
+
+	if (cfg->flow_id_num != 1 &&
+	    (cfg->def_flow_cfg || cfg->flow_id_use_rxchan_id))
+		return ERR_PTR(-EINVAL);
+
+	rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+	if (!rx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	rx_chn->common.dev = dev;
+	rx_chn->common.swdata_size = cfg->swdata_size;
+	rx_chn->remote = false;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&rx_chn->common, false);
+	if (ret)
+		goto err;
+
+	rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+						rx_chn->common.psdata_size,
+						rx_chn->common.swdata_size);
+
+	/* request and cfg UDMAP RX channel */
+	rx_chn->udma_rchanx = xudma_rchan_get(rx_chn->common.udmax, -1);
+	if (IS_ERR(rx_chn->udma_rchanx)) {
+		ret = PTR_ERR(rx_chn->udma_rchanx);
+		dev_err(dev, "UDMAX rchanx get err %d\n", ret);
+		goto err;
+	}
+	rx_chn->udma_rchan_id = xudma_rchan_get_id(rx_chn->udma_rchanx);
+
+	rx_chn->flow_num = cfg->flow_id_num;
+	rx_chn->flow_id_base = cfg->flow_id_base;
+
+	/* Use RX channel id as flow id: target dev can't generate flow_id */
+	if (cfg->flow_id_use_rxchan_id)
+		rx_chn->flow_id_base = rx_chn->udma_rchan_id;
+
+	rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+				     sizeof(*rx_chn->flows), GFP_KERNEL);
+	if (!rx_chn->flows) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+	/* request and cfg psi-l */
+	rx_chn->common.dst_thread =
+			xudma_dev_get_psil_base(rx_chn->common.udmax) +
+			rx_chn->udma_rchan_id;
+
+	ret = k3_udma_glue_cfg_rx_chn(rx_chn);
+	if (ret) {
+		dev_err(dev, "Failed to cfg rchan %d\n", ret);
+		goto err;
+	}
+
+	/* init default RX flow only if flow_num = 1 */
+	if (cfg->def_flow_cfg) {
+		ret = k3_udma_glue_cfg_rx_flow(rx_chn, 0, cfg->def_flow_cfg);
+		if (ret)
+			goto err;
+	}
+
+	ret = xudma_navss_psil_pair(rx_chn->common.udmax,
+				    rx_chn->common.src_thread,
+				    rx_chn->common.dst_thread);
+	if (ret) {
+		dev_err(dev, "PSI-L request err %d\n", ret);
+		goto err;
+	}
+
+	rx_chn->psil_paired = true;
+
+	/* reset RX RT registers */
+	k3_udma_glue_disable_rx_chn(rx_chn);
+
+	k3_udma_glue_dump_rx_chn(rx_chn);
+
+	return rx_chn;
+
+err:
+	k3_udma_glue_release_rx_chn(rx_chn);
+	return ERR_PTR(ret);
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
+				   struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	struct k3_udma_glue_rx_channel *rx_chn;
+	int ret, i;
+
+	if (cfg->flow_id_num <= 0 ||
+	    cfg->flow_id_use_rxchan_id ||
+	    cfg->def_flow_cfg ||
+	    cfg->flow_id_base < 0)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * A remote RX channel is under the control of a remote CPU core, so
+	 * Linux can only request and manipulate it through dedicated RX flows
+	 */
+
+	rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+	if (!rx_chn)
+		return ERR_PTR(-ENOMEM);
+
+	rx_chn->common.dev = dev;
+	rx_chn->common.swdata_size = cfg->swdata_size;
+	rx_chn->remote = true;
+	rx_chn->udma_rchan_id = -1;
+	rx_chn->flow_num = cfg->flow_id_num;
+	rx_chn->flow_id_base = cfg->flow_id_base;
+	rx_chn->psil_paired = false;
+
+	/* parse of udmap channel */
+	ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+					&rx_chn->common, false);
+	if (ret)
+		goto err;
+
+	rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+						rx_chn->common.psdata_size,
+						rx_chn->common.swdata_size);
+
+	rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+				     sizeof(*rx_chn->flows), GFP_KERNEL);
+	if (!rx_chn->flows) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+	if (ret)
+		goto err;
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+	k3_udma_glue_dump_rx_chn(rx_chn);
+
+	return rx_chn;
+
+err:
+	k3_udma_glue_release_rx_chn(rx_chn);
+	return ERR_PTR(ret);
+}
+
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn(struct device *dev, const char *name,
+			    struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+	if (cfg->remote)
+		return k3_udma_glue_request_remote_rx_chn(dev, name, cfg);
+	else
+		return k3_udma_glue_request_rx_chn_priv(dev, name, cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_rx_chn);
+
+void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(rx_chn->common.udmax))
+		return;
+
+	if (rx_chn->psil_paired) {
+		xudma_navss_psil_unpair(rx_chn->common.udmax,
+					rx_chn->common.src_thread,
+					rx_chn->common.dst_thread);
+		rx_chn->psil_paired = false;
+	}
+
+	for (i = 0; i < rx_chn->flow_num; i++)
+		k3_udma_glue_release_rx_flow(rx_chn, i);
+
+	if (xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+		xudma_free_gp_rflow_range(rx_chn->common.udmax,
+					  rx_chn->flow_id_base,
+					  rx_chn->flow_num);
+
+	if (!IS_ERR_OR_NULL(rx_chn->udma_rchanx))
+		xudma_rchan_put(rx_chn->common.udmax,
+				rx_chn->udma_rchanx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_rx_chn);
+
+int k3_udma_glue_rx_flow_init(struct k3_udma_glue_rx_channel *rx_chn,
+			      u32 flow_idx,
+			      struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+	if (flow_idx >= rx_chn->flow_num)
+		return -EINVAL;
+
+	return k3_udma_glue_cfg_rx_flow(rx_chn, flow_idx, flow_cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_init);
+
+u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn,
+				    u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow;
+
+	if (flow_idx >= rx_chn->flow_num)
+		return -EINVAL;
+
+	flow = &rx_chn->flows[flow_idx];
+
+	return k3_ringacc_get_ring_id(flow->ringrxfdq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_get_fdq_id);
+
+u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	return rx_chn->flow_id_base;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_flow_id_base);
+
+int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn,
+				u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int rx_ring_id;
+	int rx_ringfdq_id;
+	int ret = 0;
+
+	if (!rx_chn->remote)
+		return -EINVAL;
+
+	rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+	rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+
+	memset(&req, 0, sizeof(req));
+
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	req.rx_dest_qnum = rx_ring_id;
+	req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+	req.rx_fdq1_qnum = rx_ringfdq_id;
+	req.rx_fdq2_qnum = rx_ringfdq_id;
+	req.rx_fdq3_qnum = rx_ringfdq_id;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d enable failed: %d\n", flow->udma_rflow_id,
+			ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_enable);
+
+int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn,
+				 u32 flow_idx)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+	const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+	struct device *dev = rx_chn->common.dev;
+	struct ti_sci_msg_rm_udmap_flow_cfg req;
+	int ret = 0;
+
+	if (!rx_chn->remote)
+		return -EINVAL;
+
+	memset(&req, 0, sizeof(req));
+	req.valid_params =
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+			TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+	req.nav_id = tisci_rm->tisci_dev_id;
+	req.flow_index = flow->udma_rflow_id;
+	req.rx_dest_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq0_sz0_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq1_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq2_qnum = TI_SCI_RESOURCE_NULL;
+	req.rx_fdq3_qnum = TI_SCI_RESOURCE_NULL;
+
+	ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+	if (ret) {
+		dev_err(dev, "flow%d disable failed: %d\n", flow->udma_rflow_id,
+			ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_disable);
+
+int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	u32 rxrt_ctl;
+
+	if (rx_chn->remote)
+		return -EINVAL;
+
+	if (rx_chn->flows_ready < rx_chn->flow_num)
+		return -EINVAL;
+
+	rxrt_ctl = xudma_rchanrt_read(rx_chn->udma_rchanx,
+				      UDMA_RCHAN_RT_CTL_REG);
+	rxrt_ctl |= UDMA_CHAN_RT_CTL_EN;
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG,
+			    rxrt_ctl);
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx,
+			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    UDMA_PEER_RT_EN_ENABLE);
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_rx_chn);
+
+void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1");
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx,
+			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    0);
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG, 0);
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_rx_chn);
+
+void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			       bool sync)
+{
+	int i = 0;
+	u32 val;
+
+	if (rx_chn->remote)
+		return;
+
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1");
+
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+			    UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN);
+
+	val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG);
+
+	while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
+		val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+					 UDMA_RCHAN_RT_CTL_REG);
+		udelay(1);
+		if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
+			dev_err(rx_chn->common.dev, "RX tdown timeout\n");
+			break;
+		}
+		i++;
+	}
+
+	val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+				 UDMA_RCHAN_RT_PEER_RT_EN_REG);
+	if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
+		dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n");
+	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_rx_chn);
+
+void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+		u32 flow_num, void *data,
+		void (*cleanup)(void *data, dma_addr_t desc_dma), bool skip_fdq)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+	struct device *dev = rx_chn->common.dev;
+	dma_addr_t desc_dma;
+	int occ_rx, i, ret;
+
+	/* reset RXCQ as it is not input for udma - expected to be empty */
+	occ_rx = k3_ringacc_ring_get_occ(flow->ringrx);
+	dev_dbg(dev, "RX reset flow %u occ_rx %u\n", flow_num, occ_rx);
+	if (flow->ringrx)
+		k3_ringacc_ring_reset(flow->ringrx);
+
+	/* Skip RX FDQ in case one FDQ is used for the set of flows */
+	if (skip_fdq)
+		return;
+
+	/*
+	 * RX FDQ reset needs to be done in a special way, as it is an input
+	 * for udma and its state is cached by udma, so:
+	 * 1) save RX FDQ occ
+	 * 2) clean up RX FDQ and call callback .cleanup() for each desc
+	 * 3) reset RX FDQ in a special way
+	 */
+	occ_rx = k3_ringacc_ring_get_occ(flow->ringrxfdq);
+	dev_dbg(dev, "RX reset flow %u occ_rx_fdq %u\n", flow_num, occ_rx);
+
+	for (i = 0; i < occ_rx; i++) {
+		ret = k3_ringacc_ring_pop(flow->ringrxfdq, &desc_dma);
+		if (ret) {
+			dev_err(dev, "RX reset pop %d\n", ret);
+			break;
+		}
+		cleanup(data, desc_dma);
+	}
+
+	k3_ringacc_ring_reset_dma(flow->ringrxfdq, occ_rx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_rx_chn);
+
+int k3_udma_glue_push_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			     u32 flow_num, struct cppi5_host_desc_t *desc_rx,
+			     dma_addr_t desc_dma)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+	return k3_ringacc_ring_push(flow->ringrxfdq, &desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_rx_chn);
+
+int k3_udma_glue_pop_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+			    u32 flow_num, dma_addr_t *desc_dma)
+{
+	struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+	return k3_ringacc_ring_pop(flow->ringrx, desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_rx_chn);
+
+int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn,
+			    u32 flow_num)
+{
+	struct k3_udma_glue_rx_flow *flow;
+
+	flow = &rx_chn->flows[flow_num];
+
+	flow->virq = k3_ringacc_get_ring_irq_num(flow->ringrx);
+
+	return flow->virq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_irq);
diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
new file mode 100644
index 000000000000..0b8f3dd6b146
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-private.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	return navss_psil_pair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_pair);
+
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+	return navss_psil_unpair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_unpair);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property)
+{
+	struct device_node *udma_node = np;
+	struct platform_device *pdev;
+	struct udma_dev *ud;
+
+	if (property) {
+		udma_node = of_parse_phandle(np, property, 0);
+		if (!udma_node) {
+			pr_err("UDMA node is not found\n");
+			return ERR_PTR(-ENODEV);
+		}
+	}
+
+	pdev = of_find_device_by_node(udma_node);
+	if (!pdev) {
+		pr_debug("UDMA device not found\n");
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	if (np != udma_node)
+		of_node_put(udma_node);
+
+	ud = platform_get_drvdata(pdev);
+	if (!ud) {
+		pr_debug("UDMA has not been probed\n");
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	return ud;
+}
+EXPORT_SYMBOL(of_xudma_dev_get);
+
+u32 xudma_dev_get_psil_base(struct udma_dev *ud)
+{
+	return ud->psil_base;
+}
+EXPORT_SYMBOL(xudma_dev_get_psil_base);
+
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud)
+{
+	return &ud->tisci_rm;
+}
+EXPORT_SYMBOL(xudma_dev_get_tisci_rm);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	return __udma_alloc_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_alloc_gp_rflow_range);
+
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	return __udma_free_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_free_gp_rflow_range);
+
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id)
+{
+	return !test_bit(id, ud->rflow_gp_map);
+}
+EXPORT_SYMBOL(xudma_rflow_is_gp);
+
+#define XUDMA_GET_PUT_RESOURCE(res)					\
+struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id)	\
+{									\
+	return __udma_reserve_##res(ud, false, id);			\
+}									\
+EXPORT_SYMBOL(xudma_##res##_get);					\
+									\
+void xudma_##res##_put(struct udma_dev *ud, struct udma_##res *p)	\
+{									\
+	clear_bit(p->id, ud->res##_map);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##_put)
+XUDMA_GET_PUT_RESOURCE(tchan);
+XUDMA_GET_PUT_RESOURCE(rchan);
+
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id)
+{
+	return __udma_get_rflow(ud, id);
+}
+EXPORT_SYMBOL(xudma_rflow_get);
+
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p)
+{
+	__udma_put_rflow(ud, p);
+}
+EXPORT_SYMBOL(xudma_rflow_put);
+
+#define XUDMA_GET_RESOURCE_ID(res)					\
+int xudma_##res##_get_id(struct udma_##res *p)				\
+{									\
+	return p->id;							\
+}									\
+EXPORT_SYMBOL(xudma_##res##_get_id)
+XUDMA_GET_RESOURCE_ID(tchan);
+XUDMA_GET_RESOURCE_ID(rchan);
+XUDMA_GET_RESOURCE_ID(rflow);
+
+/* Exported register access functions */
+#define XUDMA_RT_IO_FUNCTIONS(res)					\
+u32 xudma_##res##rt_read(struct udma_##res *p, int reg)			\
+{									\
+	return udma_##res##rt_read(p, reg);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##rt_read);					\
+									\
+void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val)	\
+{									\
+	udma_##res##rt_write(p, reg, val);				\
+}									\
+EXPORT_SYMBOL(xudma_##res##rt_write)
+XUDMA_RT_IO_FUNCTIONS(tchan);
+XUDMA_RT_IO_FUNCTIONS(rchan);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 14f8f8c75d3a..9974e72cdc50 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -1090,6 +1090,64 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/**
+ * __udma_alloc_gp_rflow_range - alloc range of GP RX flows
+ * @ud: UDMA device
+ * @from: Start the search from this flow id number
+ * @cnt: Number of consecutive flow ids to allocate
+ *
+ * Allocate a range of RX flow ids for future use. Those flows can be requested
+ * only using an explicit flow id number. If @from is set to -1 it will try to
+ * find the first free range. If @from is a non-negative value it will force
+ * allocation of only the specified range of flows.
+ *
+ * Returns -ENOMEM if can't find free range.
+ * -EEXIST if requested range is busy.
+ * -EINVAL if wrong input values passed.
+ * Returns flow id on success.
+ */
+static int __udma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	int start, tmp_from;
+	DECLARE_BITMAP(tmp, K3_UDMA_MAX_RFLOWS);
+
+	tmp_from = from;
+	if (tmp_from < 0)
+		tmp_from = ud->rchan_cnt;
+	/* default flows can't be allocated and accessible only by id */
+	if (tmp_from < ud->rchan_cnt)
+		return -EINVAL;
+
+	if (tmp_from + cnt > ud->rflow_cnt)
+		return -EINVAL;
+
+	bitmap_or(tmp, ud->rflow_gp_map, ud->rflow_gp_map_allocated,
+		  ud->rflow_cnt);
+
+	start = bitmap_find_next_zero_area(tmp,
+					   ud->rflow_cnt,
+					   tmp_from, cnt, 0);
+	if (start >= ud->rflow_cnt)
+		return -ENOMEM;
+
+	if (from >= 0 && start != from)
+		return -EEXIST;
+
+	bitmap_set(ud->rflow_gp_map_allocated, start, cnt);
+	return start;
+}
+
+static int __udma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+	if (from < ud->rchan_cnt)
+		return -EINVAL;
+	if (from + cnt > ud->rflow_cnt)
+		return -EINVAL;
+
+	bitmap_clear(ud->rflow_gp_map_allocated, from, cnt);
+	return 0;
+}
+
 static struct udma_rflow *__udma_get_rflow(struct udma_dev *ud, int id)
 {
 	/*
@@ -2968,7 +3026,7 @@ static struct udma_match_data am654_main_data = {
 
 static struct udma_match_data am654_mcu_data = {
 	.psil_base = 0x6000,
-	.enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
+	.enable_memcpy_support = true, /* TEST: DMA domains */
 	.statictr_z_mask = GENMASK(11, 0),
 	.rchan_oes_offset = 0x2000,
 	.tpl_levels = 2,
@@ -3369,3 +3427,6 @@ static struct platform_driver udma_driver = {
 	.probe		= udma_probe,
 };
 builtin_platform_driver(udma_driver);
+
+/* Private interfaces to UDMA */
+#include "k3-udma-private.c"
diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h
index ff428f823e67..128d8744a435 100644
--- a/drivers/dma/ti/k3-udma.h
+++ b/drivers/dma/ti/k3-udma.h
@@ -117,4 +117,35 @@ struct udma_tisci_rm {
 	struct ti_sci_resource *rm_ranges[RM_RANGE_LAST];
 };
 
+/* Direct access to UDMA low level resources for the glue layer */
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread);
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+			    u32 dst_thread);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property);
+void xudma_dev_put(struct udma_dev *ud);
+u32 xudma_dev_get_psil_base(struct udma_dev *ud);
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+
+struct udma_tchan *xudma_tchan_get(struct udma_dev *ud, int id);
+struct udma_rchan *xudma_rchan_get(struct udma_dev *ud, int id);
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id);
+
+void xudma_tchan_put(struct udma_dev *ud, struct udma_tchan *p);
+void xudma_rchan_put(struct udma_dev *ud, struct udma_rchan *p);
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p);
+
+int xudma_tchan_get_id(struct udma_tchan *p);
+int xudma_rchan_get_id(struct udma_rchan *p);
+int xudma_rflow_get_id(struct udma_rflow *p);
+
+u32 xudma_tchanrt_read(struct udma_tchan *tchan, int reg);
+void xudma_tchanrt_write(struct udma_tchan *tchan, int reg, u32 val);
+u32 xudma_rchanrt_read(struct udma_rchan *rchan, int reg);
+void xudma_rchanrt_write(struct udma_rchan *rchan, int reg, u32 val);
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id);
+
 #endif /* K3_UDMA_H_ */
-- 
cgit 


From 2bd8010a618c9181dd8d49097a1c72fd396c632e Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 21 Jan 2020 09:01:04 +0200
Subject: dmaengine: ti: k3-psil: make symbols static

Fix the following warnings by making these symbols static

drivers/dma/ti/k3-psil-j721e.c:62:16: warning: symbol 'j721e_src_ep_map' was not declared. Should it be static?
drivers/dma/ti/k3-psil-j721e.c:172:16: warning: symbol 'j721e_dst_ep_map' was not declared. Should it be static?
drivers/dma/ti/k3-psil-j721e.c:216:20: warning: symbol 'j721e_ep_map' was not declared. Should it be static?
  CC      drivers/dma/ti/k3-psil-j721e.o
drivers/dma/ti/k3-psil-am654.c:52:16: warning: symbol 'am654_src_ep_map' was not declared. Should it be static?
drivers/dma/ti/k3-psil-am654.c:127:16: warning: symbol 'am654_dst_ep_map' was not declared. Should it be static?
drivers/dma/ti/k3-psil-am654.c:169:20: warning: symbol 'am654_ep_map' was not declared. Should it be static?

Reported-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20200121070104.4393-1-peter.ujfalusi@ti.com
[vkoul: updated patch title]
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/k3-psil-am654.c | 4 ++--
 drivers/dma/ti/k3-psil-j721e.c | 4 ++--
 drivers/dma/ti/k3-psil-priv.h  | 4 ++++
 drivers/dma/ti/k3-psil.c       | 3 ---
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/k3-psil-am654.c b/drivers/dma/ti/k3-psil-am654.c
index 7da9242b6114..a896a15908cf 100644
--- a/drivers/dma/ti/k3-psil-am654.c
+++ b/drivers/dma/ti/k3-psil-am654.c
@@ -49,7 +49,7 @@
 	}
 
 /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
-struct psil_ep am654_src_ep_map[] = {
+static struct psil_ep am654_src_ep_map[] = {
 	/* SA2UL */
 	PSIL_SA2UL(0x4000, 0),
 	PSIL_SA2UL(0x4001, 0),
@@ -124,7 +124,7 @@ struct psil_ep am654_src_ep_map[] = {
 };
 
 /* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
-struct psil_ep am654_dst_ep_map[] = {
+static struct psil_ep am654_dst_ep_map[] = {
 	/* SA2UL */
 	PSIL_SA2UL(0xc000, 1),
 	PSIL_SA2UL(0xc001, 1),
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
index a609d496fddd..e3cfd5f66842 100644
--- a/drivers/dma/ti/k3-psil-j721e.c
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -59,7 +59,7 @@
 	}
 
 /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
-struct psil_ep j721e_src_ep_map[] = {
+static struct psil_ep j721e_src_ep_map[] = {
 	/* SA2UL */
 	PSIL_SA2UL(0x4000, 0),
 	PSIL_SA2UL(0x4001, 0),
@@ -169,7 +169,7 @@ struct psil_ep j721e_src_ep_map[] = {
 };
 
 /* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
-struct psil_ep j721e_dst_ep_map[] = {
+static struct psil_ep j721e_dst_ep_map[] = {
 	/* SA2UL */
 	PSIL_SA2UL(0xc000, 1),
 	PSIL_SA2UL(0xc001, 1),
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
index f74420653d8a..a1f389ca371e 100644
--- a/drivers/dma/ti/k3-psil-priv.h
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -36,4 +36,8 @@ struct psil_ep_map {
 
 struct psil_endpoint_config *psil_get_ep_config(u32 thread_id);
 
+/* SoC PSI-L endpoint maps */
+extern struct psil_ep_map am654_ep_map;
+extern struct psil_ep_map j721e_ep_map;
+
 #endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
index 9314cf9a52e4..d7b965049ccb 100644
--- a/drivers/dma/ti/k3-psil.c
+++ b/drivers/dma/ti/k3-psil.c
@@ -12,9 +12,6 @@
 
 #include "k3-psil-priv.h"
 
-extern struct psil_ep_map am654_ep_map;
-extern struct psil_ep_map j721e_ep_map;
-
 static DEFINE_MUTEX(ep_map_mutex);
 static struct psil_ep_map *soc_ep_map;
 
-- 
cgit 


From 4b048178854da11656596d36a107577d66fd1e08 Mon Sep 17 00:00:00 2001
From: Chen Zhou <chenzhou10@huawei.com>
Date: Mon, 20 Jan 2020 20:58:43 +0800
Subject: dmaengine: fsl-qdma: fix duplicated argument to &&

There is a duplicated argument to && in function fsl_qdma_free_chan_resources,
which looks like a typo: the pointer fsl_queue->desc_pool also needs a NULL
check. Fix it.
Detected with coccinelle.

Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs")
Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Reviewed-by: Peng Ma <peng.ma@nxp.com>
Tested-by: Peng Ma <peng.ma@nxp.com>
Link: https://lore.kernel.org/r/20200120125843.34398-1-chenzhou10@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/fsl-qdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index 89792083d62c..95cc0256b387 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -304,7 +304,7 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
 
 	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
 
-	if (!fsl_queue->comp_pool && !fsl_queue->comp_pool)
+	if (!fsl_queue->comp_pool && !fsl_queue->desc_pool)
 		return;
 
 	list_for_each_entry_safe(comp_temp, _comp_temp,
-- 
cgit 


From ffc079a4accc2bd02758eb93cb8efcb3bfee1891 Mon Sep 17 00:00:00 2001
From: Stefan Mavrodiev <stefan@olimex.com>
Date: Fri, 10 Jan 2020 16:11:39 +0200
Subject: dmaengine: sun4i: Add support for cyclic requests with dedicated DMA

Currently, cyclic transfers can be used only with normal DMA. Cyclic transfers
are used by the pcm_dmaengine module, which is required for implementing sound
with the sun4i-hdmi encoder, and that controller can accept audio only from a
dedicated DMA.

This patch enables them, following the existing style for the
scatter/gather type transfers.

Signed-off-by: Stefan Mavrodiev <stefan@olimex.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://lore.kernel.org/r/20200110141140.28527-2-stefan@olimex.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sun4i-dma.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index 4e1575e731d8..bbc2bda3b902 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -669,43 +669,41 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
 	dma_addr_t src, dest;
 	u32 endpoints;
 	int nr_periods, offset, plength, i;
+	u8 ram_type, io_mode, linear_mode;
 
 	if (!is_slave_direction(dir)) {
 		dev_err(chan2dev(chan), "Invalid DMA direction\n");
 		return NULL;
 	}
 
-	if (vchan->is_dedicated) {
-		/*
-		 * As we are using this just for audio data, we need to use
-		 * normal DMA. There is nothing stopping us from supporting
-		 * dedicated DMA here as well, so if a client comes up and
-		 * requires it, it will be simple to implement it.
-		 */
-		dev_err(chan2dev(chan),
-			"Cyclic transfers are only supported on Normal DMA\n");
-		return NULL;
-	}
-
 	contract = generate_dma_contract();
 	if (!contract)
 		return NULL;
 
 	contract->is_cyclic = 1;
 
-	/* Figure out the endpoints and the address we need */
+	if (vchan->is_dedicated) {
+		io_mode = SUN4I_DDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM;
+	} else {
+		io_mode = SUN4I_NDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM;
+	}
+
 	if (dir == DMA_MEM_TO_DEV) {
 		src = buf;
 		dest = sconfig->dst_addr;
-		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) |
-			    SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
-			    SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO);
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type);
 	} else {
 		src = sconfig->src_addr;
 		dest = buf;
-		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
-			    SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) |
-			    SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM);
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode);
 	}
 
 	/*
@@ -747,8 +745,13 @@ sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
 			dest = buf + offset;
 
 		/* Make the promise */
-		promise = generate_ndma_promise(chan, src, dest,
-						plength, sconfig, dir);
+		if (vchan->is_dedicated)
+			promise = generate_ddma_promise(chan, src, dest,
+							plength, sconfig);
+		else
+			promise = generate_ndma_promise(chan, src, dest,
+							plength, sconfig, dir);
+
 		if (!promise) {
 			/* TODO: should we free everything? */
 			return NULL;
-- 
cgit 


From cc88525ebffc757e00cc5a5d61da6271646c7f5f Mon Sep 17 00:00:00 2001
From: Matthias Fend <matthias.fend@wolfvision.net>
Date: Wed, 15 Jan 2020 11:22:49 +0100
Subject: dmaengine: zynqmp_dma: fix burst length configuration

Since the DMA engine expects the burst length register to hold the
base-2 logarithm of the burst length, the configured burst length needs
to be converted first.
Additionally, add a burst length range check to avoid corrupting unrelated
register bits.

Signed-off-by: Matthias Fend <matthias.fend@wolfvision.net>
Link: https://lore.kernel.org/r/20200115102249.24398-1-matthias.fend@wolfvision.net
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/zynqmp_dma.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index 9c845c07b107..d47749a35863 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -123,10 +123,12 @@
 /* Max transfer size per descriptor */
 #define ZYNQMP_DMA_MAX_TRANS_LEN	0x40000000
 
+/* Max burst lengths */
+#define ZYNQMP_DMA_MAX_DST_BURST_LEN    32768U
+#define ZYNQMP_DMA_MAX_SRC_BURST_LEN    32768U
+
 /* Reset values for data attributes */
 #define ZYNQMP_DMA_AXCACHE_VAL		0xF
-#define ZYNQMP_DMA_ARLEN_RST_VAL	0xF
-#define ZYNQMP_DMA_AWLEN_RST_VAL	0xF
 
 #define ZYNQMP_DMA_SRC_ISSUE_RST_VAL	0x1F
 
@@ -534,17 +536,19 @@ static void zynqmp_dma_handle_ovfl_int(struct zynqmp_dma_chan *chan, u32 status)
 
 static void zynqmp_dma_config(struct zynqmp_dma_chan *chan)
 {
-	u32 val;
+	u32 val, burst_val;
 
 	val = readl(chan->regs + ZYNQMP_DMA_CTRL0);
 	val |= ZYNQMP_DMA_POINT_TYPE_SG;
 	writel(val, chan->regs + ZYNQMP_DMA_CTRL0);
 
 	val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+	burst_val = __ilog2_u32(chan->src_burst_len);
 	val = (val & ~ZYNQMP_DMA_ARLEN) |
-		(chan->src_burst_len << ZYNQMP_DMA_ARLEN_OFST);
+		((burst_val << ZYNQMP_DMA_ARLEN_OFST) & ZYNQMP_DMA_ARLEN);
+	burst_val = __ilog2_u32(chan->dst_burst_len);
 	val = (val & ~ZYNQMP_DMA_AWLEN) |
-		(chan->dst_burst_len << ZYNQMP_DMA_AWLEN_OFST);
+		((burst_val << ZYNQMP_DMA_AWLEN_OFST) & ZYNQMP_DMA_AWLEN);
 	writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
 }
 
@@ -560,8 +564,10 @@ static int zynqmp_dma_device_config(struct dma_chan *dchan,
 {
 	struct zynqmp_dma_chan *chan = to_chan(dchan);
 
-	chan->src_burst_len = config->src_maxburst;
-	chan->dst_burst_len = config->dst_maxburst;
+	chan->src_burst_len = clamp(config->src_maxburst, 1U,
+		ZYNQMP_DMA_MAX_SRC_BURST_LEN);
+	chan->dst_burst_len = clamp(config->dst_maxburst, 1U,
+		ZYNQMP_DMA_MAX_DST_BURST_LEN);
 
 	return 0;
 }
@@ -887,8 +893,8 @@ static int zynqmp_dma_chan_probe(struct zynqmp_dma_device *zdev,
 		return PTR_ERR(chan->regs);
 
 	chan->bus_width = ZYNQMP_DMA_BUS_WIDTH_64;
-	chan->dst_burst_len = ZYNQMP_DMA_AWLEN_RST_VAL;
-	chan->src_burst_len = ZYNQMP_DMA_ARLEN_RST_VAL;
+	chan->dst_burst_len = ZYNQMP_DMA_MAX_DST_BURST_LEN;
+	chan->src_burst_len = ZYNQMP_DMA_MAX_SRC_BURST_LEN;
 	err = of_property_read_u32(node, "xlnx,bus-width", &chan->bus_width);
 	if (err < 0) {
 		dev_err(&pdev->dev, "missing xlnx,bus-width property\n");
-- 
cgit 


From 69b1189ba2cd6643474312004f10685324e38f58 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 21 Jan 2020 10:33:09 +0100
Subject: dmaengine: Remove dma_device_satisfies_mask() wrapper

Commit aa1e6f1a385eb2b0 ("dmaengine: kill struct dma_client and
supporting infrastructure") removed the last user of the
dma_device_satisfies_mask() wrapper.

Remove the wrapper, and rename __dma_device_satisfies_mask() to
dma_device_satisfies_mask(), to get rid of one more function starting
with a double underscore.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20200121093311.28639-2-geert+renesas@glider.be
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 158aeb1b6a8a..7550dbdf5488 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -308,11 +308,8 @@ static void dma_channel_rebalance(void)
 		}
 }
 
-#define dma_device_satisfies_mask(device, mask) \
-	__dma_device_satisfies_mask((device), &(mask))
-static int
-__dma_device_satisfies_mask(struct dma_device *device,
-			    const dma_cap_mask_t *want)
+static int dma_device_satisfies_mask(struct dma_device *device,
+				     const dma_cap_mask_t *want)
 {
 	dma_cap_mask_t has;
 
@@ -531,7 +528,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask,
 {
 	struct dma_chan *chan;
 
-	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
+	if (mask && !dma_device_satisfies_mask(dev, mask)) {
 		dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__);
 		return NULL;
 	}
-- 
cgit 


From c3c431de99c068e3f64d01335c1532b22e4b1d1b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 21 Jan 2020 10:33:11 +0100
Subject: dmaengine: Move dma_get_{,any_}slave_channel() to private dmaengine.h

The functions dma_get_slave_channel() and dma_get_any_slave_channel()
are called from DMA engine drivers only.  Hence move their declarations
from the public header file <linux/dmaengine.h> to the private header
file drivers/dma/dmaengine.h.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20200121093311.28639-4-geert+renesas@glider.be
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.h | 3 +++
 drivers/dma/of-dma.c    | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
index b0b97475707a..e8a320c9e57c 100644
--- a/drivers/dma/dmaengine.h
+++ b/drivers/dma/dmaengine.h
@@ -179,4 +179,7 @@ dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb)
 	return (cb->callback) ? true : false;
 }
 
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
+struct dma_chan *dma_get_any_slave_channel(struct dma_device *device);
+
 #endif
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index c2d779daa4b5..b2c2b5e8093c 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -15,6 +15,8 @@
 #include <linux/of.h>
 #include <linux/of_dma.h>
 
+#include "dmaengine.h"
+
 static LIST_HEAD(of_dma_list);
 static DEFINE_MUTEX(of_dma_lock);
 
-- 
cgit 


From e606c8b9d751e593b71bdcb636ac3392c62c1c50 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 22 Jan 2020 23:52:37 +0000
Subject: dmaengine: s3c24xx-dma: fix spelling mistake "to" -> "too"

There is a spelling mistake in a dev_err message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20200122235237.2830344-1-colin.king@canonical.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/s3c24xx-dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 1ed5dc1f597c..8e14c72d03f0 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -1198,7 +1198,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
 
 	/* Basic sanity check */
 	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
-		dev_err(&pdev->dev, "to many dma channels %d, max %d\n",
+		dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
 			pdata->num_phy_channels, MAX_DMA_CHANNELS);
 		return -EINVAL;
 	}
-- 
cgit 


From 6c0157be02f02c490dbe74af0d469ed1ddc30b1e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 22 Jan 2020 09:38:18 +0000
Subject: dmaengine: ti: k3-udma: fix spelling mistake "limted" -> "limited"

There are spelling mistakes in dev_err messages. Fix them.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20200122093818.2800743-1-colin.king@canonical.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/k3-udma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 9974e72cdc50..ea79c2df28e0 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -2300,7 +2300,7 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	/* static TR for remote PDMA */
 	if (udma_configure_statictr(uc, d, dev_width, burst)) {
 		dev_err(uc->ud->dev,
-			"%s: StaticTR Z is limted to maximum 4095 (%u)\n",
+			"%s: StaticTR Z is limited to maximum 4095 (%u)\n",
 			__func__, d->static_tr.bstcnt);
 
 		udma_free_hwdesc(uc, d);
@@ -2483,7 +2483,7 @@ udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	/* static TR for remote PDMA */
 	if (udma_configure_statictr(uc, d, dev_width, burst)) {
 		dev_err(uc->ud->dev,
-			"%s: StaticTR Z is limted to maximum 4095 (%u)\n",
+			"%s: StaticTR Z is limited to maximum 4095 (%u)\n",
 			__func__, d->static_tr.bstcnt);
 
 		udma_free_hwdesc(uc, d);
-- 
cgit 


From d2fb0a0438384fee08a418025f743913020033ce Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:43:47 -0700
Subject: dmaengine: break out channel registration

In preparation for dynamic channel registration, the code segment that
does the channel registration is broken out to its own function.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965022778.73301.8929944324898985438.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 135 +++++++++++++++++++++++++++++-------------------
 1 file changed, 81 insertions(+), 54 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 7550dbdf5488..2daf2ee9bebd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -926,6 +926,79 @@ static int get_dma_id(struct dma_device *device)
 	return 0;
 }
 
+static int __dma_async_device_channel_register(struct dma_device *device,
+					       struct dma_chan *chan,
+					       int chan_id)
+{
+	int rc = 0;
+	int chancnt = device->chancnt;
+	atomic_t *idr_ref;
+	struct dma_chan *tchan;
+
+	tchan = list_first_entry_or_null(&device->channels,
+					 struct dma_chan, device_node);
+	if (tchan->dev) {
+		idr_ref = tchan->dev->idr_ref;
+	} else {
+		idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
+		if (!idr_ref)
+			return -ENOMEM;
+		atomic_set(idr_ref, 0);
+	}
+
+	chan->local = alloc_percpu(typeof(*chan->local));
+	if (!chan->local)
+		goto err_out;
+	chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+	if (!chan->dev) {
+		free_percpu(chan->local);
+		chan->local = NULL;
+		goto err_out;
+	}
+
+	/*
+	 * When the chan_id is a negative value, we are dynamically adding
+	 * the channel. Otherwise we are static enumerating.
+	 */
+	chan->chan_id = chan_id < 0 ? chancnt : chan_id;
+	chan->dev->device.class = &dma_devclass;
+	chan->dev->device.parent = device->dev;
+	chan->dev->chan = chan;
+	chan->dev->idr_ref = idr_ref;
+	chan->dev->dev_id = device->dev_id;
+	atomic_inc(idr_ref);
+	dev_set_name(&chan->dev->device, "dma%dchan%d",
+		     device->dev_id, chan->chan_id);
+
+	rc = device_register(&chan->dev->device);
+	if (rc)
+		goto err_out;
+	chan->client_count = 0;
+	device->chancnt = chan->chan_id + 1;
+
+	return 0;
+
+ err_out:
+	free_percpu(chan->local);
+	kfree(chan->dev);
+	if (atomic_dec_return(idr_ref) == 0)
+		kfree(idr_ref);
+	return rc;
+}
+
+static void __dma_async_device_channel_unregister(struct dma_device *device,
+						  struct dma_chan *chan)
+{
+	WARN_ONCE(!device->device_release && chan->client_count,
+		  "%s called while %d clients hold a reference\n",
+		  __func__, chan->client_count);
+	mutex_lock(&dma_list_mutex);
+	chan->dev->chan = NULL;
+	mutex_unlock(&dma_list_mutex);
+	device_unregister(&chan->dev->device);
+	free_percpu(chan->local);
+}
+
 /**
  * dma_async_device_register - registers DMA devices found
  * @device: &dma_device
@@ -936,9 +1009,8 @@ static int get_dma_id(struct dma_device *device)
  */
 int dma_async_device_register(struct dma_device *device)
 {
-	int chancnt = 0, rc;
+	int rc, i = 0;
 	struct dma_chan* chan;
-	atomic_t *idr_ref;
 
 	if (!device)
 		return -ENODEV;
@@ -1038,59 +1110,23 @@ int dma_async_device_register(struct dma_device *device)
 	if (device_has_all_tx_types(device))
 		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
 
-	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
-	if (!idr_ref)
-		return -ENOMEM;
 	rc = get_dma_id(device);
-	if (rc != 0) {
-		kfree(idr_ref);
+	if (rc != 0)
 		return rc;
-	}
-
-	atomic_set(idr_ref, 0);
 
 	/* represent channels in sysfs. Probably want devs too */
 	list_for_each_entry(chan, &device->channels, device_node) {
-		rc = -ENOMEM;
-		chan->local = alloc_percpu(typeof(*chan->local));
-		if (chan->local == NULL)
+		rc = __dma_async_device_channel_register(device, chan, i++);
+		if (rc < 0)
 			goto err_out;
-		chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
-		if (chan->dev == NULL) {
-			free_percpu(chan->local);
-			chan->local = NULL;
-			goto err_out;
-		}
-
-		chan->chan_id = chancnt++;
-		chan->dev->device.class = &dma_devclass;
-		chan->dev->device.parent = device->dev;
-		chan->dev->chan = chan;
-		chan->dev->idr_ref = idr_ref;
-		chan->dev->dev_id = device->dev_id;
-		atomic_inc(idr_ref);
-		dev_set_name(&chan->dev->device, "dma%dchan%d",
-			     device->dev_id, chan->chan_id);
-
-		rc = device_register(&chan->dev->device);
-		if (rc) {
-			free_percpu(chan->local);
-			chan->local = NULL;
-			kfree(chan->dev);
-			atomic_dec(idr_ref);
-			goto err_out;
-		}
-		chan->client_count = 0;
 	}
 
-	if (!chancnt) {
+	if (!device->chancnt) {
 		dev_err(device->dev, "%s: device has no channels!\n", __func__);
 		rc = -ENODEV;
 		goto err_out;
 	}
 
-	device->chancnt = chancnt;
-
 	mutex_lock(&dma_list_mutex);
 	/* take references on public channels */
 	if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
@@ -1118,9 +1154,8 @@ int dma_async_device_register(struct dma_device *device)
 
 err_out:
 	/* if we never registered a channel just release the idr */
-	if (atomic_read(idr_ref) == 0) {
+	if (!device->chancnt) {
 		ida_free(&dma_ida, device->dev_id);
-		kfree(idr_ref);
 		return rc;
 	}
 
@@ -1148,16 +1183,8 @@ void dma_async_device_unregister(struct dma_device *device)
 {
 	struct dma_chan *chan;
 
-	list_for_each_entry(chan, &device->channels, device_node) {
-		WARN_ONCE(!device->device_release && chan->client_count,
-			  "%s called while %d clients hold a reference\n",
-			  __func__, chan->client_count);
-		mutex_lock(&dma_list_mutex);
-		chan->dev->chan = NULL;
-		mutex_unlock(&dma_list_mutex);
-		device_unregister(&chan->dev->device);
-		free_percpu(chan->local);
-	}
+	list_for_each_entry(chan, &device->channels, device_node)
+		__dma_async_device_channel_unregister(device, chan);
 
 	mutex_lock(&dma_list_mutex);
 	/*
-- 
cgit 


From e81274cd6b5264809384066e09a5253708822522 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:43:53 -0700
Subject: dmaengine: add support to dynamic register/unregister of channels

With the channel registration routines broken out, now add support code to
allow independent registering and unregistering of channels in a hotplug fashion.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965023364.73301.7821862091077299040.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2daf2ee9bebd..51a2f2b1b2de 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -986,6 +986,20 @@ static int __dma_async_device_channel_register(struct dma_device *device,
 	return rc;
 }
 
+int dma_async_device_channel_register(struct dma_device *device,
+				      struct dma_chan *chan)
+{
+	int rc;
+
+	rc = __dma_async_device_channel_register(device, chan, -1);
+	if (rc < 0)
+		return rc;
+
+	dma_channel_rebalance();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_register);
+
 static void __dma_async_device_channel_unregister(struct dma_device *device,
 						  struct dma_chan *chan)
 {
@@ -993,12 +1007,22 @@ static void __dma_async_device_channel_unregister(struct dma_device *device,
 		  "%s called while %d clients hold a reference\n",
 		  __func__, chan->client_count);
 	mutex_lock(&dma_list_mutex);
+	list_del(&chan->device_node);
+	device->chancnt--;
 	chan->dev->chan = NULL;
 	mutex_unlock(&dma_list_mutex);
 	device_unregister(&chan->dev->device);
 	free_percpu(chan->local);
 }
 
+void dma_async_device_channel_unregister(struct dma_device *device,
+					 struct dma_chan *chan)
+{
+	__dma_async_device_channel_unregister(device, chan);
+	dma_channel_rebalance();
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_unregister);
+
 /**
  * dma_async_device_register - registers DMA devices found
  * @device: &dma_device
@@ -1121,12 +1145,6 @@ int dma_async_device_register(struct dma_device *device)
 			goto err_out;
 	}
 
-	if (!device->chancnt) {
-		dev_err(device->dev, "%s: device has no channels!\n", __func__);
-		rc = -ENODEV;
-		goto err_out;
-	}
-
 	mutex_lock(&dma_list_mutex);
 	/* take references on public channels */
 	if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
@@ -1181,9 +1199,9 @@ EXPORT_SYMBOL(dma_async_device_register);
  */
 void dma_async_device_unregister(struct dma_device *device)
 {
-	struct dma_chan *chan;
+	struct dma_chan *chan, *n;
 
-	list_for_each_entry(chan, &device->channels, device_node)
+	list_for_each_entry_safe(chan, n, &device->channels, device_node)
 		__dma_async_device_channel_unregister(device, chan);
 
 	mutex_lock(&dma_list_mutex);
-- 
cgit 


From bfe1d56091c1a404b3d4ce7e9809d745fc4453bb Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:43:59 -0700
Subject: dmaengine: idxd: Init and probe for Intel data accelerators

The idxd driver introduces the Intel Data Streaming Accelerator [1] that
will be available on future Intel Xeon CPUs. One of the kernel access
points for the driver is through the dmaengine subsystem. It will
initially provide the DMA copy service to the kernel.

Some of the main features introduced with this accelerator are shared
virtual memory (SVM) support and descriptor submission using the Intel
CPU instructions movdir64b and enqcmds. Additional accelerator devices
will share the same driver, with variations in capabilities.

This commit introduces the probe and initialization component of the
driver.

[1]: https://software.intel.com/en-us/download/intel-data-streaming-accelerator-preliminary-architecture-specification

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965023991.73301.6186843973135311580.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig          |  13 +
 drivers/dma/Makefile         |   1 +
 drivers/dma/idxd/Makefile    |   2 +
 drivers/dma/idxd/device.c    | 657 +++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/idxd.h      | 225 +++++++++++++++
 drivers/dma/idxd/init.c      | 468 ++++++++++++++++++++++++++++++
 drivers/dma/idxd/irq.c       | 156 ++++++++++
 drivers/dma/idxd/registers.h | 335 ++++++++++++++++++++++
 8 files changed, 1857 insertions(+)
 create mode 100644 drivers/dma/idxd/Makefile
 create mode 100644 drivers/dma/idxd/device.c
 create mode 100644 drivers/dma/idxd/idxd.h
 create mode 100644 drivers/dma/idxd/init.c
 create mode 100644 drivers/dma/idxd/irq.c
 create mode 100644 drivers/dma/idxd/registers.h

(limited to 'drivers/dma')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 312a6cc36c78..a8f8e9552885 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -273,6 +273,19 @@ config INTEL_IDMA64
 	  Enable DMA support for Intel Low Power Subsystem such as found on
 	  Intel Skylake PCH.
 
+config INTEL_IDXD
+	tristate "Intel Data Accelerators support"
+	depends on PCI && X86_64
+	select DMA_ENGINE
+	select SBITMAP
+	help
+	  Enable support for the Intel(R) data accelerators present
+	  in Intel Xeon CPU.
+
+	  Say Y if you have such a platform.
+
+	  If unsure, say N.
+
 config INTEL_IOATDMA
 	tristate "Intel I/OAT DMA support"
 	depends on PCI && X86_64
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index a150d1d792fd..461d77c4a839 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-$(CONFIG_INTEL_IDXD) += idxd/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
new file mode 100644
index 000000000000..0dd1ca77513f
--- /dev/null
+++ b/drivers/dma/idxd/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IDXD) += idxd.o
+idxd-y := init.o irq.o device.o
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
new file mode 100644
index 000000000000..af2bdc18df3d
--- /dev/null
+++ b/drivers/dma/idxd/device.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout);
+static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand);
+
+/* Interrupt control bits */
+int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	union msix_perm perm;
+	u32 offset;
+
+	if (vec_id < 0 || vec_id >= msixcnt)
+		return -EINVAL;
+
+	offset = idxd->msix_perm_offset + vec_id * 8;
+	perm.bits = ioread32(idxd->reg_base + offset);
+	perm.ignore = 1;
+	iowrite32(perm.bits, idxd->reg_base + offset);
+
+	return 0;
+}
+
+void idxd_mask_msix_vectors(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	int i, rc;
+
+	for (i = 0; i < msixcnt; i++) {
+		rc = idxd_mask_msix_vector(idxd, i);
+		if (rc < 0)
+			dev_warn(&pdev->dev,
+				 "Failed disabling msix vec %d\n", i);
+	}
+}
+
+int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	int msixcnt = pci_msix_vec_count(pdev);
+	union msix_perm perm;
+	u32 offset;
+
+	if (vec_id < 0 || vec_id >= msixcnt)
+		return -EINVAL;
+
+	offset = idxd->msix_perm_offset + vec_id * 8;
+	perm.bits = ioread32(idxd->reg_base + offset);
+	perm.ignore = 0;
+	iowrite32(perm.bits, idxd->reg_base + offset);
+
+	return 0;
+}
+
+void idxd_unmask_error_interrupts(struct idxd_device *idxd)
+{
+	union genctrl_reg genctrl;
+
+	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+	genctrl.softerr_int_en = 1;
+	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+void idxd_mask_error_interrupts(struct idxd_device *idxd)
+{
+	union genctrl_reg genctrl;
+
+	genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+	genctrl.softerr_int_en = 0;
+	iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+static void free_hw_descs(struct idxd_wq *wq)
+{
+	int i;
+
+	for (i = 0; i < wq->num_descs; i++)
+		kfree(wq->hw_descs[i]);
+
+	kfree(wq->hw_descs);
+}
+
+static int alloc_hw_descs(struct idxd_wq *wq, int num)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+	int i;
+	int node = dev_to_node(dev);
+
+	wq->hw_descs = kcalloc_node(num, sizeof(struct dsa_hw_desc *),
+				    GFP_KERNEL, node);
+	if (!wq->hw_descs)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		wq->hw_descs[i] = kzalloc_node(sizeof(*wq->hw_descs[i]),
+					       GFP_KERNEL, node);
+		if (!wq->hw_descs[i]) {
+			free_hw_descs(wq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void free_descs(struct idxd_wq *wq)
+{
+	int i;
+
+	for (i = 0; i < wq->num_descs; i++)
+		kfree(wq->descs[i]);
+
+	kfree(wq->descs);
+}
+
+static int alloc_descs(struct idxd_wq *wq, int num)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+	int i;
+	int node = dev_to_node(dev);
+
+	wq->descs = kcalloc_node(num, sizeof(struct idxd_desc *),
+				 GFP_KERNEL, node);
+	if (!wq->descs)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		wq->descs[i] = kzalloc_node(sizeof(*wq->descs[i]),
+					    GFP_KERNEL, node);
+		if (!wq->descs[i]) {
+			free_descs(wq);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/* WQ control bits */
+int idxd_wq_alloc_resources(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_group *group = wq->group;
+	struct device *dev = &idxd->pdev->dev;
+	int rc, num_descs, i;
+
+	num_descs = wq->size +
+		idxd->hw.gen_cap.max_descs_per_engine * group->num_engines;
+	wq->num_descs = num_descs;
+
+	rc = alloc_hw_descs(wq, num_descs);
+	if (rc < 0)
+		return rc;
+
+	wq->compls_size = num_descs * sizeof(struct dsa_completion_record);
+	wq->compls = dma_alloc_coherent(dev, wq->compls_size,
+					&wq->compls_addr, GFP_KERNEL);
+	if (!wq->compls) {
+		rc = -ENOMEM;
+		goto fail_alloc_compls;
+	}
+
+	rc = alloc_descs(wq, num_descs);
+	if (rc < 0)
+		goto fail_alloc_descs;
+
+	rc = sbitmap_init_node(&wq->sbmap, num_descs, -1, GFP_KERNEL,
+			       dev_to_node(dev));
+	if (rc < 0)
+		goto fail_sbitmap_init;
+
+	for (i = 0; i < num_descs; i++) {
+		struct idxd_desc *desc = wq->descs[i];
+
+		desc->hw = wq->hw_descs[i];
+		desc->completion = &wq->compls[i];
+		desc->compl_dma  = wq->compls_addr +
+			sizeof(struct dsa_completion_record) * i;
+		desc->id = i;
+		desc->wq = wq;
+	}
+
+	return 0;
+
+ fail_sbitmap_init:
+	free_descs(wq);
+ fail_alloc_descs:
+	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+ fail_alloc_compls:
+	free_hw_descs(wq);
+	return rc;
+}
+
+void idxd_wq_free_resources(struct idxd_wq *wq)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	free_hw_descs(wq);
+	free_descs(wq);
+	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+	sbitmap_free(&wq->sbmap);
+}
+
+int idxd_wq_enable(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 status;
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+
+	if (wq->state == IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d already enabled\n", wq->id);
+		return -ENXIO;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_WQ, wq->id);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    status != IDXD_CMDSTS_ERR_WQ_ENABLED) {
+		dev_dbg(dev, "WQ enable failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	wq->state = IDXD_WQ_ENABLED;
+	dev_dbg(dev, "WQ %d enabled\n", wq->id);
+	return 0;
+}
+
+int idxd_wq_disable(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 status, operand;
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	dev_dbg(dev, "Disabling WQ %d\n", wq->id);
+
+	if (wq->state != IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+		return 0;
+	}
+
+	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_WQ, operand);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	if (status != IDXD_CMDSTS_SUCCESS) {
+		dev_dbg(dev, "WQ disable failed: %#x\n", status);
+		return -ENXIO;
+	}
+
+	wq->state = IDXD_WQ_DISABLED;
+	dev_dbg(dev, "WQ %d disabled\n", wq->id);
+	return 0;
+}
+
+/* Device control bits */
+static inline bool idxd_is_enabled(struct idxd_device *idxd)
+{
+	union gensts_reg gensts;
+
+	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+
+	if (gensts.state == IDXD_DEVICE_STATE_ENABLED)
+		return true;
+	return false;
+}
+
+static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout)
+{
+	u32 sts, to = timeout;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	while (sts & IDXD_CMDSTS_ACTIVE && --to) {
+		cpu_relax();
+		sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	}
+
+	if (to == 0 && sts & IDXD_CMDSTS_ACTIVE) {
+		dev_warn(&idxd->pdev->dev, "%s timed out!\n", __func__);
+		*status = 0;
+		return -EBUSY;
+	}
+
+	*status = sts;
+	return 0;
+}
+
+static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand)
+{
+	union idxd_command_reg cmd;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmd = cmd_code;
+	cmd.operand = operand;
+	dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
+		__func__, cmd_code, operand);
+	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+	return 0;
+}
+
+int idxd_device_enable(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	if (idxd_is_enabled(idxd)) {
+		dev_dbg(dev, "Device already enabled\n");
+		return -ENXIO;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	/* If the command is successful or if the device was enabled */
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    status != IDXD_CMDSTS_ERR_DEV_ENABLED) {
+		dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+		return -ENXIO;
+	}
+
+	idxd->state = IDXD_DEV_ENABLED;
+	return 0;
+}
+
+int idxd_device_disable(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+	u32 status;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	if (!idxd_is_enabled(idxd)) {
+		dev_dbg(dev, "Device is not enabled\n");
+		return 0;
+	}
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	/* If the command is successful or if the device was disabled */
+	if (status != IDXD_CMDSTS_SUCCESS &&
+	    !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) {
+		dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+		rc = -ENXIO;
+		return rc;
+	}
+
+	idxd->state = IDXD_DEV_CONF_READY;
+	return 0;
+}
+
+int __idxd_device_reset(struct idxd_device *idxd)
+{
+	u32 status;
+	int rc;
+
+	rc = idxd_cmd_send(idxd, IDXD_CMD_RESET_DEVICE, 0);
+	if (rc < 0)
+		return rc;
+	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+int idxd_device_reset(struct idxd_device *idxd)
+{
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = __idxd_device_reset(idxd);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	return rc;
+}
+
+/* Device configuration bits */
+static void idxd_group_config_write(struct idxd_group *group)
+{
+	struct idxd_device *idxd = group->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+	u32 grpcfg_offset;
+
+	dev_dbg(dev, "Writing group %d cfg registers\n", group->id);
+
+	/* setup GRPWQCFG: four u64 WQ bitmaps at offset 0 of the group slot */
+	for (i = 0; i < 4; i++) {
+		grpcfg_offset = idxd->grpcfg_offset +
+			group->id * 64 + i * sizeof(u64);
+		iowrite64(group->grpcfg.wqs[i],
+			  idxd->reg_base + grpcfg_offset);
+		dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+			group->id, i, grpcfg_offset,
+			ioread64(idxd->reg_base + grpcfg_offset));
+	}
+
+	/* setup GRPENGCFG: u64 engine bitmap at offset 32 of the group slot */
+	grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 32;
+	iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+		grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset));
+
+	/* setup GRPFLAGS: u32 flags word at offset 40 of the group slot */
+	grpcfg_offset = idxd->grpcfg_offset + group->id * 64 + 40;
+	iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+		group->id, grpcfg_offset,
+		ioread32(idxd->reg_base + grpcfg_offset));
+}
+
+static int idxd_groups_config_write(struct idxd_device *idxd)
+
+{
+	union gencfg_reg reg;
+	int i;
+	struct device *dev = &idxd->pdev->dev;
+
+	/* Setup bandwidth token limit */
+	if (idxd->token_limit) {
+		reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+		reg.token_limit = idxd->token_limit;
+		iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+	}
+
+	dev_dbg(dev, "GENCFG(%#x): %#x\n", IDXD_GENCFG_OFFSET,
+		ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		idxd_group_config_write(group);
+	}
+
+	return 0;
+}
+
+static int idxd_wq_config_write(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 wq_offset;
+	int i;
+
+	if (!wq->group)
+		return 0;
+
+	memset(&wq->wqcfg, 0, sizeof(union wqcfg));
+
+	/* byte 0-3 */
+	wq->wqcfg.wq_size = wq->size;
+
+	if (wq->size == 0) {
+		dev_warn(dev, "Incorrect work queue size: 0\n");
+		return -EINVAL;
+	}
+
+	/* bytes 4-7 */
+	wq->wqcfg.wq_thresh = wq->threshold;
+
+	/* byte 8-11 */
+	wq->wqcfg.priv = 1; /* kernel, therefore priv */
+	wq->wqcfg.mode = 1;
+
+	wq->wqcfg.priority = wq->priority;
+
+	/* bytes 12-15 */
+	wq->wqcfg.max_xfer_shift = idxd->hw.gen_cap.max_xfer_shift;
+	wq->wqcfg.max_batch_shift = idxd->hw.gen_cap.max_batch_shift;
+
+	dev_dbg(dev, "WQ %d CFGs\n", wq->id);
+	for (i = 0; i < 8; i++) {
+		wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
+		iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset);
+		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
+			wq->id, i, wq_offset,
+			ioread32(idxd->reg_base + wq_offset));
+	}
+
+	return 0;
+}
+
+static int idxd_wqs_config_write(struct idxd_device *idxd)
+{
+	int i, rc;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		rc = idxd_wq_config_write(wq);
+		if (rc < 0)
+			return rc;
+	}
+
+	return 0;
+}
+
+static void idxd_group_flags_setup(struct idxd_device *idxd)
+{
+	int i;
+
+	/* TC-A 0 and TC-B 1 should be defaults */
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *group = &idxd->groups[i];
+
+		if (group->tc_a == -1)
+			group->grpcfg.flags.tc_a = 0;
+		else
+			group->grpcfg.flags.tc_a = group->tc_a;
+		if (group->tc_b == -1)
+			group->grpcfg.flags.tc_b = 1;
+		else
+			group->grpcfg.flags.tc_b = group->tc_b;
+		group->grpcfg.flags.use_token_limit = group->use_token_limit;
+		group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
+		if (group->tokens_allowed)
+			group->grpcfg.flags.tokens_allowed =
+				group->tokens_allowed;
+		else
+			group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
+	}
+}
+
+/* Rebuild every group's engine bitmap from the engines' group assignment. */
+static int idxd_engines_setup(struct idxd_device *idxd)
+{
+	int i, engines = 0;
+	struct idxd_engine *eng;
+	struct idxd_group *group;
+
+	for (i = 0; i < idxd->max_groups; i++)
+		idxd->groups[i].grpcfg.engines = 0;
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		eng = &idxd->engines[i];
+		group = eng->group;
+
+		if (!group)
+			continue;
+
+		group->grpcfg.engines |= BIT_ULL(eng->id);	/* u64 bitmap */
+		engines++;
+	}
+
+	/* A configuration with no engine in any group is rejected */
+	if (!engines)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Rebuild every group's WQ bitmaps from the wqs that have a group and size. */
+static int idxd_wqs_setup(struct idxd_device *idxd)
+{
+	struct idxd_wq *wq;
+	struct idxd_group *group;
+	int i, j, configured = 0;
+	struct device *dev = &idxd->pdev->dev;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		group = &idxd->groups[i];
+		for (j = 0; j < 4; j++)
+			group->grpcfg.wqs[j] = 0;
+	}
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		wq = &idxd->wqs[i];
+		group = wq->group;
+
+		if (!group || !wq->size)
+			continue;
+
+		if (!wq_dedicated(wq)) {
+			dev_warn(dev, "No shared workqueue support.\n");
+			return -EINVAL;
+		}
+
+		/* wqs[] holds u64 words, so build the mask with BIT_ULL() */
+		group->grpcfg.wqs[wq->id / 64] |= BIT_ULL(wq->id % 64);
+		configured++;
+	}
+
+	if (configured == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+int idxd_device_config(struct idxd_device *idxd)
+{
+	int rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	rc = idxd_wqs_setup(idxd);
+	if (rc < 0)
+		return rc;
+
+	rc = idxd_engines_setup(idxd);
+	if (rc < 0)
+		return rc;
+
+	idxd_group_flags_setup(idxd);
+
+	rc = idxd_wqs_config_write(idxd);
+	if (rc < 0)
+		return rc;
+
+	rc = idxd_groups_config_write(idxd);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
new file mode 100644
index 000000000000..733484922365
--- /dev/null
+++ b/drivers/dma/idxd/idxd.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_H_
+#define _IDXD_H_
+
+#include <linux/sbitmap.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include "registers.h"
+
+#define IDXD_DRIVER_VERSION	"1.00"
+
+extern struct kmem_cache *idxd_desc_pool;
+
+#define IDXD_REG_TIMEOUT	50
+#define IDXD_DRAIN_TIMEOUT	5000
+
+enum idxd_type {
+	IDXD_TYPE_UNKNOWN = -1,
+	IDXD_TYPE_DSA = 0,
+	IDXD_TYPE_MAX
+};
+
+#define IDXD_NAME_SIZE		128
+
+struct idxd_device_driver {
+	struct device_driver drv;
+};
+
+struct idxd_irq_entry {
+	struct idxd_device *idxd;
+	int id;
+	struct llist_head pending_llist;
+	struct list_head work_list;
+};
+
+struct idxd_group {
+	struct device conf_dev;
+	struct idxd_device *idxd;
+	struct grpcfg grpcfg;
+	int id;
+	int num_engines;
+	int num_wqs;
+	bool use_token_limit;
+	u8 tokens_allowed;
+	u8 tokens_reserved;
+	int tc_a;
+	int tc_b;
+};
+
+#define IDXD_MAX_PRIORITY	0xf
+
+enum idxd_wq_state {
+	IDXD_WQ_DISABLED = 0,
+	IDXD_WQ_ENABLED,
+};
+
+enum idxd_wq_flag {
+	WQ_FLAG_DEDICATED = 0,
+};
+
+enum idxd_wq_type {
+	IDXD_WQT_NONE = 0,
+	IDXD_WQT_KERNEL,
+};
+
+#define IDXD_ALLOCATED_BATCH_SIZE	128U
+#define WQ_NAME_SIZE   1024
+#define WQ_TYPE_SIZE   10
+
+struct idxd_wq {
+	void __iomem *dportal;
+	struct device conf_dev;
+	struct idxd_device *idxd;
+	int id;
+	enum idxd_wq_type type;
+	struct idxd_group *group;
+	int client_count;
+	struct mutex wq_lock;	/* mutex for workqueue */
+	u32 size;
+	u32 threshold;
+	u32 priority;
+	enum idxd_wq_state state;
+	unsigned long flags;
+	union wqcfg wqcfg;
+	atomic_t dq_count;	/* dedicated queue flow control */
+	u32 vec_ptr;		/* interrupt steering */
+	struct dsa_hw_desc **hw_descs;
+	int num_descs;
+	struct dsa_completion_record *compls;
+	dma_addr_t compls_addr;
+	int compls_size;
+	struct idxd_desc **descs;
+	struct sbitmap sbmap;
+	struct percpu_rw_semaphore submit_lock;
+	wait_queue_head_t submit_waitq;
+	char name[WQ_NAME_SIZE + 1];
+};
+
+struct idxd_engine {
+	struct device conf_dev;
+	int id;
+	struct idxd_group *group;
+	struct idxd_device *idxd;
+};
+
+/* shadow registers */
+struct idxd_hw {
+	u32 version;
+	union gen_cap_reg gen_cap;
+	union wq_cap_reg wq_cap;
+	union group_cap_reg group_cap;
+	union engine_cap_reg engine_cap;
+	struct opcap opcap;
+};
+
+enum idxd_device_state {
+	IDXD_DEV_HALTED = -1,
+	IDXD_DEV_DISABLED = 0,
+	IDXD_DEV_CONF_READY,
+	IDXD_DEV_ENABLED,
+};
+
+enum idxd_device_flag {
+	IDXD_FLAG_CONFIGURABLE = 0,
+};
+
+struct idxd_device {
+	enum idxd_type type;
+	struct device conf_dev;
+	struct list_head list;
+	struct idxd_hw hw;
+	enum idxd_device_state state;
+	unsigned long flags;
+	int id;
+
+	struct pci_dev *pdev;
+	void __iomem *reg_base;
+
+	spinlock_t dev_lock;	/* spinlock for device */
+	struct idxd_group *groups;
+	struct idxd_wq *wqs;
+	struct idxd_engine *engines;
+
+	int num_groups;
+
+	u32 msix_perm_offset;
+	u32 wqcfg_offset;
+	u32 grpcfg_offset;
+	u32 perfmon_offset;
+
+	u64 max_xfer_bytes;
+	u32 max_batch_size;
+	int max_groups;
+	int max_engines;
+	int max_tokens;
+	int max_wqs;
+	int max_wq_size;
+	int token_limit;
+
+	union sw_err_reg sw_err;
+
+	struct msix_entry *msix_entries;
+	int num_wq_irqs;
+	struct idxd_irq_entry *irq_entries;
+};
+
+/* IDXD software descriptor */
+struct idxd_desc {
+	struct dsa_hw_desc *hw;
+	dma_addr_t desc_dma;
+	struct dsa_completion_record *completion;
+	dma_addr_t compl_dma;
+	struct llist_node llnode;
+	struct list_head list;
+	int id;
+	struct idxd_wq *wq;
+};
+
+#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
+#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
+
+static inline bool wq_dedicated(struct idxd_wq *wq)
+{
+	return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
+}
+
+static inline void idxd_set_type(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+
+	if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
+		idxd->type = IDXD_TYPE_DSA;
+	else
+		idxd->type = IDXD_TYPE_UNKNOWN;
+}
+
+const char *idxd_get_dev_name(struct idxd_device *idxd);
+
+/* device interrupt control */
+irqreturn_t idxd_irq_handler(int vec, void *data);
+irqreturn_t idxd_misc_thread(int vec, void *data);
+irqreturn_t idxd_wq_thread(int irq, void *data);
+void idxd_mask_error_interrupts(struct idxd_device *idxd);
+void idxd_unmask_error_interrupts(struct idxd_device *idxd);
+void idxd_mask_msix_vectors(struct idxd_device *idxd);
+int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
+int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
+
+/* device control */
+int idxd_device_enable(struct idxd_device *idxd);
+int idxd_device_disable(struct idxd_device *idxd);
+int idxd_device_reset(struct idxd_device *idxd);
+int __idxd_device_reset(struct idxd_device *idxd);
+void idxd_device_cleanup(struct idxd_device *idxd);
+int idxd_device_config(struct idxd_device *idxd);
+void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+
+/* work queue control */
+int idxd_wq_alloc_resources(struct idxd_wq *wq);
+void idxd_wq_free_resources(struct idxd_wq *wq);
+int idxd_wq_enable(struct idxd_wq *wq);
+int idxd_wq_disable(struct idxd_wq *wq);
+
+#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
new file mode 100644
index 000000000000..6e89a87d62b0
--- /dev/null
+++ b/drivers/dma/idxd/init.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/aer.h>
+#include <linux/fs.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+MODULE_VERSION(IDXD_DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+
+#define DRV_NAME "idxd"
+
+static struct idr idxd_idrs[IDXD_TYPE_MAX];
+static struct mutex idxd_idr_lock;
+
+static struct pci_device_id idxd_pci_tbl[] = {
+	/* DSA ver 1.0 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
+
+static char *idxd_name[] = {
+	"dsa",
+};
+
+const char *idxd_get_dev_name(struct idxd_device *idxd)
+{
+	return idxd_name[idxd->type];
+}
+
+/*
+ * Enable all MSI-X vectors; vector 0 is misc/error, the rest serve the wqs.
+ */
+static int idxd_setup_interrupts(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	struct msix_entry *msix;
+	struct idxd_irq_entry *irq_entry;
+	int i, msixcnt, rc;
+
+	msixcnt = pci_msix_vec_count(pdev);
+	if (msixcnt < 0) {
+		dev_err(dev, "Not MSI-X interrupt capable.\n");
+		rc = msixcnt;	/* report the failure instead of returning 0 */
+		goto err_no_irq;
+	}
+
+	idxd->msix_entries = devm_kcalloc(dev, msixcnt, sizeof(*msix),
+					  GFP_KERNEL);
+	if (!idxd->msix_entries) {
+		rc = -ENOMEM;
+		goto err_no_irq;
+	}
+
+	for (i = 0; i < msixcnt; i++)
+		idxd->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt);
+	if (rc) {
+		dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt);
+		goto err_no_irq;
+	}
+	dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
+
+	/* One completion list per MSI-X entry; entry 0 is for errors/others */
+	idxd->irq_entries = devm_kcalloc(dev, msixcnt,
+					 sizeof(struct idxd_irq_entry),
+					 GFP_KERNEL);
+	if (!idxd->irq_entries) {
+		rc = -ENOMEM;
+		goto err_no_irq;
+	}
+
+	for (i = 0; i < msixcnt; i++) {
+		idxd->irq_entries[i].id = i;
+		idxd->irq_entries[i].idxd = idxd;
+	}
+
+	msix = &idxd->msix_entries[0];
+	irq_entry = &idxd->irq_entries[0];
+	rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler,
+				       idxd_misc_thread, 0, "idxd-misc",
+				       irq_entry);
+	if (rc < 0) {
+		dev_err(dev, "Failed to allocate misc interrupt.\n");
+		goto err_no_irq;
+	}
+
+	dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n",
+		msix->vector);
+
+	/* first MSI-X entry is not for wq interrupts */
+	idxd->num_wq_irqs = msixcnt - 1;
+
+	for (i = 1; i < msixcnt; i++) {
+		msix = &idxd->msix_entries[i];
+		irq_entry = &idxd->irq_entries[i];
+
+		init_llist_head(&idxd->irq_entries[i].pending_llist);
+		INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
+		rc = devm_request_threaded_irq(dev, msix->vector,
+					       idxd_irq_handler,
+					       idxd_wq_thread, 0,
+					       "idxd-portal", irq_entry);
+		if (rc < 0) {
+			dev_err(dev, "Failed to allocate irq %d.\n",
+				msix->vector);
+			goto err_no_irq;
+		}
+		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
+			i, msix->vector);
+	}
+
+	idxd_unmask_error_interrupts(idxd);
+
+	return 0;
+
+ err_no_irq:
+	/* Disable error interrupt generation */
+	idxd_mask_error_interrupts(idxd);
+	pci_disable_msix(pdev);
+	dev_err(dev, "No usable interrupts\n");
+	return rc;
+}
+
+static void idxd_wqs_free_lock(struct idxd_device *idxd)
+{
+	int i;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		percpu_free_rwsem(&wq->submit_lock);
+	}
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+
+	idxd->groups = devm_kcalloc(dev, idxd->max_groups,
+				    sizeof(struct idxd_group), GFP_KERNEL);
+	if (!idxd->groups)
+		return -ENOMEM;
+
+	for (i = 0; i < idxd->max_groups; i++) {
+		idxd->groups[i].idxd = idxd;
+		idxd->groups[i].id = i;
+		idxd->groups[i].tc_a = -1;
+		idxd->groups[i].tc_b = -1;
+	}
+
+	idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq),
+				 GFP_KERNEL);
+	if (!idxd->wqs)
+		return -ENOMEM;
+
+	idxd->engines = devm_kcalloc(dev, idxd->max_engines,
+				     sizeof(struct idxd_engine), GFP_KERNEL);
+	if (!idxd->engines)
+		return -ENOMEM;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+		int rc;
+
+		wq->id = i;
+		wq->idxd = idxd;
+		mutex_init(&wq->wq_lock);
+		atomic_set(&wq->dq_count, 0);
+		init_waitqueue_head(&wq->submit_waitq);
+		rc = percpu_init_rwsem(&wq->submit_lock);
+		if (rc < 0) {
+			idxd_wqs_free_lock(idxd);
+			return rc;
+		}
+	}
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		idxd->engines[i].idxd = idxd;
+		idxd->engines[i].id = i;
+	}
+
+	return 0;
+}
+
+static void idxd_read_table_offsets(struct idxd_device *idxd)
+{
+	union offsets_reg offsets;
+	struct device *dev = &idxd->pdev->dev;
+
+	offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
+	offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET
+			+ sizeof(u64));
+	idxd->grpcfg_offset = offsets.grpcfg * 0x100;
+	dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
+	idxd->wqcfg_offset = offsets.wqcfg * 0x100;
+	dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n",
+		idxd->wqcfg_offset);
+	idxd->msix_perm_offset = offsets.msix_perm * 0x100;
+	dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n",
+		idxd->msix_perm_offset);
+	idxd->perfmon_offset = offsets.perfmon * 0x100;
+	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
+}
+
+static void idxd_read_caps(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i;
+
+	/* reading generic capabilities */
+	idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
+	dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
+	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
+	idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
+	dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
+	if (idxd->hw.gen_cap.config_en)
+		set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);
+
+	/* reading group capabilities */
+	idxd->hw.group_cap.bits =
+		ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
+	dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
+	idxd->max_groups = idxd->hw.group_cap.num_groups;
+	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
+	idxd->max_tokens = idxd->hw.group_cap.total_tokens;
+	dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
+
+	/* read engine capabilities */
+	idxd->hw.engine_cap.bits =
+		ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
+	dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
+	idxd->max_engines = idxd->hw.engine_cap.num_engines;
+	dev_dbg(dev, "max engines: %u\n", idxd->max_engines);
+
+	/* read workqueue capabilities */
+	idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
+	dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
+	idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
+	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
+	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
+	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
+
+	/* reading operation capabilities */
+	for (i = 0; i < 4; i++) {
+		idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
+				IDXD_OPCAP_OFFSET + i * sizeof(u64));
+		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
+	}
+}
+
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev,
+				      void __iomem * const *iomap)
+{
+	struct device *dev = &pdev->dev;
+	struct idxd_device *idxd;
+
+	idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL);
+	if (!idxd)
+		return NULL;
+
+	idxd->pdev = pdev;
+	idxd->reg_base = iomap[IDXD_MMIO_BAR];
+	spin_lock_init(&idxd->dev_lock);
+
+	return idxd;
+}
+
+static int idxd_probe(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+
+	dev_dbg(dev, "%s entered and resetting device\n", __func__);
+	rc = idxd_device_reset(idxd);
+	if (rc < 0)
+		return rc;
+	dev_dbg(dev, "IDXD reset complete\n");
+
+	idxd_read_caps(idxd);
+	idxd_read_table_offsets(idxd);
+
+	rc = idxd_setup_internals(idxd);
+	if (rc)
+		return rc;	/* idxd_setup_internals() undoes its own rwsems */
+
+	rc = idxd_setup_interrupts(idxd);
+	if (rc)
+		goto err_setup;
+
+	dev_dbg(dev, "IDXD interrupt setup complete.\n");
+
+	mutex_lock(&idxd_idr_lock);
+	idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL);
+	mutex_unlock(&idxd_idr_lock);
+	if (idxd->id < 0) {
+		rc = idxd->id;	/* keep the error idr_alloc() reported */
+		goto err_idr_fail;
+	}
+
+	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
+	return 0;
+
+ err_idr_fail:
+	idxd_mask_error_interrupts(idxd);
+	idxd_mask_msix_vectors(idxd);
+ err_setup:
+	idxd_wqs_free_lock(idxd);	/* rwsems are not devm-managed */
+	return rc;
+}
+
+static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	void __iomem * const *iomap;
+	struct device *dev = &pdev->dev;
+	struct idxd_device *idxd;
+	int rc;
+	unsigned int mask;
+
+	rc = pcim_enable_device(pdev);
+	if (rc)
+		return rc;
+
+	dev_dbg(dev, "Mapping BARs\n");
+	mask = (1 << IDXD_MMIO_BAR);
+	rc = pcim_iomap_regions(pdev, mask, DRV_NAME);
+	if (rc)
+		return rc;
+
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	dev_dbg(dev, "Set DMA masks\n");
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	dev_dbg(dev, "Alloc IDXD context\n");
+	idxd = idxd_alloc(pdev, iomap);
+	if (!idxd)
+		return -ENOMEM;
+
+	idxd_set_type(idxd);
+
+	dev_dbg(dev, "Set PCI master\n");
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, idxd);
+
+	idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
+	rc = idxd_probe(idxd);
+	if (rc) {
+		dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
+		return -ENODEV;
+	}
+
+	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
+		 idxd->hw.version);
+
+	return 0;
+}
+
+static void idxd_shutdown(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	int rc, i;
+	struct idxd_irq_entry *irq_entry;
+	int msixcnt = pci_msix_vec_count(pdev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = idxd_device_disable(idxd);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	if (rc)
+		dev_err(&pdev->dev, "Disabling device failed\n");
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	idxd_mask_msix_vectors(idxd);
+	idxd_mask_error_interrupts(idxd);
+
+	for (i = 0; i < msixcnt; i++) {
+		irq_entry = &idxd->irq_entries[i];
+		synchronize_irq(idxd->msix_entries[i].vector);
+		if (i == 0)
+			continue;
+	}
+}
+
+static void idxd_remove(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	idxd_shutdown(pdev);
+	idxd_wqs_free_lock(idxd);
+	mutex_lock(&idxd_idr_lock);
+	idr_remove(&idxd_idrs[idxd->type], idxd->id);
+	mutex_unlock(&idxd_idr_lock);
+}
+
+static struct pci_driver idxd_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= idxd_pci_tbl,
+	.probe		= idxd_pci_probe,
+	.remove		= idxd_remove,
+	.shutdown	= idxd_shutdown,
+};
+
+static int __init idxd_init_module(void)
+{
+	int err, i;
+
+	/*
+	 * If the CPU does not support write512, there's no point in
+	 * enumerating the device. We can not utilize it.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) {
+		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
+		return -ENODEV;
+	}
+
+	pr_info("%s: Intel(R) Accelerator Devices Driver %s\n",
+		DRV_NAME, IDXD_DRIVER_VERSION);
+
+	mutex_init(&idxd_idr_lock);
+	for (i = 0; i < IDXD_TYPE_MAX; i++)
+		idr_init(&idxd_idrs[i]);
+
+	err = pci_register_driver(&idxd_pci_driver);
+	if (err)
+		return err;
+
+	return 0;
+}
+module_init(idxd_init_module);
+
+static void __exit idxd_exit_module(void)
+{
+	pci_unregister_driver(&idxd_pci_driver);
+}
+module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
new file mode 100644
index 000000000000..de4b80973c2f
--- /dev/null
+++ b/drivers/dma/idxd/irq.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+{
+	int i;
+
+	lockdep_assert_held(&idxd->dev_lock);
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		wq->state = IDXD_WQ_DISABLED;
+	}
+}
+
+static int idxd_restart(struct idxd_device *idxd)
+{
+	int i, rc;
+
+	lockdep_assert_held(&idxd->dev_lock);
+
+	rc = __idxd_device_reset(idxd);
+	if (rc < 0)
+		goto out;
+
+	rc = idxd_device_config(idxd);
+	if (rc < 0)
+		goto out;
+
+	rc = idxd_device_enable(idxd);
+	if (rc < 0)
+		goto out;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		if (wq->state == IDXD_WQ_ENABLED) {
+			rc = idxd_wq_enable(wq);
+			if (rc < 0) {
+				dev_warn(&idxd->pdev->dev,
+					 "Unable to re-enable wq %s\n",
+					 dev_name(&wq->conf_dev));
+			}
+		}
+	}
+
+	return 0;
+
+ out:
+	idxd_device_wqs_clear_state(idxd);
+	idxd->state = IDXD_DEV_HALTED;
+	return rc;
+}
+
+irqreturn_t idxd_irq_handler(int vec, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+	struct idxd_device *idxd = irq_entry->idxd;
+
+	idxd_mask_msix_vector(idxd, irq_entry->id);
+	return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Misc vector thread: ack causes, handle a halt, unmask the vector on exit.
+ */
+irqreturn_t idxd_misc_thread(int vec, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+	struct idxd_device *idxd = irq_entry->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	union gensts_reg gensts;
+	u32 cause, val = 0;
+	int i;
+	bool err = false;
+
+	cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+
+	if (cause & IDXD_INTC_ERR) {
+		spin_lock_bh(&idxd->dev_lock);
+		for (i = 0; i < 4; i++)
+			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
+					IDXD_SWERR_OFFSET + i * sizeof(u64));
+		iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET);
+		spin_unlock_bh(&idxd->dev_lock);
+		val |= IDXD_INTC_ERR;
+
+		for (i = 0; i < 4; i++)
+			dev_warn(dev, "err[%d]: %#16.16llx\n",
+				 i, idxd->sw_err.bits[i]);
+		err = true;
+	}
+
+	if (cause & IDXD_INTC_CMD) {
+		/* Nothing to do for a command interrupt beyond acking it */
+		val |= IDXD_INTC_CMD;
+	}
+
+	if (cause & IDXD_INTC_OCCUPY) {
+		/* Driver does not utilize occupancy interrupt */
+		val |= IDXD_INTC_OCCUPY;
+	}
+
+	if (cause & IDXD_INTC_PERFMON_OVFL) {
+		/* Driver does not utilize perfmon overflow interrupt yet */
+		val |= IDXD_INTC_PERFMON_OVFL;
+	}
+
+	val ^= cause;
+	if (val)
+		dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
+			      val);
+
+	iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+	if (!err)
+		goto out;
+
+	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
+		spin_lock_bh(&idxd->dev_lock);
+		if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
+			if (idxd_restart(idxd) < 0)
+				dev_err(dev,
+					"idxd restart failed, device halt.\n");
+		} else {
+			idxd_device_wqs_clear_state(idxd);
+			idxd->state = IDXD_DEV_HALTED;
+			dev_err(dev, "idxd halted, need %s.\n",
+				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
+				"FLR" : "system reset");
+		}
+		spin_unlock_bh(&idxd->dev_lock);
+	}
+
+ out:
+	/* idxd_irq_handler() masked this vector; always re-enable it */
+	idxd_unmask_msix_vector(idxd, irq_entry->id);
+	return IRQ_HANDLED;
+}
+
+irqreturn_t idxd_wq_thread(int irq, void *data)
+{
+	struct idxd_irq_entry *irq_entry = data;
+
+	idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
new file mode 100644
index 000000000000..146e51f1d872
--- /dev/null
+++ b/drivers/dma/idxd/registers.h
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_REGISTERS_H_
+#define _IDXD_REGISTERS_H_
+
+/* PCI Config */
+#define PCI_DEVICE_ID_INTEL_DSA_SPR0	0x0b25
+
+#define IDXD_MMIO_BAR		0
+#define IDXD_WQ_BAR		2
+
+/* MMIO Device BAR0 Registers */
+#define IDXD_VER_OFFSET			0x00
+#define IDXD_VER_MAJOR_MASK		0xf0
+#define IDXD_VER_MINOR_MASK		0x0f
+#define GET_IDXD_VER_MAJOR(x)		(((x) & IDXD_VER_MAJOR_MASK) >> 4)
+#define GET_IDXD_VER_MINOR(x)		((x) & IDXD_VER_MINOR_MASK)
+
+union gen_cap_reg {
+	struct {
+		u64 block_on_fault:1;
+		u64 overlap_copy:1;
+		u64 cache_control_mem:1;
+		u64 cache_control_cache:1;
+		u64 rsvd:3;
+		u64 int_handle_req:1;
+		u64 dest_readback:1;
+		u64 drain_readback:1;
+		u64 rsvd2:6;
+		u64 max_xfer_shift:5;
+		u64 max_batch_shift:4;
+		u64 max_ims_mult:6;
+		u64 config_en:1;
+		u64 max_descs_per_engine:8;
+		u64 rsvd3:24;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_GENCAP_OFFSET		0x10
+
+union wq_cap_reg {
+	struct {
+		u64 total_wq_size:16;
+		u64 num_wqs:8;
+		u64 rsvd:24;
+		u64 shared_mode:1;
+		u64 dedicated_mode:1;
+		u64 rsvd2:1;
+		u64 priority:1;
+		u64 occupancy:1;
+		u64 occupancy_int:1;
+		u64 rsvd3:10;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_WQCAP_OFFSET		0x20
+
+union group_cap_reg {
+	struct {
+		u64 num_groups:8;
+		u64 total_tokens:8;
+		u64 token_en:1;
+		u64 token_limit:1;
+		u64 rsvd:46;
+	};
+	u64 bits;
+} __packed;
+#define IDXD_GRPCAP_OFFSET		0x30
+
+union engine_cap_reg {
+	struct {
+		u64 num_engines:8;
+		u64 rsvd:56;
+	};
+	u64 bits;
+} __packed;
+
+#define IDXD_ENGCAP_OFFSET		0x38
+
+#define IDXD_OPCAP_NOOP			0x0001
+#define IDXD_OPCAP_BATCH			0x0002
+#define IDXD_OPCAP_MEMMOVE		0x0008
+struct opcap {
+	u64 bits[4];
+};
+
+#define IDXD_OPCAP_OFFSET		0x40
+
+#define IDXD_TABLE_OFFSET		0x60
+union offsets_reg {
+	struct {
+		u64 grpcfg:16;
+		u64 wqcfg:16;
+		u64 msix_perm:16;
+		u64 ims:16;
+		u64 perfmon:16;
+		u64 rsvd:48;
+	};
+	u64 bits[2];
+} __packed;
+
+#define IDXD_GENCFG_OFFSET		0x80
+union gencfg_reg {
+	struct {
+		u32 token_limit:8;
+		u32 rsvd:4;
+		u32 user_int_en:1;
+		u32 rsvd2:19;
+	};
+	u32 bits;
+} __packed;
+
+#define IDXD_GENCTRL_OFFSET		0x88
+union genctrl_reg {
+	struct {
+		u32 softerr_int_en:1;
+		u32 rsvd:31;
+	};
+	u32 bits;
+} __packed;
+
+#define IDXD_GENSTATS_OFFSET		0x90
+union gensts_reg {
+	struct {
+		u32 state:2;
+		u32 reset_type:2;
+		u32 rsvd:28;
+	};
+	u32 bits;
+} __packed;
+
+enum idxd_device_status_state {
+	IDXD_DEVICE_STATE_DISABLED = 0,
+	IDXD_DEVICE_STATE_ENABLED,
+	IDXD_DEVICE_STATE_DRAIN,
+	IDXD_DEVICE_STATE_HALT,
+};
+
+enum idxd_device_reset_type {
+	IDXD_DEVICE_RESET_SOFTWARE = 0,
+	IDXD_DEVICE_RESET_FLR,
+	IDXD_DEVICE_RESET_WARM,
+	IDXD_DEVICE_RESET_COLD,
+};
+
+#define IDXD_INTCAUSE_OFFSET		0x98
+#define IDXD_INTC_ERR			0x01
+#define IDXD_INTC_CMD			0x02
+#define IDXD_INTC_OCCUPY			0x04
+#define IDXD_INTC_PERFMON_OVFL		0x08
+
+#define IDXD_CMD_OFFSET			0xa0
+union idxd_command_reg {
+	struct {
+		u32 operand:20;
+		u32 cmd:5;
+		u32 rsvd:6;
+		u32 int_req:1;
+	};
+	u32 bits;
+} __packed;
+
+enum idxd_cmd {
+	IDXD_CMD_ENABLE_DEVICE = 1,
+	IDXD_CMD_DISABLE_DEVICE,
+	IDXD_CMD_DRAIN_ALL,
+	IDXD_CMD_ABORT_ALL,
+	IDXD_CMD_RESET_DEVICE,
+	IDXD_CMD_ENABLE_WQ,
+	IDXD_CMD_DISABLE_WQ,
+	IDXD_CMD_DRAIN_WQ,
+	IDXD_CMD_ABORT_WQ,
+	IDXD_CMD_RESET_WQ,
+	IDXD_CMD_DRAIN_PASID,
+	IDXD_CMD_ABORT_PASID,
+	IDXD_CMD_REQUEST_INT_HANDLE,
+};
+
+#define IDXD_CMDSTS_OFFSET		0xa8
+union cmdsts_reg {
+	struct {
+		u8 err;
+		u16 result;
+		u8 rsvd:7;
+		u8 active:1;
+	} __packed;	/* inner pack: no pad after err, so active is bit 31 */
+	u32 bits;
+} __packed;
+#define IDXD_CMDSTS_ACTIVE		0x80000000
+
+enum idxd_cmdsts_err {
+	IDXD_CMDSTS_SUCCESS = 0,
+	IDXD_CMDSTS_INVAL_CMD,
+	IDXD_CMDSTS_INVAL_WQIDX,
+	IDXD_CMDSTS_HW_ERR,
+	/* enable device errors */
+	IDXD_CMDSTS_ERR_DEV_ENABLED = 0x10,
+	IDXD_CMDSTS_ERR_CONFIG,
+	IDXD_CMDSTS_ERR_BUSMASTER_EN,
+	IDXD_CMDSTS_ERR_PASID_INVAL,
+	IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE,
+	IDXD_CMDSTS_ERR_GRP_CONFIG,
+	IDXD_CMDSTS_ERR_GRP_CONFIG2,
+	IDXD_CMDSTS_ERR_GRP_CONFIG3,
+	IDXD_CMDSTS_ERR_GRP_CONFIG4,
+	/* enable wq errors */
+	IDXD_CMDSTS_ERR_DEV_NOTEN = 0x20,
+	IDXD_CMDSTS_ERR_WQ_ENABLED,
+	IDXD_CMDSTS_ERR_WQ_SIZE,
+	IDXD_CMDSTS_ERR_WQ_PRIOR,
+	IDXD_CMDSTS_ERR_WQ_MODE,
+	IDXD_CMDSTS_ERR_BOF_EN,
+	IDXD_CMDSTS_ERR_PASID_EN,
+	IDXD_CMDSTS_ERR_MAX_BATCH_SIZE,
+	IDXD_CMDSTS_ERR_MAX_XFER_SIZE,
+	/* disable device errors */
+	IDXD_CMDSTS_ERR_DIS_DEV_EN = 0x31,
+	/* disable WQ, drain WQ, abort WQ, reset WQ */
+	IDXD_CMDSTS_ERR_DEV_NOT_EN,
+	/* request interrupt handle */
+	IDXD_CMDSTS_ERR_INVAL_INT_IDX = 0x41,
+	IDXD_CMDSTS_ERR_NO_HANDLE,
+};
+
+#define IDXD_SWERR_OFFSET		0xc0
+#define IDXD_SWERR_VALID			0x00000001
+#define IDXD_SWERR_OVERFLOW		0x00000002
+#define IDXD_SWERR_ACK			(IDXD_SWERR_VALID | IDXD_SWERR_OVERFLOW)
+union sw_err_reg {
+	struct {
+		u64 valid:1;
+		u64 overflow:1;
+		u64 desc_valid:1;
+		u64 wq_idx_valid:1;
+		u64 batch:1;
+		u64 fault_rw:1;
+		u64 priv:1;
+		u64 rsvd:1;
+		u64 error:8;
+		u64 wq_idx:8;
+		u64 rsvd2:8;
+		u64 operation:8;
+		u64 pasid:20;
+		u64 rsvd3:4;
+
+		u64 batch_idx:16;
+		u64 rsvd4:16;
+		u64 invalid_flags:32;
+
+		u64 fault_addr;
+
+		u64 rsvd5;
+	};
+	u64 bits[4];
+} __packed;
+
+union msix_perm {
+	struct {
+		u32 rsvd:2;
+		u32 ignore:1;
+		u32 pasid_en:1;
+		u32 rsvd2:8;
+		u32 pasid:20;
+	};
+	u32 bits;
+} __packed;
+
+union group_flags {
+	struct {
+		u32 tc_a:3;
+		u32 tc_b:3;
+		u32 rsvd:1;
+		u32 use_token_limit:1;
+		u32 tokens_reserved:8;
+		u32 rsvd2:4;
+		u32 tokens_allowed:8;
+		u32 rsvd3:4;
+	};
+	u32 bits;
+} __packed;
+
+struct grpcfg {
+	u64 wqs[4];
+	u64 engines;
+	union group_flags flags;
+} __packed;
+
+union wqcfg {
+	struct {
+		/* bytes 0-3 */
+		u16 wq_size;
+		u16 rsvd;
+
+		/* bytes 4-7 */
+		u16 wq_thresh;
+		u16 rsvd1;
+
+		/* bytes 8-11 */
+		u32 mode:1;	/* shared or dedicated */
+		u32 bof:1;	/* block on fault */
+		u32 rsvd2:2;
+		u32 priority:4;
+		u32 pasid:20;
+		u32 pasid_en:1;
+		u32 priv:1;
+		u32 rsvd3:2;
+
+		/* bytes 12-15 */
+		u32 max_xfer_shift:5;
+		u32 max_batch_shift:4;
+		u32 rsvd4:23;
+
+		/* bytes 16-19 */
+		u16 occupancy_inth;
+		u16 occupancy_table_sel:1;
+		u16 rsvd5:15;
+
+		/* bytes 20-23 */
+		u16 occupancy_limit;
+		u16 occupancy_int_en:1;
+		u16 rsvd6:15;
+
+		/* bytes 24-27 */
+		u16 occupancy;
+		u16 occupancy_int:1;
+		u16 rsvd7:12;
+		u16 mode_support:1;
+		u16 wq_state:2;
+
+		/* bytes 28-31 */
+		u32 rsvd8;
+	};
+	u32 bits[8];
+} __packed;
+#endif
-- 
cgit 


From c52ca478233c172b2d322b5241d6279a8661cbba Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:44:05 -0700
Subject: dmaengine: idxd: add configuration component of driver

The device is left unconfigured when the driver is loaded. Various
components are configured via the driver sysfs attributes. Once
configuration is done, the device can be enabled by writing the device name
to the bind attribute of the device driver sysfs. Disabling can be done
similarly. The individual work queues can also be enabled and disabled
through the bind/unbind attributes. A constructed hierarchy is created
through the struct device framework in order to provide appropriate
configuration points and device state and status. This hierarchy is
presented off the virtual DSA bus.

i.e. /sys/bus/dsa/...

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965024585.73301.6431413676230150589.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/Makefile    |    2 +-
 drivers/dma/idxd/device.c    |   31 +
 drivers/dma/idxd/idxd.h      |   24 +
 drivers/dma/idxd/init.c      |   27 +-
 drivers/dma/idxd/registers.h |    3 +-
 drivers/dma/idxd/sysfs.c     | 1452 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1536 insertions(+), 3 deletions(-)
 create mode 100644 drivers/dma/idxd/sysfs.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index 0dd1ca77513f..a552560a03dc 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
-idxd-y := init.o irq.o device.o
+idxd-y := init.o irq.o device.o sysfs.o
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index af2bdc18df3d..d626780caa53 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -155,6 +155,9 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 	struct device *dev = &idxd->pdev->dev;
 	int rc, num_descs, i;
 
+	if (wq->type != IDXD_WQT_KERNEL)
+		return 0;
+
 	num_descs = wq->size +
 		idxd->hw.gen_cap.max_descs_per_engine * group->num_engines;
 	wq->num_descs = num_descs;
@@ -206,6 +209,9 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
 {
 	struct device *dev = &wq->idxd->pdev->dev;
 
+	if (wq->type != IDXD_WQT_KERNEL)
+		return;
+
 	free_hw_descs(wq);
 	free_descs(wq);
 	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
@@ -277,6 +283,31 @@ int idxd_wq_disable(struct idxd_wq *wq)
 	return 0;
 }
 
+int idxd_wq_map_portal(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	resource_size_t start;
+
+	start = pci_resource_start(pdev, IDXD_WQ_BAR);
+	start = start + wq->id * IDXD_PORTAL_SIZE;
+
+	wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
+	if (!wq->dportal)
+		return -ENOMEM;
+	dev_dbg(dev, "wq %d portal mapped at %p\n", wq->id, wq->dportal);
+
+	return 0;
+}
+
+void idxd_wq_unmap_portal(struct idxd_wq *wq)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	devm_iounmap(dev, wq->dportal);
+}
+
 /* Device control bits */
 static inline bool idxd_is_enabled(struct idxd_device *idxd)
 {
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 733484922365..909926aefd3e 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -157,6 +157,7 @@ struct idxd_device {
 	int max_wqs;
 	int max_wq_size;
 	int token_limit;
+	int nr_tokens;		/* non-reserved tokens */
 
 	union sw_err_reg sw_err;
 
@@ -195,7 +196,28 @@ static inline void idxd_set_type(struct idxd_device *idxd)
 		idxd->type = IDXD_TYPE_UNKNOWN;
 }
 
+static inline void idxd_wq_get(struct idxd_wq *wq)
+{
+	wq->client_count++;
+}
+
+static inline void idxd_wq_put(struct idxd_wq *wq)
+{
+	wq->client_count--;
+}
+
+static inline int idxd_wq_refcount(struct idxd_wq *wq)
+{
+	return wq->client_count;	/* current number of client claims */
+}
+
 const char *idxd_get_dev_name(struct idxd_device *idxd);
+int idxd_register_bus_type(void);
+void idxd_unregister_bus_type(void);
+int idxd_setup_sysfs(struct idxd_device *idxd);
+void idxd_cleanup_sysfs(struct idxd_device *idxd);
+int idxd_register_driver(void);
+void idxd_unregister_driver(void);
 
 /* device interrupt control */
 irqreturn_t idxd_irq_handler(int vec, void *data);
@@ -221,5 +243,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq);
 void idxd_wq_free_resources(struct idxd_wq *wq);
 int idxd_wq_enable(struct idxd_wq *wq);
 int idxd_wq_disable(struct idxd_wq *wq);
+int idxd_wq_map_portal(struct idxd_wq *wq);
+void idxd_wq_unmap_portal(struct idxd_wq *wq);
 
 #endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 6e89a87d62b0..229386464923 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -244,6 +244,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
 	dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
 	idxd->max_tokens = idxd->hw.group_cap.total_tokens;
 	dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
+	idxd->nr_tokens = idxd->max_tokens;
 
 	/* read engine capabilities */
 	idxd->hw.engine_cap.bits =
@@ -381,6 +382,14 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
+	rc = idxd_setup_sysfs(idxd);
+	if (rc) {
+		dev_err(dev, "IDXD sysfs setup failed\n");
+		return -ENODEV;
+	}
+
+	idxd->state = IDXD_DEV_CONF_READY;
+
 	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
 		 idxd->hw.version);
 
@@ -418,6 +427,7 @@ static void idxd_remove(struct pci_dev *pdev)
 	struct idxd_device *idxd = pci_get_drvdata(pdev);
 
 	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	idxd_cleanup_sysfs(idxd);
 	idxd_shutdown(pdev);
 	idxd_wqs_free_lock(idxd);
 	mutex_lock(&idxd_idr_lock);
@@ -453,16 +463,31 @@ static int __init idxd_init_module(void)
 	for (i = 0; i < IDXD_TYPE_MAX; i++)
 		idr_init(&idxd_idrs[i]);
 
+	err = idxd_register_bus_type();
+	if (err < 0)
+		return err;
+
+	err = idxd_register_driver();
+	if (err < 0)
+		goto err_idxd_driver_register;
+
 	err = pci_register_driver(&idxd_pci_driver);
 	if (err)
-		return err;
+		goto err_pci_register;
 
 	return 0;
+
+err_pci_register:
+	idxd_unregister_driver();
+err_idxd_driver_register:
+	idxd_unregister_bus_type();
+	return err;
 }
 module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
 	pci_unregister_driver(&idxd_pci_driver);
+	idxd_unregister_bus_type();
 }
 module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 146e51f1d872..a39e7ae6b3d9 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -8,6 +8,7 @@
 
 #define IDXD_MMIO_BAR		0
 #define IDXD_WQ_BAR		2
+#define IDXD_PORTAL_SIZE	0x4000
 
 /* MMIO Device BAR0 Registers */
 #define IDXD_VER_OFFSET			0x00
@@ -223,7 +224,7 @@ enum idxd_cmdsts_err {
 };
 
 #define IDXD_SWERR_OFFSET		0xc0
-#define IDXD_SWERR_VALID			0x00000001
+#define IDXD_SWERR_VALID		0x00000001
 #define IDXD_SWERR_OVERFLOW		0x00000002
 #define IDXD_SWERR_ACK			(IDXD_SWERR_VALID | IDXD_SWERR_OVERFLOW)
 union sw_err_reg {
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
new file mode 100644
index 000000000000..b6a0a59b500f
--- /dev/null
+++ b/drivers/dma/idxd/sysfs.c
@@ -0,0 +1,1452 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+static char *idxd_wq_type_names[] = {
+	[IDXD_WQT_NONE]		= "none",
+	[IDXD_WQT_KERNEL]	= "kernel",
+};
+
+static void idxd_conf_device_release(struct device *dev)
+{
+	dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev));
+}
+
+static struct device_type idxd_group_device_type = {
+	.name = "group",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type idxd_wq_device_type = {
+	.name = "wq",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type idxd_engine_device_type = {
+	.name = "engine",
+	.release = idxd_conf_device_release,
+};
+
+static struct device_type dsa_device_type = {
+	.name = "dsa",
+	.release = idxd_conf_device_release,
+};
+
+static inline bool is_dsa_dev(struct device *dev)
+{
+	return dev ? dev->type == &dsa_device_type : false;
+}
+
+static inline bool is_idxd_dev(struct device *dev)
+{
+	return is_dsa_dev(dev);
+}
+
+static inline bool is_idxd_wq_dev(struct device *dev)
+{
+	return dev ? dev->type == &idxd_wq_device_type : false;
+}
+
+static int idxd_config_bus_match(struct device *dev,
+				 struct device_driver *drv)
+{
+	int matched = 0;
+
+	if (is_idxd_dev(dev)) {
+		struct idxd_device *idxd = confdev_to_idxd(dev);
+
+		if (idxd->state != IDXD_DEV_CONF_READY)
+			return 0;
+		matched = 1;
+	} else if (is_idxd_wq_dev(dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+		struct idxd_device *idxd = wq->idxd;
+
+		if (idxd->state < IDXD_DEV_CONF_READY)
+			return 0;
+
+		if (wq->state != IDXD_WQ_DISABLED) {
+			dev_dbg(dev, "%s not disabled\n", dev_name(dev));
+			return 0;
+		}
+		matched = 1;
+	}
+
+	if (matched)
+		dev_dbg(dev, "%s matched\n", dev_name(dev));
+
+	return matched;
+}
+
+/* Bus probe: start an idxd device, or bring up one of its work queues. */
+static int idxd_config_bus_probe(struct device *dev)
+{
+	struct idxd_device *idxd;
+	struct idxd_wq *wq;
+	unsigned long flags;
+	int rc;
+
+	dev_dbg(dev, "%s called\n", __func__);
+
+	if (is_idxd_dev(dev)) {
+		idxd = confdev_to_idxd(dev);
+
+		if (idxd->state != IDXD_DEV_CONF_READY) {
+			dev_warn(dev, "Device not ready for config\n");
+			return -EBUSY;
+		}
+
+		/* Perform IDXD configuration and enabling */
+		spin_lock_irqsave(&idxd->dev_lock, flags);
+		rc = idxd_device_config(idxd);
+		if (rc < 0) {
+			spin_unlock_irqrestore(&idxd->dev_lock, flags);
+			dev_warn(dev, "Device config failed: %d\n", rc);
+			return rc;
+		}
+
+		rc = idxd_device_enable(idxd);
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		if (rc < 0) {
+			dev_warn(dev, "Device enable failed: %d\n", rc);
+			return rc;
+		}
+
+		dev_info(dev, "Device %s enabled\n", dev_name(dev));
+		return 0;
+	}
+
+	if (!is_idxd_wq_dev(dev))
+		return -ENODEV;
+
+	wq = confdev_to_wq(dev);
+	idxd = wq->idxd;
+	mutex_lock(&wq->wq_lock);
+
+	if (idxd->state != IDXD_DEV_ENABLED) {
+		dev_warn(dev, "Enabling while device not enabled.\n");
+		rc = -EPERM;
+		goto out_unlock;
+	}
+
+	if (wq->state != IDXD_WQ_DISABLED) {
+		dev_warn(dev, "WQ %d already enabled.\n", wq->id);
+		rc = -EBUSY;
+		goto out_unlock;
+	}
+
+	if (!wq->group) {
+		dev_warn(dev, "WQ not attached to group.\n");
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (strlen(wq->name) == 0) {
+		dev_warn(dev, "WQ name not set.\n");
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	rc = idxd_wq_alloc_resources(wq);
+	if (rc < 0) {
+		dev_warn(dev, "WQ resource alloc failed\n");
+		goto out_unlock;
+	}
+
+	/* failures past this point must free the wq resources again */
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = idxd_device_config(idxd);
+	if (rc < 0) {
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc);
+		goto err_free;
+	}
+
+	rc = idxd_wq_enable(wq);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	if (rc < 0) {
+		dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc);
+		goto err_free;
+	}
+
+	rc = idxd_wq_map_portal(wq);
+	if (rc < 0) {
+		dev_warn(dev, "wq portal mapping failed: %d\n", rc);
+		goto err_disable;
+	}
+
+	wq->client_count = 0;
+	dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
+	mutex_unlock(&wq->wq_lock);
+	return 0;
+
+err_disable:
+	/* take dev_lock around idxd_wq_disable(), as disable_wq() does */
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	if (idxd_wq_disable(wq) < 0)
+		dev_warn(dev, "IDXD wq disable failed\n");
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+err_free:
+	idxd_wq_free_resources(wq);
+out_unlock:
+	mutex_unlock(&wq->wq_lock);
+	return rc;
+}
+
+static void disable_wq(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	unsigned long flags;
+	int rc;
+
+	mutex_lock(&wq->wq_lock);
+	dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev));
+	if (wq->state == IDXD_WQ_DISABLED) {
+		mutex_unlock(&wq->wq_lock);
+		return;
+	}
+
+	if (idxd_wq_refcount(wq))
+		dev_warn(dev, "Clients has claim on wq %d: %d\n",
+			 wq->id, idxd_wq_refcount(wq));
+
+	idxd_wq_unmap_portal(wq);
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	rc = idxd_wq_disable(wq);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	idxd_wq_free_resources(wq);
+	wq->client_count = 0;
+	mutex_unlock(&wq->wq_lock);
+
+	if (rc < 0)
+		dev_warn(dev, "Failed to disable %s: %d\n",
+			 dev_name(&wq->conf_dev), rc);
+	else
+		dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
+}
+
+static int idxd_config_bus_remove(struct device *dev)
+{
+	int rc;
+	unsigned long flags;
+
+	dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev));
+
+	/* disable workqueue here */
+	if (is_idxd_wq_dev(dev)) {
+		struct idxd_wq *wq = confdev_to_wq(dev);
+
+		disable_wq(wq);
+	} else if (is_idxd_dev(dev)) {
+		struct idxd_device *idxd = confdev_to_idxd(dev);
+		int i;
+
+		dev_dbg(dev, "%s removing dev %s\n", __func__,
+			dev_name(&idxd->conf_dev));
+		for (i = 0; i < idxd->max_wqs; i++) {
+			struct idxd_wq *wq = &idxd->wqs[i];
+
+			if (wq->state == IDXD_WQ_DISABLED)
+				continue;
+			dev_warn(dev, "Active wq %d on disable %s.\n", i,
+				 dev_name(&idxd->conf_dev));
+			device_release_driver(&wq->conf_dev);
+		}
+
+		spin_lock_irqsave(&idxd->dev_lock, flags);
+		rc = idxd_device_disable(idxd);
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		if (rc < 0)
+			dev_warn(dev, "Device disable failed\n");
+		else
+			dev_info(dev, "Device %s disabled\n", dev_name(dev));
+	}
+
+	return 0;
+}
+
+static void idxd_config_bus_shutdown(struct device *dev)
+{
+	dev_dbg(dev, "%s called\n", __func__);
+}
+
+static struct bus_type dsa_bus_type = {
+	.name = "dsa",
+	.match = idxd_config_bus_match,
+	.probe = idxd_config_bus_probe,
+	.remove = idxd_config_bus_remove,
+	.shutdown = idxd_config_bus_shutdown,
+};
+
+static struct bus_type *idxd_bus_types[] = {
+	&dsa_bus_type
+};
+
+static struct idxd_device_driver dsa_drv = {
+	.drv = {
+		.name = "dsa",
+		.bus = &dsa_bus_type,
+		.owner = THIS_MODULE,
+		.mod_name = KBUILD_MODNAME,
+	},
+};
+
+static struct idxd_device_driver *idxd_drvs[] = {
+	&dsa_drv
+};
+
+static struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
+{
+	return idxd_bus_types[idxd->type];
+}
+
+static struct device_type *idxd_get_device_type(struct idxd_device *idxd)
+{
+	if (idxd->type == IDXD_TYPE_DSA)
+		return &dsa_device_type;
+	else
+		return NULL;
+}
+
+/* IDXD generic driver setup: registers all of idxd_drvs[], or none of it */
+int idxd_register_driver(void)
+{
+	int i, rc;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		rc = driver_register(&idxd_drvs[i]->drv);
+		if (rc < 0)
+			goto drv_fail;
+	}
+
+	return 0;
+
+drv_fail:
+	while (--i >= 0)	/* entry i itself never registered */
+		driver_unregister(&idxd_drvs[i]->drv);
+	return rc;
+}
+
+void idxd_unregister_driver(void)
+{
+	int i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++)
+		driver_unregister(&idxd_drvs[i]->drv);
+}
+
+/* IDXD engine attributes */
+static ssize_t engine_group_id_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct idxd_engine *engine =
+		container_of(dev, struct idxd_engine, conf_dev);
+
+	if (engine->group)
+		return sprintf(buf, "%d\n", engine->group->id);
+	else
+		return sprintf(buf, "%d\n", -1);
+}
+
+static ssize_t engine_group_id_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct idxd_engine *engine =
+		container_of(dev, struct idxd_engine, conf_dev);
+	struct idxd_device *idxd = engine->idxd;
+	long id;
+	int rc;
+	struct idxd_group *prevg, *group;
+
+	rc = kstrtol(buf, 10, &id);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (id > idxd->max_groups - 1 || id < -1)
+		return -EINVAL;
+
+	if (id == -1) {
+		if (engine->group) {
+			engine->group->num_engines--;
+			engine->group = NULL;
+		}
+		return count;
+	}
+
+	group = &idxd->groups[id];
+	prevg = engine->group;
+
+	if (prevg)
+		prevg->num_engines--;
+	engine->group = &idxd->groups[id];
+	engine->group->num_engines++;
+
+	return count;
+}
+
+static struct device_attribute dev_attr_engine_group =
+		__ATTR(group_id, 0644, engine_group_id_show,
+		       engine_group_id_store);
+
+static struct attribute *idxd_engine_attributes[] = {
+	&dev_attr_engine_group.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_engine_attribute_group = {
+	.attrs = idxd_engine_attributes,
+};
+
+static const struct attribute_group *idxd_engine_attribute_groups[] = {
+	&idxd_engine_attribute_group,
+	NULL,
+};
+
+/* Group attributes */
+
+static void idxd_set_free_tokens(struct idxd_device *idxd)
+{
+	int i, tokens;
+
+	for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
+		struct idxd_group *g = &idxd->groups[i];
+
+		tokens += g->tokens_reserved;
+	}
+
+	idxd->nr_tokens = idxd->max_tokens - tokens;
+}
+
+static ssize_t group_tokens_reserved_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->tokens_reserved);
+}
+
+/* Store handler for tokens_reserved; also refreshes idxd->nr_tokens. */
+static ssize_t group_tokens_reserved_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+
+	if (val > idxd->max_tokens)
+		return -EINVAL;
+
+	/* nr_tokens excludes this group's own reservation; add it back */
+	if (val > idxd->nr_tokens + group->tokens_reserved)
+		return -EINVAL;
+
+	group->tokens_reserved = val;
+	idxd_set_free_tokens(idxd);
+	return count;
+}
+
+static struct device_attribute dev_attr_group_tokens_reserved =
+		__ATTR(tokens_reserved, 0644, group_tokens_reserved_show,
+		       group_tokens_reserved_store);
+
+static ssize_t group_tokens_allowed_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->tokens_allowed);
+}
+
+static ssize_t group_tokens_allowed_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+	if (val < 4 * group->num_engines ||
+	    val > group->tokens_reserved + idxd->nr_tokens)
+		return -EINVAL;
+
+	group->tokens_allowed = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_tokens_allowed =
+		__ATTR(tokens_allowed, 0644, group_tokens_allowed_show,
+		       group_tokens_allowed_store);
+
+static ssize_t group_use_token_limit_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%u\n", group->use_token_limit);
+}
+
+static ssize_t group_use_token_limit_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (idxd->token_limit == 0)
+		return -EPERM;
+
+	group->use_token_limit = !!val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_use_token_limit =
+		__ATTR(use_token_limit, 0644, group_use_token_limit_show,
+		       group_use_token_limit_store);
+
+/* List the group's engines as space-separated "engine<dev>.<id>" names. */
+static ssize_t group_engines_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	int i, rc = 0;
+
+	for (i = 0; i < idxd->max_engines; i++) {
+		struct idxd_engine *engine = &idxd->engines[i];
+
+		if (engine->group && engine->group->id == group->id)
+			rc += sprintf(buf + rc, "engine%d.%d ",
+					idxd->id, engine->id);
+	}
+
+	/* no entries: nothing to terminate (rc-- below would hit buf[-1]) */
+	if (!rc)
+		return 0;
+
+	rc--;
+	rc += sprintf(buf + rc, "\n");
+	return rc;
+}
+
+static struct device_attribute dev_attr_group_engines =
+		__ATTR(engines, 0444, group_engines_show, NULL);
+
+/* List the group's work queues as space-separated "wq<dev>.<id>" names. */
+static ssize_t group_work_queues_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	int i, rc = 0;
+
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct idxd_wq *wq = &idxd->wqs[i];
+
+		if (wq->group && wq->group->id == group->id)
+			rc += sprintf(buf + rc, "wq%d.%d ",
+					idxd->id, wq->id);
+	}
+
+	/* no entries: nothing to terminate (rc-- below would hit buf[-1]) */
+	if (!rc)
+		return 0;
+
+	rc--;
+	rc += sprintf(buf + rc, "\n");
+	return rc;
+}
+
+static struct device_attribute dev_attr_group_work_queues =
+		__ATTR(work_queues, 0444, group_work_queues_show, NULL);
+
+static ssize_t group_traffic_class_a_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%d\n", group->tc_a);
+}
+
+static ssize_t group_traffic_class_a_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	long val;
+	int rc;
+
+	rc = kstrtol(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (val < 0 || val > 7)
+		return -EINVAL;
+
+	group->tc_a = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_a =
+		__ATTR(traffic_class_a, 0644, group_traffic_class_a_show,
+		       group_traffic_class_a_store);
+
+static ssize_t group_traffic_class_b_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+
+	return sprintf(buf, "%d\n", group->tc_b);
+}
+
+static ssize_t group_traffic_class_b_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct idxd_group *group =
+		container_of(dev, struct idxd_group, conf_dev);
+	struct idxd_device *idxd = group->idxd;
+	long val;
+	int rc;
+
+	rc = kstrtol(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (idxd->state == IDXD_DEV_ENABLED)
+		return -EPERM;
+
+	if (val < 0 || val > 7)
+		return -EINVAL;
+
+	group->tc_b = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_b =
+		__ATTR(traffic_class_b, 0644, group_traffic_class_b_show,
+		       group_traffic_class_b_store);
+
+static struct attribute *idxd_group_attributes[] = {
+	&dev_attr_group_work_queues.attr,
+	&dev_attr_group_engines.attr,
+	&dev_attr_group_use_token_limit.attr,
+	&dev_attr_group_tokens_allowed.attr,
+	&dev_attr_group_tokens_reserved.attr,
+	&dev_attr_group_traffic_class_a.attr,
+	&dev_attr_group_traffic_class_b.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_group_attribute_group = {
+	.attrs = idxd_group_attributes,
+};
+
+static const struct attribute_group *idxd_group_attribute_groups[] = {
+	&idxd_group_attribute_group,
+	NULL,
+};
+
+/* IDXD work queue attribs */
+static ssize_t wq_clients_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%d\n", wq->client_count);
+}
+
+static struct device_attribute dev_attr_wq_clients =
+		__ATTR(clients, 0444, wq_clients_show, NULL);
+
+static ssize_t wq_state_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	switch (wq->state) {
+	case IDXD_WQ_DISABLED:
+		return sprintf(buf, "disabled\n");
+	case IDXD_WQ_ENABLED:
+		return sprintf(buf, "enabled\n");
+	}
+
+	return sprintf(buf, "unknown\n");
+}
+
+static struct device_attribute dev_attr_wq_state =
+		__ATTR(state, 0444, wq_state_show, NULL);
+
+static ssize_t wq_group_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	if (wq->group)
+		return sprintf(buf, "%u\n", wq->group->id);
+	else
+		return sprintf(buf, "-1\n");
+}
+
+static ssize_t wq_group_id_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+	long id;
+	int rc;
+	struct idxd_group *prevg, *group;
+
+	rc = kstrtol(buf, 10, &id);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (id > idxd->max_groups - 1 || id < -1)
+		return -EINVAL;
+
+	if (id == -1) {
+		if (wq->group) {
+			wq->group->num_wqs--;
+			wq->group = NULL;
+		}
+		return count;
+	}
+
+	group = &idxd->groups[id];
+	prevg = wq->group;
+
+	if (prevg)
+		prevg->num_wqs--;
+	wq->group = group;
+	group->num_wqs++;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_group_id =
+		__ATTR(group_id, 0644, wq_group_id_show, wq_group_id_store);
+
+static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%s\n",
+			wq_dedicated(wq) ? "dedicated" : "shared");
+}
+
+static ssize_t wq_mode_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	struct idxd_device *idxd = wq->idxd;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (sysfs_streq(buf, "dedicated")) {
+		set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+		wq->threshold = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_mode =
+		__ATTR(mode, 0644, wq_mode_show, wq_mode_store);
+
+static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n", wq->size);
+}
+
+static ssize_t wq_size_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	unsigned long size;
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &size);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (size > idxd->max_wq_size)
+		return -EINVAL;
+
+	wq->size = size;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_size =
+		__ATTR(size, 0644, wq_size_show, wq_size_store);
+
+static ssize_t wq_priority_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%u\n", wq->priority);
+}
+
+static ssize_t wq_priority_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	unsigned long prio;
+	struct idxd_device *idxd = wq->idxd;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &prio);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (prio > IDXD_MAX_PRIORITY)
+		return -EINVAL;
+
+	wq->priority = prio;
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_priority =
+		__ATTR(priority, 0644, wq_priority_show, wq_priority_store);
+
+static ssize_t wq_type_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	switch (wq->type) {
+	case IDXD_WQT_KERNEL:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_KERNEL]);
+	case IDXD_WQT_NONE:
+	default:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_NONE]);
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t wq_type_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+	enum idxd_wq_type old_type;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	old_type = wq->type;
+	if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
+		wq->type = IDXD_WQT_KERNEL;
+	else
+		wq->type = IDXD_WQT_NONE;
+
+	/* If we are changing queue type, clear the name */
+	if (wq->type != old_type)
+		memset(wq->name, 0, WQ_NAME_SIZE + 1);
+
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_type =
+		__ATTR(type, 0644, wq_type_show, wq_type_store);
+
+static ssize_t wq_name_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%s\n", wq->name);
+}
+
+static ssize_t wq_name_store(struct device *dev,
+			     struct device_attribute *attr, const char *buf,
+			     size_t count)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
+		return -EINVAL;
+
+	memset(wq->name, 0, WQ_NAME_SIZE + 1);
+	strncpy(wq->name, buf, WQ_NAME_SIZE);
+	strreplace(wq->name, '\n', '\0');
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_name =
+		__ATTR(name, 0644, wq_name_show, wq_name_store);
+
+static struct attribute *idxd_wq_attributes[] = {
+	&dev_attr_wq_clients.attr,
+	&dev_attr_wq_state.attr,
+	&dev_attr_wq_group_id.attr,
+	&dev_attr_wq_mode.attr,
+	&dev_attr_wq_size.attr,
+	&dev_attr_wq_priority.attr,
+	&dev_attr_wq_type.attr,
+	&dev_attr_wq_name.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_wq_attribute_group = {
+	.attrs = idxd_wq_attributes,
+};
+
+static const struct attribute_group *idxd_wq_attribute_groups[] = {
+	&idxd_wq_attribute_group,
+	NULL,
+};
+
+/* IDXD device attribs */
+static ssize_t max_work_queues_size_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_wq_size);
+}
+static DEVICE_ATTR_RO(max_work_queues_size);
+
+static ssize_t max_groups_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_groups);
+}
+static DEVICE_ATTR_RO(max_groups);
+
+static ssize_t max_work_queues_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_wqs);
+}
+static DEVICE_ATTR_RO(max_work_queues);
+
+static ssize_t max_engines_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_engines);
+}
+static DEVICE_ATTR_RO(max_engines);
+
+static ssize_t numa_node_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
+static ssize_t max_batch_size_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_batch_size);
+}
+static DEVICE_ATTR_RO(max_batch_size);
+
+/* sysfs read: print idxd->max_xfer_bytes with the %llu format. */
+static ssize_t max_transfer_size_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+
+	return sprintf(buf, "%llu\n", idxd->max_xfer_bytes);
+}
+static DEVICE_ATTR_RO(max_transfer_size);
+
+/* sysfs read: print word 0 of idxd->hw.opcap.bits in 0x-prefixed hex. */
+static ssize_t op_cap_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+
+	return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]);
+}
+static DEVICE_ATTR_RO(op_cap);
+
+/* sysfs read: print the IDXD_FLAG_CONFIGURABLE bit of idxd->flags. */
+static ssize_t configurable_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+
+	return sprintf(buf, "%u\n",
+		       test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
+}
+static DEVICE_ATTR_RO(configurable);
+
+/*
+ * sysfs read: print the sum of client_count over all max_wqs work queues.
+ */
+static ssize_t clients_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+	unsigned long irqflags;
+	int n, total = 0;
+
+	/* the per-wq counters are read while holding dev_lock */
+	spin_lock_irqsave(&idxd->dev_lock, irqflags);
+	for (n = 0; n < idxd->max_wqs; n++)
+		total += idxd->wqs[n].client_count;
+	spin_unlock_irqrestore(&idxd->dev_lock, irqflags);
+
+	return sprintf(buf, "%d\n", total);
+}
+static DEVICE_ATTR_RO(clients);
+
+static ssize_t state_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+	typeof(idxd->state) state = idxd->state;
+	const char *label = "unknown\n";
+
+	/* DISABLED and CONF_READY are both reported as "disabled". */
+	if (state == IDXD_DEV_DISABLED ||
+	    state == IDXD_DEV_CONF_READY)
+		label = "disabled\n";
+	else if (state == IDXD_DEV_ENABLED)
+		label = "enabled\n";
+	else if (state == IDXD_DEV_HALTED)
+		label = "halted\n";
+
+	return sprintf(buf, "%s", label);
+}
+static DEVICE_ATTR_RO(state);
+
+static ssize_t errors_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+	int i, out = 0;	/* out: bytes written to buf so far */
+	unsigned long flags;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	for (i = 0; i < 4; i++)	/* four sw_err.bits[] words, each followed by a space */
+		out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	out--;	/* step back so the newline overwrites the trailing space */
+	out += sprintf(buf + out, "\n");
+	return out;
+}
+static DEVICE_ATTR_RO(errors);
+
+/* sysfs read: print idxd->max_tokens as an unsigned decimal line. */
+static ssize_t max_tokens_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->max_tokens);
+}
+static DEVICE_ATTR_RO(max_tokens);
+
+/* sysfs read: print idxd->token_limit as an unsigned decimal line. */
+static ssize_t token_limit_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = container_of(dev, typeof(*idxd), conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->token_limit);
+}
+
+static ssize_t token_limit_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buf, 10, &val);	/* input is parsed as base 10 */
+	if (rc < 0)
+		return -EINVAL;	/* every parse failure is reported as -EINVAL */
+
+	if (idxd->state == IDXD_DEV_ENABLED)	/* refused while enabled */
+		return -EPERM;
+
+	if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;	/* device is not flagged configurable */
+
+	if (!idxd->hw.group_cap.token_limit)
+		return -EPERM;	/* group_cap.token_limit is zero */
+
+	if (val > idxd->hw.group_cap.total_tokens)
+		return -EINVAL;	/* limit may not exceed total_tokens */
+
+	idxd->token_limit = val;
+	return count;
+}
+static DEVICE_ATTR_RW(token_limit);
+
+static struct attribute *idxd_device_attributes[] = {	/* NULL-terminated */
+	&dev_attr_max_groups.attr,
+	&dev_attr_max_work_queues.attr,
+	&dev_attr_max_work_queues_size.attr,
+	&dev_attr_max_engines.attr,
+	&dev_attr_numa_node.attr,
+	&dev_attr_max_batch_size.attr,
+	&dev_attr_max_transfer_size.attr,
+	&dev_attr_op_cap.attr,
+	&dev_attr_configurable.attr,
+	&dev_attr_clients.attr,
+	&dev_attr_state.attr,
+	&dev_attr_errors.attr,
+	&dev_attr_max_tokens.attr,
+	&dev_attr_token_limit.attr,
+	NULL,
+};
+
+static const struct attribute_group idxd_device_attribute_group = {
+	.attrs = idxd_device_attributes,	/* the device-level list above */
+};
+
+static const struct attribute_group *idxd_attribute_groups[] = {
+	&idxd_device_attribute_group,	/* assigned to idxd->conf_dev.groups below */
+	NULL,
+};
+
+/* Register a device for each engine; unwind on the first failure. */
+static int idxd_setup_engine_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	/* every engine device is a child of idxd->conf_dev */
+	for (i = 0; i < idxd->max_engines; i++) {
+		struct device *edev = &idxd->engines[i].conf_dev;
+
+		edev->parent = &idxd->conf_dev;
+		dev_set_name(edev, "engine%d.%d",
+			     idxd->id, idxd->engines[i].id);
+		edev->bus = idxd_get_bus_type(idxd);
+		edev->groups = idxd_engine_attribute_groups;
+		edev->type = &idxd_engine_device_type;
+		dev_dbg(dev, "Engine device register: %s\n", dev_name(edev));
+		rc = device_register(edev);
+		if (rc < 0) {
+			/* registration failed: release edev, then unwind */
+			put_device(edev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	/* unregister engines [0, i), highest index first */
+	while (i--)
+		device_unregister(&idxd->engines[i].conf_dev);
+	return rc;
+}
+
+/* Register a device for each group; unwind on the first failure. */
+static int idxd_setup_group_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	/* every group device is a child of idxd->conf_dev */
+	for (i = 0; i < idxd->max_groups; i++) {
+		struct device *gdev = &idxd->groups[i].conf_dev;
+
+		gdev->parent = &idxd->conf_dev;
+		dev_set_name(gdev, "group%d.%d",
+			     idxd->id, idxd->groups[i].id);
+		gdev->bus = idxd_get_bus_type(idxd);
+		gdev->groups = idxd_group_attribute_groups;
+		gdev->type = &idxd_group_device_type;
+		dev_dbg(dev, "Group device register: %s\n", dev_name(gdev));
+		rc = device_register(gdev);
+		if (rc < 0) {
+			/* registration failed: release gdev, then unwind */
+			put_device(gdev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	/* unregister groups [0, i), highest index first */
+	while (i--)
+		device_unregister(&idxd->groups[i].conf_dev);
+	return rc;
+}
+
+/* Register a device for each work queue; unwind on the first failure. */
+static int idxd_setup_wq_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int i, rc;
+
+	/* every wq device is a child of idxd->conf_dev */
+	for (i = 0; i < idxd->max_wqs; i++) {
+		struct device *wdev = &idxd->wqs[i].conf_dev;
+
+		wdev->parent = &idxd->conf_dev;
+		dev_set_name(wdev, "wq%d.%d", idxd->id, idxd->wqs[i].id);
+		wdev->bus = idxd_get_bus_type(idxd);
+		wdev->groups = idxd_wq_attribute_groups;
+		wdev->type = &idxd_wq_device_type;
+		dev_dbg(dev, "WQ device register: %s\n", dev_name(wdev));
+		rc = device_register(wdev);
+		if (rc < 0) {
+			/* registration failed: release wdev, then unwind */
+			put_device(wdev);
+			goto cleanup;
+		}
+	}
+
+	return 0;
+
+cleanup:
+	/* unregister wqs [0, i), highest index first */
+	while (i--)
+		device_unregister(&idxd->wqs[i].conf_dev);
+	return rc;
+}
+
+static int idxd_setup_device_sysfs(struct idxd_device *idxd)
+{
+	struct device *conf = &idxd->conf_dev;
+	struct device *dev = &idxd->pdev->dev;
+	char devname[IDXD_NAME_SIZE];
+	int rc;
+
+	/* Name is "<dev name><id>"; idxd->pdev->dev becomes the parent. */
+	sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id);
+	conf->parent = dev;
+	dev_set_name(conf, "%s", devname);
+	conf->bus = idxd_get_bus_type(idxd);
+	conf->groups = idxd_attribute_groups;
+	conf->type = idxd_get_device_type(idxd);
+
+	dev_dbg(dev, "IDXD device register: %s\n", dev_name(conf));
+	rc = device_register(conf);
+	if (rc < 0)
+		put_device(conf);
+
+	return rc < 0 ? rc : 0;
+}
+
+int idxd_setup_sysfs(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+	int rc;
+
+	rc = idxd_setup_device_sysfs(idxd);	/* conf_dev first: it is the parent */
+	if (rc < 0) {
+		dev_dbg(dev, "Device sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_wq_sysfs(idxd);
+	if (rc < 0) {
+		/* TODO(review): conf_dev is left registered on this path */
+		dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_group_sysfs(idxd);
+	if (rc < 0) {
+		/* TODO(review): conf_dev and the wq devices stay registered */
+		dev_dbg(dev, "Group sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	rc = idxd_setup_engine_sysfs(idxd);
+	if (rc < 0) {
+		/* TODO(review): conf_dev, wq and group devices stay registered */
+		dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the sysfs devices of one idxd device. The wq, engine and
+ * group children are unregistered first, each in ascending index order,
+ * followed by the device's own conf_dev.
+ */
+void idxd_cleanup_sysfs(struct idxd_device *idxd)
+{
+	int n;
+
+	/* work queue children */
+	for (n = 0; n < idxd->max_wqs; n++)
+		device_unregister(&idxd->wqs[n].conf_dev);
+
+	/* engine children */
+	for (n = 0; n < idxd->max_engines; n++)
+		device_unregister(&idxd->engines[n].conf_dev);
+
+	/* group children */
+	for (n = 0; n < idxd->max_groups; n++)
+		device_unregister(&idxd->groups[n].conf_dev);
+
+	/* finally the parent */
+	device_unregister(&idxd->conf_dev);
+}
+
+int idxd_register_bus_type(void)
+{
+	int i, rc;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		rc = bus_register(idxd_bus_types[i]);
+		if (rc < 0)
+			goto bus_err;
+	}
+
+	return 0;
+
+bus_err:
+	for (; i > 0; i--)
+		bus_unregister(idxd_bus_types[i]);
+	return rc;
+}
+
+void idxd_unregister_bus_type(void)
+{
+	int n = 0;	/* index into idxd_bus_types[], walked front to back */
+
+	while (n < IDXD_TYPE_MAX)
+		bus_unregister(idxd_bus_types[n++]);
+}
-- 
cgit 


From d1dfe5b8ac644a0ffccfe7af22abed7c80b34702 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:44:17 -0700
Subject: dmaengine: idxd: add descriptor manipulation routines

This commit adds helper functions for DSA descriptor allocation,
submission, and free operations.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965025757.73301.12692876585357550065.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/Makefile |  2 +-
 drivers/dma/idxd/idxd.h   | 10 ++++++
 drivers/dma/idxd/submit.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma/idxd/submit.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index a552560a03dc..50eca12015e2 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
-idxd-y := init.o irq.o device.o sysfs.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 909926aefd3e..d369b75468e3 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -68,6 +68,11 @@ enum idxd_wq_type {
 #define WQ_NAME_SIZE   1024
 #define WQ_TYPE_SIZE   10
 
+enum idxd_op_type {
+	IDXD_OP_BLOCK = 0,
+	IDXD_OP_NONBLOCK = 1,
+};
+
 struct idxd_wq {
 	void __iomem *dportal;
 	struct device conf_dev;
@@ -246,4 +251,9 @@ int idxd_wq_disable(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
 
+/* submission */
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+
 #endif
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
new file mode 100644
index 000000000000..a405f06990e3
--- /dev/null
+++ b/drivers/dma/idxd/submit.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+{
+	struct idxd_desc *desc;
+	int idx;
+	struct idxd_device *idxd = wq->idxd;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return ERR_PTR(-EIO);
+
+	if (optype == IDXD_OP_BLOCK)
+		percpu_down_read(&wq->submit_lock);
+	else if (!percpu_down_read_trylock(&wq->submit_lock))
+		return ERR_PTR(-EBUSY);	/* non-blocking caller could not take the lock */
+
+	if (!atomic_add_unless(&wq->dq_count, 1, wq->size)) {
+		int rc;	/* this branch runs when dq_count already equals wq->size */
+
+		if (optype == IDXD_OP_NONBLOCK) {
+			percpu_up_read(&wq->submit_lock);
+			return ERR_PTR(-EAGAIN);
+		}
+
+		percpu_up_read(&wq->submit_lock);
+		percpu_down_write(&wq->submit_lock);	/* held while sleeping for a slot */
+		rc = wait_event_interruptible(wq->submit_waitq,
+					      atomic_add_unless(&wq->dq_count,
+								1, wq->size) ||
+					       idxd->state != IDXD_DEV_ENABLED);
+		percpu_up_write(&wq->submit_lock);
+		if (rc < 0)
+			return ERR_PTR(-EINTR);
+		if (idxd->state != IDXD_DEV_ENABLED)
+			return ERR_PTR(-EIO);	/* NOTE(review): a slot taken by the wait is not released here */
+	} else {
+		percpu_up_read(&wq->submit_lock);
+	}
+
+	idx = sbitmap_get(&wq->sbmap, 0, false);
+	if (idx < 0) {
+		atomic_dec(&wq->dq_count);	/* undo the slot reservation */
+		return ERR_PTR(-EAGAIN);
+	}
+
+	desc = wq->descs[idx];
+	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
+	memset(desc->completion, 0, sizeof(struct dsa_completion_record));
+	return desc;	/* hw descriptor and completion record are zeroed */
+}
+
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+	atomic_dec(&wq->dq_count);	/* give back the slot counted by idxd_alloc_desc() */
+
+	sbitmap_clear_bit(&wq->sbmap, desc->id);
+	wake_up(&wq->submit_waitq);	/* blocked allocators wait on this queue */
+}
+
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+	struct idxd_device *idxd = wq->idxd;
+	int vec = desc->hw->int_handle;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return -EIO;
+
+	/*
+	 * The wmb() flushes writes to coherent DMA data before possibly
+	 * triggering a DMA read. The wmb() is necessary even on UP because
+	 * the recipient is a device.
+	 */
+	wmb();
+	iosubmit_cmds512(wq->dportal, desc->hw, 1);	/* one descriptor, written to the wq portal */
+
+	/*
+	 * Pending the descriptor to the lockless list for the irq_entry
+	 * that we designated the descriptor to.
+	 */
+	llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist);
+
+	return 0;
+}
-- 
cgit 


From 8f47d1a5e545f903cd049c42da31a3be36178447 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:44:23 -0700
Subject: dmaengine: idxd: connect idxd to dmaengine subsystem

Add plumbing for dmaengine subsystem connection. The driver registers a DMA
device per DSA device. The channels are dynamically registered when a
workqueue is configured to be of the "kernel:dmaengine" type.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965026376.73301.13867988830650740445.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/Makefile |   2 +-
 drivers/dma/idxd/device.c |   5 ++
 drivers/dma/idxd/dma.c    | 217 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/idxd.h   |  20 +++++
 drivers/dma/idxd/init.c   |  30 +++++++
 drivers/dma/idxd/irq.c    |  87 +++++++++++++++++++
 drivers/dma/idxd/submit.c |   4 +-
 drivers/dma/idxd/sysfs.c  |  28 ++++++
 8 files changed, 391 insertions(+), 2 deletions(-)
 create mode 100644 drivers/dma/idxd/dma.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index 50eca12015e2..a036ba0e77d2 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
-idxd-y := init.o irq.o device.o sysfs.o submit.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index d626780caa53..b4c4cec489df 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -5,7 +5,9 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
 #include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
 #include "idxd.h"
 #include "registers.h"
 
@@ -192,6 +194,9 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
 			sizeof(struct dsa_completion_record) * i;
 		desc->id = i;
 		desc->wq = wq;
+
+		dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
+		desc->txd.tx_submit = idxd_dma_tx_submit;
 	}
 
 	return 0;
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
new file mode 100644
index 000000000000..c64c1429d160
--- /dev/null
+++ b/drivers/dma/idxd/dma.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "idxd.h"
+
+static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
+{
+	return container_of(c, struct idxd_wq, dma_chan);	/* c is wq->dma_chan */
+}
+
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+			   enum idxd_complete_type comp_type)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct dmaengine_result res = { .residue = 0 };	/* no stale stack data */
+	int complete = 1;
+	/* Translate the completion status / abort request into a result. */
+	if (desc->completion->status == DSA_COMP_SUCCESS)
+		res.result = DMA_TRANS_NOERROR;
+	else if (desc->completion->status)
+		res.result = DMA_TRANS_WRITE_FAILED;
+	else if (comp_type == IDXD_COMPLETE_ABORT)
+		res.result = DMA_TRANS_ABORTED;
+	else
+		complete = 0;	/* status still zero and not an abort */
+	/* The callback runs only for a reportable tx with a non-zero cookie. */
+	tx = &desc->txd;
+	if (complete && tx->cookie) {
+		dma_cookie_complete(tx);
+		dma_descriptor_unmap(tx);
+		dmaengine_desc_get_callback_invoke(tx, &res);
+		tx->callback = NULL;
+		tx->callback_result = NULL;
+	}
+}
+
+static void op_flag_setup(unsigned long flags, u32 *desc_flags)
+{
+	/* CRAV and RCR are always set; RCI only with DMA_PREP_INTERRUPT. */
+	u32 rci = (flags & DMA_PREP_INTERRUPT) ? IDXD_OP_FLAG_RCI : 0;
+	*desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | rci;
+}
+
+static inline void set_completion_address(struct idxd_desc *desc,
+					  u64 *compl_addr)
+{
+		*compl_addr = desc->compl_dma;	/* dma_addr_t stored into a u64 */
+}
+
+static inline void idxd_prep_desc_common(struct idxd_wq *wq,
+					 struct dsa_hw_desc *hw, char opcode,
+					 u64 addr_f1, u64 addr_f2, u64 len,
+					 u64 compl, u32 flags)
+{
+	struct idxd_device *idxd = wq->idxd;
+
+	hw->flags = flags;
+	hw->opcode = opcode;
+	hw->src_addr = addr_f1;	/* first address field */
+	hw->dst_addr = addr_f2;	/* second address field */
+	hw->xfer_size = len;
+	hw->priv = !!(wq->type == IDXD_WQT_KERNEL);	/* 1 only for kernel-type wqs */
+	hw->completion_addr = compl;
+
+	/*
+	 * Completion vectors are numbered 1..idxd->num_wq_irqs. Advance
+	 * wq->vec_ptr round robin so successive descriptors rotate vectors.
+	 */
+	wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
+	hw->int_handle =  wq->vec_ptr;
+}
+
+static struct dma_async_tx_descriptor *
+idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+		       dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct idxd_wq *wq = to_idxd_wq(c);
+	u32 desc_flags;
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_desc *desc;
+
+	if (wq->state != IDXD_WQ_ENABLED)
+		return NULL;
+
+	if (len > idxd->max_xfer_bytes)
+		return NULL;	/* larger than the device's transfer limit */
+
+	op_flag_setup(flags, &desc_flags);
+	desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);	/* blocking allocation */
+	if (IS_ERR(desc))
+		return NULL;	/* the specific error code is not passed on */
+
+	idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMMOVE,
+			      dma_src, dma_dest, len, desc->compl_dma,
+			      desc_flags);	/* source goes first, then destination */
+
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+}
+
+/* device_alloc_chan_resources hook: idxd_wq_get() the wq, log its refcount. */
+static int idxd_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct idxd_wq *wq = container_of(chan, struct idxd_wq, dma_chan);
+
+	idxd_wq_get(wq);
+	dev_dbg(&wq->idxd->pdev->dev, "%s: client_count: %d\n", __func__,
+		idxd_wq_refcount(wq));
+	return 0;
+}
+
+/* device_free_chan_resources hook: idxd_wq_put() the wq, log its refcount. */
+static void idxd_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct idxd_wq *wq = container_of(chan, struct idxd_wq, dma_chan);
+
+	idxd_wq_put(wq);
+	dev_dbg(&wq->idxd->pdev->dev, "%s: client_count: %d\n", __func__,
+		idxd_wq_refcount(wq));
+}
+
+/* device_tx_status hook: defers entirely to dma_cookie_status(). */
+static enum dma_status idxd_dma_tx_status(struct dma_chan *dma_chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(dma_chan, cookie, txstate);
+}
+
+/*
+ * issue_pending() is intentionally empty: idxd_dma_tx_submit() already
+ * hands each descriptor to idxd_submit_desc() at submit time.
+ */
+static void idxd_dma_issue_pending(struct dma_chan *dma_chan)
+{
+}
+
+/*
+ * tx_submit hook installed on every descriptor: assign a cookie, then hand
+ * the descriptor to idxd_submit_desc(). If that fails the descriptor is
+ * freed and the negative error is returned in place of the cookie.
+ */
+dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct idxd_desc *desc = container_of(tx, struct idxd_desc, txd);
+	struct idxd_wq *wq = to_idxd_wq(tx->chan);
+	dma_cookie_t cookie = dma_cookie_assign(tx);
+	int rc = idxd_submit_desc(wq, desc);
+
+	if (rc >= 0)
+		return cookie;
+
+	idxd_free_desc(wq, desc);
+	return rc;
+}
+
+static void idxd_dma_release(struct dma_device *device)
+{
+}
+
+int idxd_register_dma_device(struct idxd_device *idxd)
+{
+	struct dma_device *dma = &idxd->dma_dev;
+
+	INIT_LIST_HEAD(&dma->channels);	/* channels are linked in per wq later */
+	dma->dev = &idxd->pdev->dev;
+
+	dma->device_release = idxd_dma_release;	/* empty callback */
+
+	if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) {	/* memcpy only if MEMMOVE is reported */
+		dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+		dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy;
+	}
+
+	dma->device_tx_status = idxd_dma_tx_status;
+	dma->device_issue_pending = idxd_dma_issue_pending;
+	dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = idxd_dma_free_chan_resources;
+
+	return dma_async_device_register(&idxd->dma_dev);
+}
+
+void idxd_unregister_dma_device(struct idxd_device *idxd)
+{
+	dma_async_device_unregister(&idxd->dma_dev);
+}
+
+int idxd_register_dma_channel(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct dma_device *dma = &idxd->dma_dev;
+	struct dma_chan *chan = &wq->dma_chan;
+	int rc;
+	/* Start from a zeroed channel, link it to the device, then register. */
+	memset(&wq->dma_chan, 0, sizeof(struct dma_chan));
+	chan->device = dma;
+	list_add_tail(&chan->device_node, &dma->channels);
+	rc = dma_async_device_channel_register(dma, chan);
+	if (rc < 0)	/* do not leave a failed channel linked on the device */
+		list_del(&chan->device_node);
+
+	return rc < 0 ? rc : 0;
+}
+
+void idxd_unregister_dma_channel(struct idxd_wq *wq)
+{
+	dma_async_device_channel_unregister(&wq->idxd->dma_dev, &wq->dma_chan);
+}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index d369b75468e3..a36214818d1e 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -4,6 +4,7 @@
 #define _IDXD_H_
 
 #include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/wait.h>
 #include "registers.h"
@@ -73,6 +74,11 @@ enum idxd_op_type {
 	IDXD_OP_NONBLOCK = 1,
 };
 
+enum idxd_complete_type {
+	IDXD_COMPLETE_NORMAL = 0,
+	IDXD_COMPLETE_ABORT,
+};
+
 struct idxd_wq {
 	void __iomem *dportal;
 	struct device conf_dev;
@@ -97,6 +103,7 @@ struct idxd_wq {
 	int compls_size;
 	struct idxd_desc **descs;
 	struct sbitmap sbmap;
+	struct dma_chan dma_chan;
 	struct percpu_rw_semaphore submit_lock;
 	wait_queue_head_t submit_waitq;
 	char name[WQ_NAME_SIZE + 1];
@@ -169,6 +176,8 @@ struct idxd_device {
 	struct msix_entry *msix_entries;
 	int num_wq_irqs;
 	struct idxd_irq_entry *irq_entries;
+
+	struct dma_device dma_dev;
 };
 
 /* IDXD software descriptor */
@@ -177,6 +186,7 @@ struct idxd_desc {
 	dma_addr_t desc_dma;
 	struct dsa_completion_record *completion;
 	dma_addr_t compl_dma;
+	struct dma_async_tx_descriptor txd;
 	struct llist_node llnode;
 	struct list_head list;
 	int id;
@@ -256,4 +266,14 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
 struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
 void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
 
+/* dmaengine */
+int idxd_register_dma_device(struct idxd_device *idxd);
+void idxd_unregister_dma_device(struct idxd_device *idxd);
+int idxd_register_dma_channel(struct idxd_wq *wq);
+void idxd_unregister_dma_channel(struct idxd_wq *wq);
+void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+			   enum idxd_complete_type comp_type);
+dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
+
 #endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 229386464923..cf6e1d89dd02 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -15,6 +15,8 @@
 #include <linux/device.h>
 #include <linux/idr.h>
 #include <uapi/linux/idxd.h>
+#include <linux/dmaengine.h>
+#include "../dmaengine.h"
 #include "registers.h"
 #include "idxd.h"
 
@@ -396,6 +398,32 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 }
 
+/* Abort-complete and free every descriptor still on ie->pending_llist. */
+static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
+{
+	struct llist_node *first = llist_del_all(&ie->pending_llist);
+	struct idxd_desc *cur, *nxt;
+
+	if (!first)
+		return;
+
+	llist_for_each_entry_safe(cur, nxt, first, llnode) {
+		idxd_dma_complete_txd(cur, IDXD_COMPLETE_ABORT);
+		idxd_free_desc(cur->wq, cur);
+	}
+}
+
+static void idxd_flush_work_list(struct idxd_irq_entry *ie)
+{
+	struct idxd_desc *desc, *iter;
+
+	list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
+		list_del(&desc->list);	/* unlink before completing and freeing */
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
+		idxd_free_desc(desc->wq, desc);
+	}
+}
+
 static void idxd_shutdown(struct pci_dev *pdev)
 {
 	struct idxd_device *idxd = pci_get_drvdata(pdev);
@@ -419,6 +447,8 @@ static void idxd_shutdown(struct pci_dev *pdev)
 		synchronize_irq(idxd->msix_entries[i].vector);
 		if (i == 0)
 			continue;
+		idxd_flush_pending_llist(irq_entry);
+		idxd_flush_work_list(irq_entry);
 	}
 }
 
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index de4b80973c2f..770d408470db 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -5,7 +5,9 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
 #include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
 #include "idxd.h"
 #include "registers.h"
 
@@ -146,11 +148,96 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 	return IRQ_HANDLED;
 }
 
+static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
+				     int *processed)
+{
+	struct idxd_desc *desc, *t;
+	struct llist_node *head;
+	int queued = 0;	/* descriptors moved to work_list in this pass */
+
+	head = llist_del_all(&irq_entry->pending_llist);	/* take the whole list at once */
+	if (!head)
+		return 0;
+
+	llist_for_each_entry_safe(desc, t, head, llnode) {
+		if (desc->completion->status) {	/* non-zero status: treated as completed */
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL);
+			idxd_free_desc(desc->wq, desc);
+			(*processed)++;
+		} else {	/* status still zero: park it on work_list */
+			list_add_tail(&desc->list, &irq_entry->work_list);
+			queued++;
+		}
+	}
+
+	return queued;
+}
+
+/*
+ * Walk irq_entry->work_list once. Entries whose completion status is
+ * non-zero are unlinked, completed via idxd_dma_complete_txd(), freed and
+ * counted in *processed. Entries whose status is still zero stay on the
+ * list; their number is returned.
+ */
+static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
+				 int *processed)
+{
+	struct idxd_desc *cur, *nxt;
+	int waiting = 0;
+
+	list_for_each_entry_safe(cur, nxt, &irq_entry->work_list, list) {
+		if (!cur->completion->status) {
+			waiting++;
+			continue;
+		}
+
+		list_del(&cur->list);
+		idxd_dma_complete_txd(cur, IDXD_COMPLETE_NORMAL);
+		idxd_free_desc(cur->wq, cur);
+		(*processed)++;
+	}
+
+	return waiting;
+}
+
 irqreturn_t idxd_wq_thread(int irq, void *data)
 {
 	struct idxd_irq_entry *irq_entry = data;
+	int rc, processed = 0, retry = 0;
+
+	/*
+	 * There are two lists we are processing. The pending_llist is where
+	 * submitters add every submitted descriptor after sending it to
+	 * the workqueue. It's a lockless singly linked list. The work_list
+	 * is the common linux doubly linked list. We are in a scenario of
+	 * multiple producers and a single consumer. The producers are all
+	 * the kernel submitters of descriptors, and the consumer is the
+	 * kernel irq handler thread for the msix vector when using threaded
+	 * irq. To work with the restrictions of llist to remain lockless,
+	 * we are doing the following steps:
+	 * 1. Iterate through the work_list and process any completed
+	 *    descriptor. Delete the completed entries during iteration.
+	 * 2. llist_del_all() from the pending list.
+	 * 3. Iterate through the llist that was deleted from the pending list
+	 *    and process the completed entries.
+	 * 4. If the entry is still waiting on hardware, list_add_tail() to
+	 *    the work_list.
+	 * 5. Repeat until nothing is left waiting or 10 retries were made.
+	 */
+	do {
+		rc = irq_process_work_list(irq_entry, &processed);
+		if (rc != 0) {
+			retry++;
+			continue;
+		}
+
+		rc = irq_process_pending_llist(irq_entry, &processed);
+	} while (rc != 0 && retry != 10);
 
 	idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
 
+	if (processed == 0)
+		return IRQ_NONE;
+
 	return IRQ_HANDLED;
 }
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index a405f06990e3..e16cab37dda8 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -85,7 +85,9 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * Pending the descriptor to the lockless list for the irq_entry
 	 * that we designated the descriptor to.
 	 */
-	llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist);
+	if (desc->hw->flags & IDXD_OP_FLAG_RCI)
+		llist_add(&desc->llnode,
+			  &idxd->irq_entries[vec].pending_llist);
 
 	return 0;
 }
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index b6a0a59b500f..f5e3f962ee6a 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -55,6 +55,14 @@ static inline bool is_idxd_wq_dev(struct device *dev)
 	return dev ? dev->type == &idxd_wq_device_type : false;
 }
 
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+	if (wq->type == IDXD_WQT_KERNEL &&	/* kernel-type wq named "dmaengine" */
+	    strcmp(wq->name, "dmaengine") == 0)
+		return true;
+	return false;
+}
+
 static int idxd_config_bus_match(struct device *dev,
 				 struct device_driver *drv)
 {
@@ -122,6 +130,12 @@ static int idxd_config_bus_probe(struct device *dev)
 		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 		dev_info(dev, "Device %s enabled\n", dev_name(dev));
 
+		rc = idxd_register_dma_device(idxd);
+		if (rc < 0) {
+			/* dev_lock was already released above: no unlock here */
+			dev_dbg(dev, "Failed to register dmaengine device\n");
+			return rc;
+		}
 		return 0;
 	} else if (is_idxd_wq_dev(dev)) {
 		struct idxd_wq *wq = confdev_to_wq(dev);
@@ -194,6 +208,16 @@ static int idxd_config_bus_probe(struct device *dev)
 		wq->client_count = 0;
 
 		dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
+
+		if (is_idxd_wq_dmaengine(wq)) {
+			rc = idxd_register_dma_channel(wq);
+			if (rc < 0) {
+				dev_dbg(dev, "DMA channel register failed\n");
+				mutex_unlock(&wq->wq_lock);
+				return rc;
+			}
+		}
+
 		mutex_unlock(&wq->wq_lock);
 		return 0;
 	}
@@ -215,6 +239,9 @@ static void disable_wq(struct idxd_wq *wq)
 		return;
 	}
 
+	if (is_idxd_wq_dmaengine(wq))
+		idxd_unregister_dma_channel(wq);
+
 	if (idxd_wq_refcount(wq))
 		dev_warn(dev, "Clients has claim on wq %d: %d\n",
 			 wq->id, idxd_wq_refcount(wq));
@@ -264,6 +291,7 @@ static int idxd_config_bus_remove(struct device *dev)
 			device_release_driver(&wq->conf_dev);
 		}
 
+		idxd_unregister_dma_device(idxd);
 		spin_lock_irqsave(&idxd->dev_lock, flags);
 		rc = idxd_device_disable(idxd);
 		spin_unlock_irqrestore(&idxd->dev_lock, flags);
-- 
cgit 


From 42d279f9137ab7d5503836baec2739284b278d8f Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 21 Jan 2020 16:44:29 -0700
Subject: dmaengine: idxd: add char driver to expose submission portal to
 userland

Create a char device region that will allow acquisition of user portals in
order to allow applications to submit DMA operations. A char device will be
created per work queue that gets exposed. The workqueue type "user"
is used to mark a work queue for user char device. For example if the
workqueue 0 of DSA device 0 is marked for char device, then a device node
of /dev/dsa/wq0.0 will be created.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/157965026985.73301.976523230037106742.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/Makefile |   2 +-
 drivers/dma/idxd/cdev.c   | 355 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/device.c |   2 +-
 drivers/dma/idxd/idxd.h   |  37 ++++++
 drivers/dma/idxd/init.c   |  10 ++
 drivers/dma/idxd/irq.c    |  18 +++
 drivers/dma/idxd/submit.c |   4 +-
 drivers/dma/idxd/sysfs.c  |  52 +++++++-
 8 files changed, 475 insertions(+), 5 deletions(-)
 create mode 100644 drivers/dma/idxd/cdev.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index a036ba0e77d2..8978b898d777 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
-idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
new file mode 100644
index 000000000000..1d7347825b95
--- /dev/null
+++ b/drivers/dma/idxd/cdev.c
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/sched/task.h>
+#include <linux/intel-svm.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+/* Char device region and minor allocator for one accelerator type. */
+struct idxd_cdev_context {
+	const char *name;
+	dev_t devt;
+	struct ida minor_ida;
+};
+
+/*
+ * ictx is an array based off of accelerator types. enum idxd_type
+ * is used as index
+ */
+static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
+	{ .name = "dsa" },
+};
+
+/* Per-open state, kept in file->private_data. */
+struct idxd_user_context {
+	struct idxd_wq *wq;
+	struct task_struct *task;
+	unsigned int flags;
+};
+
+/* Tells idxd_wq_cdev_cleanup() whether there is a cdev to delete. */
+enum idxd_cdev_cleanup {
+	CDEV_NORMAL = 0,
+	CDEV_FAILED,
+};
+
+/* Release callback of idxd_cdev_device_type: frees the struct device. */
+static void idxd_cdev_dev_release(struct device *dev)
+{
+	dev_dbg(dev, "releasing cdev device\n");
+	kfree(dev);
+}
+
+static struct device_type idxd_cdev_device_type = {
+	.name = "idxd_cdev",
+	.release = idxd_cdev_dev_release,
+};
+
+static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
+{
+	struct cdev *cdev = inode->i_cdev;
+
+	return container_of(cdev, struct idxd_cdev, cdev);
+}
+
+static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
+{
+	return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
+}
+
+static inline struct idxd_wq *inode_wq(struct inode *inode)
+{
+	return idxd_cdev_wq(inode_idxd_cdev(inode));
+}
+
+/*
+ * Open a wq char device: allocate the per-open context, store it in
+ * filp->private_data and take a reference on the wq.
+ */
+static int idxd_cdev_open(struct inode *inode, struct file *filp)
+{
+	struct idxd_user_context *ctx;
+	struct idxd_device *idxd;
+	struct idxd_wq *wq;
+	struct device *dev;
+	struct idxd_cdev *idxd_cdev;
+
+	wq = inode_wq(inode);
+	idxd = wq->idxd;
+	dev = &idxd->pdev->dev;
+	idxd_cdev = &wq->idxd_cdev;
+
+	dev_dbg(dev, "%s called\n", __func__);
+
+	if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq))
+		return -EBUSY;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->wq = wq;
+	filp->private_data = ctx;
+	idxd_wq_get(wq);
+	return 0;
+}
+
+/* Close a wq char device: free the context and drop the wq reference. */
+static int idxd_cdev_release(struct inode *node, struct file *filep)
+{
+	struct idxd_user_context *ctx = filep->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+
+	dev_dbg(dev, "%s called\n", __func__);
+	filep->private_data = NULL;
+
+	kfree(ctx);
+	idxd_wq_put(wq);
+	return 0;
+}
+
+/* Refuse mappings larger than one page. Returns 0 or -EINVAL. */
+static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
+		     const char *func)
+{
+	struct device *dev = &wq->idxd->pdev->dev;
+
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+		dev_info_ratelimited(dev,
+				     "%s: %s: mapping too large: %lu\n",
+				     current->comm, func,
+				     vma->vm_end - vma->vm_start);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Map the wq's limited portal page, uncached, into the caller's address
+ * space. A vma rejected by check_vma() is not mapped.
+ */
+static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct idxd_user_context *ctx = filp->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct pci_dev *pdev = idxd->pdev;
+	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
+	unsigned long pfn;
+	int rc;
+
+	dev_dbg(&pdev->dev, "%s called\n", __func__);
+	rc = check_vma(wq, vma, __func__);
+	if (rc < 0)
+		return rc;
+
+	vma->vm_flags |= VM_DONTCOPY;
+	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
+				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_private_data = ctx;
+
+	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+			vma->vm_page_prot);
+}
+
+/* Poll for errors: readable while idxd->sw_err.valid is set. */
+static __poll_t idxd_cdev_poll(struct file *filp,
+			       struct poll_table_struct *wait)
+{
+	struct idxd_user_context *ctx = filp->private_data;
+	struct idxd_wq *wq = ctx->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	unsigned long flags;
+	__poll_t out = 0;
+
+	poll_wait(filp, &idxd_cdev->err_queue, wait);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	if (idxd->sw_err.valid)
+		out = EPOLLIN | EPOLLRDNORM;
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+
+	return out;
+}
+
+static const struct file_operations idxd_cdev_fops = {
+	.owner = THIS_MODULE,
+	.open = idxd_cdev_open,
+	.release = idxd_cdev_release,
+	.mmap = idxd_cdev_mmap,
+	.poll = idxd_cdev_poll,
+};
+
+/* Return the char device major number used for this device's type. */
+int idxd_cdev_get_major(struct idxd_device *idxd)
+{
+	return MAJOR(ictx[idxd->type].devt);
+}
+
+/*
+ * Allocate a minor number and register the struct device behind the wq
+ * char device. On success idxd_cdev->dev and idxd_cdev->minor are set;
+ * on failure idxd_cdev->dev is NULL and nothing stays allocated.
+ */
+static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct idxd_cdev_context *cdev_ctx;
+	struct device *dev;
+	int minor, rc;
+
+	idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
+	if (!idxd_cdev->dev)
+		return -ENOMEM;
+	dev = idxd_cdev->dev;
+
+	/*
+	 * Get the minor before dev_set_name() so that this failure path has
+	 * nothing but the bare allocation to undo with kfree().
+	 */
+	cdev_ctx = &ictx[wq->idxd->type];
+	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
+	if (minor < 0) {
+		rc = minor;
+		goto ida_err;
+	}
+
+	dev->parent = &idxd->pdev->dev;
+	dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
+		     idxd->id, wq->id);
+	dev->bus = idxd_get_bus_type(idxd);
+	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
+	dev->type = &idxd_cdev_device_type;
+	rc = device_register(dev);
+	if (rc < 0) {
+		dev_err(&idxd->pdev->dev, "device register failed\n");
+		goto dev_reg_err;
+	}
+	idxd_cdev->minor = minor;
+
+	return 0;
+
+ dev_reg_err:
+	ida_simple_remove(&cdev_ctx->minor_ida, minor);
+	/*
+	 * After device_register() the device is refcounted even on failure:
+	 * drop it with put_device() and let ->release() do the kfree().
+	 */
+	put_device(dev);
+	idxd_cdev->dev = NULL;
+	return rc;
+
+ ida_err:
+	kfree(dev);
+	idxd_cdev->dev = NULL;
+	return rc;
+}
+
+/*
+ * Undo idxd_wq_cdev_dev_setup() and, for CDEV_NORMAL only, cdev_add().
+ * CDEV_FAILED is used when cdev_add() itself failed.
+ */
+static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
+				 enum idxd_cdev_cleanup cdev_state)
+{
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct idxd_cdev_context *cdev_ctx;
+
+	cdev_ctx = &ictx[wq->idxd->type];
+	if (cdev_state == CDEV_NORMAL)
+		cdev_del(&idxd_cdev->cdev);
+	device_unregister(idxd_cdev->dev);
+	/*
+	 * The device_type->release() will be called on the device and free
+	 * the allocated struct device. We can just forget it.
+	 */
+	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+	idxd_cdev->dev = NULL;
+	idxd_cdev->minor = -1;
+}
+
+/* Register the struct device and the char device for a wq. */
+int idxd_wq_add_cdev(struct idxd_wq *wq)
+{
+	struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
+	struct cdev *cdev = &idxd_cdev->cdev;
+	struct device *dev;
+	int rc;
+
+	rc = idxd_wq_cdev_dev_setup(wq);
+	if (rc < 0)
+		return rc;
+
+	/* The cdev can be opened as soon as cdev_add() returns. */
+	init_waitqueue_head(&idxd_cdev->err_queue);
+
+	dev = idxd_cdev->dev;
+	cdev_init(cdev, &idxd_cdev_fops);
+	cdev_set_parent(cdev, &dev->kobj);
+	rc = cdev_add(cdev, dev->devt, 1);
+	if (rc) {
+		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
+		idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
+		return rc;
+	}
+
+	return 0;
+}
+
+/* Remove the char device and struct device created by idxd_wq_add_cdev(). */
+void idxd_wq_del_cdev(struct idxd_wq *wq)
+{
+	idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
+}
+
+/*
+ * Allocate a char device region for every accelerator type. On failure
+ * the regions allocated so far are released again.
+ */
+int idxd_cdev_register(void)
+{
+	int rc, i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		ida_init(&ictx[i].minor_ida);
+		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
+					 ictx[i].name);
+		if (rc)
+			goto err_unwind;
+	}
+
+	return 0;
+
+ err_unwind:
+	while (--i >= 0) {
+		unregister_chrdev_region(ictx[i].devt, MINORMASK);
+		ida_destroy(&ictx[i].minor_ida);
+	}
+	return rc;
+}
+
+/* Release all char device regions taken by idxd_cdev_register(). */
+void idxd_cdev_remove(void)
+{
+	int i;
+
+	for (i = 0; i < IDXD_TYPE_MAX; i++) {
+		unregister_chrdev_region(ictx[i].devt, MINORMASK);
+		ida_destroy(&ictx[i].minor_ida);
+	}
+}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index b4c4cec489df..ada69e722f84 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -539,7 +539,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	wq->wqcfg.wq_thresh = wq->threshold;
 
 	/* byte 8-11 */
-	wq->wqcfg.priv = 1; /* kernel, therefore priv */
+	wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
 	wq->wqcfg.mode = 1;
 
 	wq->wqcfg.priority = wq->priority;
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index a36214818d1e..b8f8a363b4a7 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -7,6 +7,7 @@
 #include <linux/dmaengine.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/wait.h>
+#include <linux/cdev.h>
 #include "registers.h"
 
 #define IDXD_DRIVER_VERSION	"1.00"
@@ -63,6 +64,14 @@ enum idxd_wq_flag {
 enum idxd_wq_type {
 	IDXD_WQT_NONE = 0,
 	IDXD_WQT_KERNEL,
+	IDXD_WQT_USER,
+};
+
+struct idxd_cdev {
+	struct cdev cdev;
+	struct device *dev;
+	int minor;
+	struct wait_queue_head err_queue;
 };
 
 #define IDXD_ALLOCATED_BATCH_SIZE	128U
@@ -82,6 +91,7 @@ enum idxd_complete_type {
 struct idxd_wq {
 	void __iomem *dportal;
 	struct device conf_dev;
+	struct idxd_cdev idxd_cdev;
 	struct idxd_device *idxd;
 	int id;
 	enum idxd_wq_type type;
@@ -145,6 +155,7 @@ struct idxd_device {
 	enum idxd_device_state state;
 	unsigned long flags;
 	int id;
+	int major;
 
 	struct pci_dev *pdev;
 	void __iomem *reg_base;
@@ -196,11 +207,29 @@ struct idxd_desc {
 #define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
 #define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
 
+extern struct bus_type dsa_bus_type;
+
 static inline bool wq_dedicated(struct idxd_wq *wq)
 {
 	return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
 }
 
+enum idxd_portal_prot {
+	IDXD_PORTAL_UNLIMITED = 0,
+	IDXD_PORTAL_LIMITED,
+};
+
+static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
+{
+	return prot * 0x1000;
+}
+
+static inline int idxd_get_wq_portal_full_offset(int wq_id,
+						 enum idxd_portal_prot prot)
+{
+	return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
+}
+
 static inline void idxd_set_type(struct idxd_device *idxd)
 {
 	struct pci_dev *pdev = idxd->pdev;
@@ -233,6 +262,7 @@ int idxd_setup_sysfs(struct idxd_device *idxd);
 void idxd_cleanup_sysfs(struct idxd_device *idxd);
 int idxd_register_driver(void);
 void idxd_unregister_driver(void);
+struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);
 
 /* device interrupt control */
 irqreturn_t idxd_irq_handler(int vec, void *data);
@@ -276,4 +306,11 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
 			   enum idxd_complete_type comp_type);
 dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
 
+/* cdev */
+int idxd_cdev_register(void);
+void idxd_cdev_remove(void);
+int idxd_cdev_get_major(struct idxd_device *idxd);
+int idxd_wq_add_cdev(struct idxd_wq *wq);
+void idxd_wq_del_cdev(struct idxd_wq *wq);
+
 #endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index cf6e1d89dd02..7778c05deb5d 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -188,6 +188,7 @@ static int idxd_setup_internals(struct idxd_device *idxd)
 		mutex_init(&wq->wq_lock);
 		atomic_set(&wq->dq_count, 0);
 		init_waitqueue_head(&wq->submit_waitq);
+		wq->idxd_cdev.minor = -1;
 		rc = percpu_init_rwsem(&wq->submit_lock);
 		if (rc < 0) {
 			idxd_wqs_free_lock(idxd);
@@ -321,6 +322,8 @@ static int idxd_probe(struct idxd_device *idxd)
 		goto err_idr_fail;
 	}
 
+	idxd->major = idxd_cdev_get_major(idxd);
+
 	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
 	return 0;
 
@@ -501,6 +504,10 @@ static int __init idxd_init_module(void)
 	if (err < 0)
 		goto err_idxd_driver_register;
 
+	err = idxd_cdev_register();
+	if (err)
+		goto err_cdev_register;
+
 	err = pci_register_driver(&idxd_pci_driver);
 	if (err)
 		goto err_pci_register;
@@ -508,6 +515,8 @@ static int __init idxd_init_module(void)
 	return 0;
 
 err_pci_register:
+	idxd_cdev_remove();
+err_cdev_register:
 	idxd_unregister_driver();
 err_idxd_driver_register:
 	idxd_unregister_bus_type();
@@ -518,6 +527,7 @@ module_init(idxd_init_module);
 static void __exit idxd_exit_module(void)
 {
 	pci_unregister_driver(&idxd_pci_driver);
+	idxd_cdev_remove();
 	idxd_unregister_bus_type();
 }
 module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 770d408470db..d6fcd2e60103 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -89,6 +89,24 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 			idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
 					IDXD_SWERR_OFFSET + i * sizeof(u64));
 		iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET);
+
+		if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
+			int id = idxd->sw_err.wq_idx;
+			struct idxd_wq *wq = &idxd->wqs[id];
+
+			if (wq->type == IDXD_WQT_USER)
+				wake_up_interruptible(&wq->idxd_cdev.err_queue);
+		} else {
+			int i;
+
+			for (i = 0; i < idxd->max_wqs; i++) {
+				struct idxd_wq *wq = &idxd->wqs[i];
+
+				if (wq->type == IDXD_WQT_USER)
+					wake_up_interruptible(&wq->idxd_cdev.err_queue);
+			}
+		}
+
 		spin_unlock_bh(&idxd->dev_lock);
 		val |= IDXD_INTC_ERR;
 
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index e16cab37dda8..45a0c5869a0a 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -69,17 +69,19 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 {
 	struct idxd_device *idxd = wq->idxd;
 	int vec = desc->hw->int_handle;
+	void __iomem *portal;
 
 	if (idxd->state != IDXD_DEV_ENABLED)
 		return -EIO;
 
+	portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED);
 	/*
 	 * The wmb() flushes writes to coherent DMA data before possibly
 	 * triggering a DMA read. The wmb() is necessary even on UP because
 	 * the recipient is a device.
 	 */
 	wmb();
-	iosubmit_cmds512(wq->dportal, desc->hw, 1);
+	iosubmit_cmds512(portal, desc->hw, 1);
 
 	/*
 	 * Pending the descriptor to the lockless list for the irq_entry
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index f5e3f962ee6a..849c50ab939a 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -13,6 +13,7 @@
 static char *idxd_wq_type_names[] = {
 	[IDXD_WQT_NONE]		= "none",
 	[IDXD_WQT_KERNEL]	= "kernel",
+	[IDXD_WQT_USER]		= "user",
 };
 
 static void idxd_conf_device_release(struct device *dev)
@@ -63,6 +64,11 @@ static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
 	return false;
 }
 
+static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+{
+	return wq->type == IDXD_WQT_USER ? true : false;
+}
+
 static int idxd_config_bus_match(struct device *dev,
 				 struct device_driver *drv)
 {
@@ -109,6 +115,9 @@ static int idxd_config_bus_probe(struct device *dev)
 			return -EBUSY;
 		}
 
+		if (!try_module_get(THIS_MODULE))
+			return -ENXIO;
+
 		spin_lock_irqsave(&idxd->dev_lock, flags);
 
 		/* Perform IDXD configuration and enabling */
@@ -216,6 +225,13 @@ static int idxd_config_bus_probe(struct device *dev)
 				mutex_unlock(&wq->wq_lock);
 				return rc;
 			}
+		} else if (is_idxd_wq_cdev(wq)) {
+			rc = idxd_wq_add_cdev(wq);
+			if (rc < 0) {
+				dev_dbg(dev, "Cdev creation failed\n");
+				mutex_unlock(&wq->wq_lock);
+				return rc;
+			}
 		}
 
 		mutex_unlock(&wq->wq_lock);
@@ -241,6 +257,8 @@ static void disable_wq(struct idxd_wq *wq)
 
 	if (is_idxd_wq_dmaengine(wq))
 		idxd_unregister_dma_channel(wq);
+	else if (is_idxd_wq_cdev(wq))
+		idxd_wq_del_cdev(wq);
 
 	if (idxd_wq_refcount(wq))
 		dev_warn(dev, "Clients has claim on wq %d: %d\n",
@@ -295,10 +313,12 @@ static int idxd_config_bus_remove(struct device *dev)
 		spin_lock_irqsave(&idxd->dev_lock, flags);
 		rc = idxd_device_disable(idxd);
 		spin_unlock_irqrestore(&idxd->dev_lock, flags);
+		module_put(THIS_MODULE);
 		if (rc < 0)
 			dev_warn(dev, "Device disable failed\n");
 		else
 			dev_info(dev, "Device %s disabled\n", dev_name(dev));
+
 	}
 
 	return 0;
@@ -309,7 +329,7 @@ static void idxd_config_bus_shutdown(struct device *dev)
 	dev_dbg(dev, "%s called\n", __func__);
 }
 
-static struct bus_type dsa_bus_type = {
+struct bus_type dsa_bus_type = {
 	.name = "dsa",
 	.match = idxd_config_bus_match,
 	.probe = idxd_config_bus_probe,
@@ -334,7 +354,7 @@ static struct idxd_device_driver *idxd_drvs[] = {
 	&dsa_drv
 };
 
-static struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
+struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
 {
 	return idxd_bus_types[idxd->type];
 }
@@ -956,6 +976,9 @@ static ssize_t wq_type_show(struct device *dev,
 	case IDXD_WQT_KERNEL:
 		return sprintf(buf, "%s\n",
 			       idxd_wq_type_names[IDXD_WQT_KERNEL]);
+	case IDXD_WQT_USER:
+		return sprintf(buf, "%s\n",
+			       idxd_wq_type_names[IDXD_WQT_USER]);
 	case IDXD_WQT_NONE:
 	default:
 		return sprintf(buf, "%s\n",
@@ -978,6 +1001,8 @@ static ssize_t wq_type_store(struct device *dev,
 	old_type = wq->type;
 	if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
 		wq->type = IDXD_WQT_KERNEL;
+	else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
+		wq->type = IDXD_WQT_USER;
 	else
 		wq->type = IDXD_WQT_NONE;
 
@@ -1020,6 +1045,17 @@ static ssize_t wq_name_store(struct device *dev,
 static struct device_attribute dev_attr_wq_name =
 		__ATTR(name, 0644, wq_name_show, wq_name_store);
 
+static ssize_t wq_cdev_minor_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+	return sprintf(buf, "%d\n", wq->idxd_cdev.minor);
+}
+
+static struct device_attribute dev_attr_wq_cdev_minor =
+		__ATTR(cdev_minor, 0444, wq_cdev_minor_show, NULL);
+
 static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_clients.attr,
 	&dev_attr_wq_state.attr,
@@ -1029,6 +1065,7 @@ static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_priority.attr,
 	&dev_attr_wq_type.attr,
 	&dev_attr_wq_name.attr,
+	&dev_attr_wq_cdev_minor.attr,
 	NULL,
 };
 
@@ -1242,6 +1279,16 @@ static ssize_t token_limit_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(token_limit);
 
+static ssize_t cdev_major_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd =
+		container_of(dev, struct idxd_device, conf_dev);
+
+	return sprintf(buf, "%u\n", idxd->major);
+}
+static DEVICE_ATTR_RO(cdev_major);
+
 static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_max_groups.attr,
 	&dev_attr_max_work_queues.attr,
@@ -1257,6 +1304,7 @@ static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_errors.attr,
 	&dev_attr_max_tokens.attr,
 	&dev_attr_token_limit.attr,
+	&dev_attr_cdev_major.attr,
 	NULL,
 };
 
-- 
cgit 


From e9f08b65250d73ab70e79e194813f52b8d306784 Mon Sep 17 00:00:00 2001
From: Zhou Wang <wangzhou1@hisilicon.com>
Date: Thu, 16 Jan 2020 14:10:57 +0800
Subject: dmaengine: hisilicon: Add Kunpeng DMA engine support

This patch adds a driver for the HiSilicon Kunpeng DMA engine. This DMA
engine, which is a PCIe iEP, offers 30 channels; each channel has a send
queue, a complete queue and an interrupt to help carry out tasks. The DMA
engine can copy memory between memory blocks or between memory and a device
buffer.

Signed-off-by: Zhou Wang <wangzhou1@hisilicon.com>
Signed-off-by: Zhenfa Qiu <qiuzhenfa@hisilicon.com>
Link: https://lore.kernel.org/r/1579155057-80523-1-git-send-email-wangzhou1@hisilicon.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig    |   8 +
 drivers/dma/Makefile   |   1 +
 drivers/dma/hisi_dma.c | 611 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 620 insertions(+)
 create mode 100644 drivers/dma/hisi_dma.c

(limited to 'drivers/dma')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a8f8e9552885..5142da401db3 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -239,6 +239,14 @@ config FSL_RAID
 	  the capability to offload memcpy, xor and pq computation
 	  for raid5/6.
 
+config HISI_DMA
+	tristate "HiSilicon DMA Engine support"
+	depends on ARM64 || (COMPILE_TEST && PCI_MSI)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support HiSilicon Kunpeng DMA engine.
+
 config IMG_MDC_DMA
 	tristate "IMG MDC support"
 	depends on MIPS || COMPILE_TEST
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 461d77c4a839..1d908394fbea 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o
 obj-$(CONFIG_MCF_EDMA) += mcf-edma.o fsl-edma-common.o
 obj-$(CONFIG_FSL_QDMA) += fsl-qdma.o
 obj-$(CONFIG_FSL_RAID) += fsl_raid.o
+obj-$(CONFIG_HISI_DMA) += hisi_dma.o
 obj-$(CONFIG_HSU_DMA) += hsu/
 obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
new file mode 100644
index 000000000000..ed3619266a48
--- /dev/null
+++ b/drivers/dma/hisi_dma.c
@@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019 HiSilicon Limited. */
+#include <linux/bitfield.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "virt-dma.h"
+
+#define HISI_DMA_SQ_BASE_L		0x0
+#define HISI_DMA_SQ_BASE_H		0x4
+#define HISI_DMA_SQ_DEPTH		0x8
+#define HISI_DMA_SQ_TAIL_PTR		0xc
+#define HISI_DMA_CQ_BASE_L		0x10
+#define HISI_DMA_CQ_BASE_H		0x14
+#define HISI_DMA_CQ_DEPTH		0x18
+#define HISI_DMA_CQ_HEAD_PTR		0x1c
+#define HISI_DMA_CTRL0			0x20
+#define HISI_DMA_CTRL0_QUEUE_EN_S	0
+#define HISI_DMA_CTRL0_QUEUE_PAUSE_S	4
+#define HISI_DMA_CTRL1			0x24
+#define HISI_DMA_CTRL1_QUEUE_RESET_S	0
+#define HISI_DMA_Q_FSM_STS		0x30
+#define HISI_DMA_FSM_STS_MASK		GENMASK(3, 0)
+#define HISI_DMA_INT_STS		0x40
+#define HISI_DMA_INT_STS_MASK		GENMASK(12, 0)
+#define HISI_DMA_INT_MSK		0x44
+#define HISI_DMA_MODE			0x217c
+#define HISI_DMA_OFFSET			0x100
+
+#define HISI_DMA_MSI_NUM		30
+#define HISI_DMA_CHAN_NUM		30
+#define HISI_DMA_Q_DEPTH_VAL		1024
+
+#define PCI_BAR_2			2
+
+enum hisi_dma_mode {
+	EP = 0,
+	RC,
+};
+
+enum hisi_dma_chan_status {
+	DISABLE = -1,
+	IDLE = 0,
+	RUN,
+	CPL,
+	PAUSE,
+	HALT,
+	ABORT,
+	WAIT,
+	BUFFCLR,
+};
+
+struct hisi_dma_sqe {
+	__le32 dw0;
+#define OPCODE_MASK			GENMASK(3, 0)
+#define OPCODE_SMALL_PACKAGE		0x1
+#define OPCODE_M2M			0x4
+#define LOCAL_IRQ_EN			BIT(8)
+#define ATTR_SRC_MASK			GENMASK(14, 12)
+	__le32 dw1;
+	__le32 dw2;
+#define ATTR_DST_MASK			GENMASK(26, 24)
+	__le32 length;
+	__le64 src_addr;
+	__le64 dst_addr;
+};
+
+struct hisi_dma_cqe {
+	__le32 rsv0;
+	__le32 rsv1;
+	__le16 sq_head;
+	__le16 rsv2;
+	__le16 rsv3;
+	__le16 w0;
+#define STATUS_MASK			GENMASK(15, 1)
+#define STATUS_SUCC			0x0
+#define VALID_BIT			BIT(0)
+};
+
+struct hisi_dma_desc {
+	struct virt_dma_desc vd;
+	struct hisi_dma_sqe sqe;
+};
+
+struct hisi_dma_chan {
+	struct virt_dma_chan vc;
+	struct hisi_dma_dev *hdma_dev;
+	struct hisi_dma_sqe *sq;
+	struct hisi_dma_cqe *cq;
+	dma_addr_t sq_dma;
+	dma_addr_t cq_dma;
+	u32 sq_tail;
+	u32 cq_head;
+	u32 qp_num;
+	enum hisi_dma_chan_status status;
+	struct hisi_dma_desc *desc;
+};
+
+struct hisi_dma_dev {
+	struct pci_dev *pdev;
+	void __iomem *base;
+	struct dma_device dma_dev;
+	u32 chan_num;
+	u32 chan_depth;
+	struct hisi_dma_chan chan[];
+};
+
+static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct hisi_dma_chan, vc.chan);
+}
+
+static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct hisi_dma_desc, vd);
+}
+
+static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index,
+				       u32 val)
+{
+	writel_relaxed(val, base + reg + index * HISI_DMA_OFFSET);
+}
+
+static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
+{
+	u32 tmp;
+
+	tmp = readl_relaxed(addr);
+	tmp = val ? tmp | BIT(pos) : tmp & ~BIT(pos);
+	writel_relaxed(tmp, addr);
+}
+
+static void hisi_dma_free_irq_vectors(void *data)
+{
+	pci_free_irq_vectors(data);
+}
+
+static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+			       bool pause)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_PAUSE_S, pause);
+}
+
+static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+				bool enable)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_EN_S, enable);
+}
+
+static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_INT_MSK, qp_index,
+			    HISI_DMA_INT_STS_MASK);
+}
+
+static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	void __iomem *base = hdma_dev->base;
+
+	hisi_dma_chan_write(base, HISI_DMA_INT_STS, qp_index,
+			    HISI_DMA_INT_STS_MASK);
+	hisi_dma_chan_write(base, HISI_DMA_INT_MSK, qp_index, 0);
+}
+
+static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL1 + index *
+			     HISI_DMA_OFFSET;
+
+	hisi_dma_update_bit(addr, HISI_DMA_CTRL1_QUEUE_RESET_S, 1);
+}
+
+static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+}
+
+static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
+{
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	u32 index = chan->qp_num, tmp;
+	int ret;
+
+	hisi_dma_pause_dma(hdma_dev, index, true);
+	hisi_dma_enable_dma(hdma_dev, index, false);
+	hisi_dma_mask_irq(hdma_dev, index);
+
+	ret = readl_relaxed_poll_timeout(hdma_dev->base +
+		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
+		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, 10, 1000);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n");
+		WARN_ON(1);
+	}
+
+	hisi_dma_do_reset(hdma_dev, index);
+	hisi_dma_reset_qp_point(hdma_dev, index);
+	hisi_dma_pause_dma(hdma_dev, index, false);
+	hisi_dma_enable_dma(hdma_dev, index, true);
+	hisi_dma_unmask_irq(hdma_dev, index);
+
+	ret = readl_relaxed_poll_timeout(hdma_dev->base +
+		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
+		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, 10, 1000);
+	if (ret) {
+		dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n");
+		WARN_ON(1);
+	}
+}
+
+static void hisi_dma_free_chan_resources(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+
+	hisi_dma_reset_hw_chan(chan);
+	vchan_free_chan_resources(&chan->vc);
+
+	memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth);
+	memset(chan->cq, 0, sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth);
+	chan->sq_tail = 0;
+	chan->cq_head = 0;
+	chan->status = DISABLE;
+}
+
+static void hisi_dma_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(to_hisi_dma_desc(vd));
+}
+
+static struct dma_async_tx_descriptor *
+hisi_dma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dst, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	struct hisi_dma_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sqe.length = cpu_to_le32(len);
+	desc->sqe.src_addr = cpu_to_le64(src);
+	desc->sqe.dst_addr = cpu_to_le64(dst);
+
+	return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+}
+
+static enum dma_status
+hisi_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		   struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(c, cookie, txstate);
+}
+
+static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
+{
+	struct hisi_dma_sqe *sqe = chan->sq + chan->sq_tail;
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	struct hisi_dma_desc *desc;
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd) {
+		dev_err(&hdma_dev->pdev->dev, "no issued task!\n");
+		chan->desc = NULL;
+		return;
+	}
+	list_del(&vd->node);
+	desc = to_hisi_dma_desc(vd);
+	chan->desc = desc;
+
+	memcpy(sqe, &desc->sqe, sizeof(struct hisi_dma_sqe));
+
+	/* update other field in sqe */
+	sqe->dw0 = cpu_to_le32(FIELD_PREP(OPCODE_MASK, OPCODE_M2M));
+	sqe->dw0 |= cpu_to_le32(LOCAL_IRQ_EN);
+
+	/* make sure data has been updated in sqe */
+	wmb();
+
+	/* update sq tail, point to new sqe position */
+	chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth;
+
+	/* update sq_tail to trigger a new task */
+	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, chan->qp_num,
+			    chan->sq_tail);
+}
+
+static void hisi_dma_issue_pending(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	if (vchan_issue_pending(&chan->vc))
+		hisi_dma_start_transfer(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int hisi_dma_terminate_all(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, true);
+	if (chan->desc) {
+		vchan_terminate_vdesc(&chan->desc->vd);
+		chan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vc, &head);
+	hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, false);
+
+	return 0;
+}
+
+static void hisi_dma_synchronize(struct dma_chan *c)
+{
+	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+
+	vchan_synchronize(&chan->vc);
+}
+
+static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev)
+{
+	size_t sq_size = sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth;
+	size_t cq_size = sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth;
+	struct device *dev = &hdma_dev->pdev->dev;
+	struct hisi_dma_chan *chan;
+	int i;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		chan = &hdma_dev->chan[i];
+		chan->sq = dmam_alloc_coherent(dev, sq_size, &chan->sq_dma,
+					       GFP_KERNEL);
+		if (!chan->sq)
+			return -ENOMEM;
+
+		chan->cq = dmam_alloc_coherent(dev, cq_size, &chan->cq_dma,
+					       GFP_KERNEL);
+		if (!chan->cq)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+	struct hisi_dma_chan *chan = &hdma_dev->chan[index];
+	u32 hw_depth = hdma_dev->chan_depth - 1;
+	void __iomem *base = hdma_dev->base;
+
+	/* set sq, cq base */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_L, index,
+			    lower_32_bits(chan->sq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_H, index,
+			    upper_32_bits(chan->sq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_L, index,
+			    lower_32_bits(chan->cq_dma));
+	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_H, index,
+			    upper_32_bits(chan->cq_dma));
+
+	/* set sq, cq depth */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_DEPTH, index, hw_depth);
+	hisi_dma_chan_write(base, HISI_DMA_CQ_DEPTH, index, hw_depth);
+
+	/* init sq tail and cq head */
+	hisi_dma_chan_write(base, HISI_DMA_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+}
+
+static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_init_hw_qp(hdma_dev, qp_index);
+	hisi_dma_unmask_irq(hdma_dev, qp_index);
+	hisi_dma_enable_dma(hdma_dev, qp_index, true);
+}
+
+static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+	hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]);
+}
+
+static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev)
+{
+	int i;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		hdma_dev->chan[i].qp_num = i;
+		hdma_dev->chan[i].hdma_dev = hdma_dev;
+		hdma_dev->chan[i].vc.desc_free = hisi_dma_desc_free;
+		vchan_init(&hdma_dev->chan[i].vc, &hdma_dev->dma_dev);
+		hisi_dma_enable_qp(hdma_dev, i);
+	}
+}
+
+static void hisi_dma_disable_qps(struct hisi_dma_dev *hdma_dev)
+{
+	int qp;
+
+	for (qp = 0; qp < hdma_dev->chan_num; qp++) {
+		hisi_dma_disable_qp(hdma_dev, qp);
+		tasklet_kill(&hdma_dev->chan[qp].vc.task);
+	}
+}
+
+/* IRQ handler of one channel: retires the descriptor currently in flight. */
+static irqreturn_t hisi_dma_irq(int irq, void *data)
+{
+	struct hisi_dma_chan *chan = data;
+	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+	struct hisi_dma_desc *desc;
+	struct hisi_dma_cqe *cqe;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	desc = chan->desc;
+	cqe = chan->cq + chan->cq_head;
+	if (desc) {
+		/* Always consume the CQE, else a failed one is re-read. */
+		chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth;
+		hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR,
+				    chan->qp_num, chan->cq_head);
+		if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
+			vchan_cookie_complete(&desc->vd);
+		} else {
+			dev_err(&hdma_dev->pdev->dev, "task error!\n");
+		}
+
+		chan->desc = NULL;
+	}
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+/* Request one interrupt per channel; vector i is bound to channel i. */
+static int hisi_dma_request_qps_irq(struct hisi_dma_dev *hdma_dev)
+{
+	struct pci_dev *pci = hdma_dev->pdev;
+	int err = 0;
+	int vec;
+
+	/* the loop ends at the first failure, whose code is returned */
+	for (vec = 0; vec < hdma_dev->chan_num && !err; vec++)
+		err = devm_request_irq(&pci->dev, pci_irq_vector(pci, vec),
+				       hisi_dma_irq, IRQF_SHARED, "hisi_dma",
+				       &hdma_dev->chan[vec]);
+
+	return err;
+}
+
+/* Set up every hw channel of a device: queue memory, IRQs, then enable. */
+static int hisi_dma_enable_hw_channels(struct hisi_dma_dev *hdma_dev)
+{
+	struct device *dev = &hdma_dev->pdev->dev;
+	int err;
+
+	err = hisi_dma_alloc_qps_mem(hdma_dev);
+	if (err) {
+		dev_err(dev, "fail to allocate qp memory!\n");
+		return err;
+	}
+
+	err = hisi_dma_request_qps_irq(hdma_dev);
+	if (err) {
+		dev_err(dev, "fail to request qp irq!\n");
+		return err;
+	}
+
+	hisi_dma_enable_qps(hdma_dev);
+	return 0;
+}
+
+static void hisi_dma_disable_hw_channels(void *data)
+{	/* devres callback; @data is the struct hisi_dma_dev * given in probe */
+	hisi_dma_disable_qps(data);
+}
+
+static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev,
+			      enum hisi_dma_mode mode)
+{	/* HISI_DMA_MODE gets 1 for RC and 0 for every other mode value */
+	writel_relaxed(mode == RC ? 1 : 0, hdma_dev->base + HISI_DMA_MODE);
+}
+
+/* Bind one device: map BAR 2, set up MSI and channels, register dmaengine. */
+static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct hisi_dma_dev *hdma_dev;
+	struct dma_device *dma_dev;
+	size_t dev_size;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "failed to enable device mem!\n");
+		return ret;
+	}
+
+	ret = pcim_iomap_regions(pdev, 1 << PCI_BAR_2, pci_name(pdev));
+	if (ret) {
+		dev_err(dev, "failed to remap I/O region!\n");
+		return ret;
+	}
+
+	/* one call covers both the streaming and the coherent 64-bit mask */
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	/* a single allocation holds the device struct and all its channels */
+	dev_size = sizeof(struct hisi_dma_chan) * HISI_DMA_CHAN_NUM +
+		   sizeof(*hdma_dev);
+	hdma_dev = devm_kzalloc(dev, dev_size, GFP_KERNEL);
+	if (!hdma_dev)
+		return -ENOMEM;
+
+	hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2];
+	hdma_dev->pdev = pdev;
+	hdma_dev->chan_num = HISI_DMA_CHAN_NUM;
+	hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL;
+
+	pci_set_drvdata(pdev, hdma_dev);
+	pci_set_master(pdev);
+
+	ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM,
+				    PCI_IRQ_MSI);
+	if (ret < 0) {
+		dev_err(dev, "Failed to allocate MSI vectors!\n");
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(dev, hisi_dma_free_irq_vectors, pdev);
+	if (ret)
+		return ret;
+
+	dma_dev = &hdma_dev->dma_dev;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy;
+	dma_dev->device_tx_status = hisi_dma_tx_status;
+	dma_dev->device_issue_pending = hisi_dma_issue_pending;
+	dma_dev->device_terminate_all = hisi_dma_terminate_all;
+	dma_dev->device_synchronize = hisi_dma_synchronize;
+	dma_dev->directions = BIT(DMA_MEM_TO_MEM);
+	dma_dev->dev = dev;
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	hisi_dma_set_mode(hdma_dev, RC);
+
+	ret = hisi_dma_enable_hw_channels(hdma_dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable hw channel!\n");
+		return ret;
+	}
+
+	/* from here on devres disables the channels on failure or unbind */
+	ret = devm_add_action_or_reset(dev, hisi_dma_disable_hw_channels,
+				       hdma_dev);
+	if (ret)
+		return ret;
+
+	ret = dmaenginem_async_device_register(dma_dev);
+	if (ret < 0)
+		dev_err(dev, "failed to register device!\n");
+
+	return ret;
+}
+
+static const struct pci_device_id hisi_dma_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa122) }, /* only ID matched here */
+	{ 0, } /* zeroed entry ends the table */
+};
+
+static struct pci_driver hisi_dma_pci_driver = {
+	.name		= "hisi_dma",
+	.id_table	= hisi_dma_pci_tbl,
+	.probe		= hisi_dma_probe, /* no .remove; probe uses devres */
+};
+
+module_pci_driver(hisi_dma_pci_driver);
+
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_AUTHOR("Zhenfa Qiu <qiuzhenfa@hisilicon.com>");
+MODULE_DESCRIPTION("HiSilicon Kunpeng DMA controller driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, hisi_dma_pci_tbl);
-- 
cgit 


From 71723a96b8b1367fefc18f60025dae792477d602 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 17 Jan 2020 16:30:56 +0100
Subject: dmaengine: Create symlinks between DMA channels and slaves
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently it is not easy to find out which DMA channels are in use, and
which slave devices are using which channels.

Fix this by creating two symlinks between the DMA channel and the actual
slave device when a channel is requested:
  1. A "slave" symlink from DMA channel to slave device,
  2. A "dma:<name>" symlink from slave device to DMA channel.
When the channel is released, the symlinks are removed again.
The latter requires keeping track of the slave device and the channel
name in the dma_chan structure.

Note that this is limited to channel request functions for requesting an
exclusive slave channel that take a device pointer (dma_request_chan()
and dma_request_slave_channel*()).

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Niklas Söderlund <niklas.soderlund@ragnatech.se>
Link: https://lore.kernel.org/r/20200117153056.31363-1-geert+renesas@glider.be
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

(limited to 'drivers/dma')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 51a2f2b1b2de..f3ef4edd4de1 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -60,6 +60,8 @@ static long dmaengine_ref_count;
 
 /* --- sysfs implementation --- */
 
+#define DMA_SLAVE_NAME	"slave"
+
 /**
  * dev_to_dma_chan - convert a device pointer to its sysfs container object
  * @dev - device node
@@ -730,11 +732,11 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 	if (has_acpi_companion(dev) && !chan)
 		chan = acpi_dma_request_slave_chan_by_name(dev, name);
 
-	if (chan) {
-		/* Valid channel found or requester needs to be deferred */
-		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
-			return chan;
-	}
+	if (PTR_ERR(chan) == -EPROBE_DEFER)
+		return chan;
+
+	if (!IS_ERR_OR_NULL(chan))
+		goto found;
 
 	/* Try to find the channel via the DMA filter map(s) */
 	mutex_lock(&dma_list_mutex);
@@ -754,7 +756,23 @@ struct dma_chan *dma_request_chan(struct device *dev, const char *name)
 	}
 	mutex_unlock(&dma_list_mutex);
 
-	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+	if (!IS_ERR_OR_NULL(chan))
+		goto found;
+
+	return ERR_PTR(-EPROBE_DEFER);
+
+found:
+	chan->slave = dev;
+	chan->name = kasprintf(GFP_KERNEL, "dma:%s", name);
+	if (!chan->name)
+		return ERR_PTR(-ENOMEM);
+
+	if (sysfs_create_link(&chan->dev->device.kobj, &dev->kobj,
+			      DMA_SLAVE_NAME))
+		dev_err(dev, "Cannot create DMA %s symlink\n", DMA_SLAVE_NAME);
+	if (sysfs_create_link(&dev->kobj, &chan->dev->device.kobj, chan->name))
+		dev_err(dev, "Cannot create DMA %s symlink\n", chan->name);
+	return chan;
 }
 EXPORT_SYMBOL_GPL(dma_request_chan);
 
@@ -812,6 +830,13 @@ void dma_release_channel(struct dma_chan *chan)
 	/* drop PRIVATE cap enabled by __dma_request_channel() */
 	if (--chan->device->privatecnt == 0)
 		dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+	if (chan->slave) {
+		sysfs_remove_link(&chan->slave->kobj, chan->name);
+		kfree(chan->name);
+		chan->name = NULL;
+		chan->slave = NULL;
+	}
+	sysfs_remove_link(&chan->dev->device.kobj, DMA_SLAVE_NAME);
 	mutex_unlock(&dma_list_mutex);
 }
 EXPORT_SYMBOL_GPL(dma_release_channel);
-- 
cgit