diff options
Diffstat (limited to 'drivers/dma')
52 files changed, 4241 insertions, 1700 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 24e8597b2c3e..fadc4d8783bd 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -56,14 +56,22 @@ config DMA_OF select DMA_ENGINE #devices +config ALTERA_MSGDMA + tristate "Altera / Intel mSGDMA Engine" + select DMA_ENGINE + help + Enable support for Altera / Intel mSGDMA controller. + config AMBA_PL08X bool "ARM PrimeCell PL080 or PL081 support" depends on ARM_AMBA select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Platform has a PL08x DMAC device - which can provide DMA engine support + Say yes if your platform has a PL08x DMAC device which can + provide DMA engine support. This includes the original ARM + PL080 and PL081, Samsungs PL080 derivative and Faraday + Technology's FTDMAC020 PL080 derivative. config AMCC_PPC440SPE_ADMA tristate "AMCC PPC440SPe ADMA support" @@ -99,6 +107,21 @@ config AXI_DMAC controller is often used in Analog Device's reference designs for FPGA platforms. +config BCM_SBA_RAID + tristate "Broadcom SBA RAID engine support" + depends on ARM64 || COMPILE_TEST + depends on MAILBOX && RAID6_PQ + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_DISABLE_XOR_VAL_DMA + select ASYNC_TX_DISABLE_PQ_VAL_DMA + default ARCH_BCM_IPROC + help + Enable support for Broadcom SBA RAID Engine. The SBA RAID + engine is available on most of the Broadcom iProc SoCs. It + has the capability to offload memcpy, xor and pq computation + for raid5/6. + config COH901318 bool "ST-Ericsson COH901318 DMA support" select DMA_ENGINE @@ -354,13 +377,12 @@ config MV_XOR_V2 config MXS_DMA bool "MXS DMA support" - depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q || SOC_IMX6UL + depends on ARCH_MXS || ARCH_MXC || COMPILE_TEST select STMP_DEVICE select DMA_ENGINE help Support the MXS DMA engine. This engine including APBH-DMA - and APBX-DMA is integrated into Freescale - i.MX23/28/MX6Q/MX6DL/MX6UL chips. + and APBX-DMA is integrated into some Freescale chips. config MX3_IPU bool "MX3x Image Processing Unit support" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 0b723e94d9e6..9d0156b50294 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 #dmaengine debug flags subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG @@ -12,11 +13,13 @@ obj-$(CONFIG_DMA_OF) += of-dma.o obj-$(CONFIG_DMATEST) += dmatest.o #devices +obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_AT_XDMAC) += at_xdmac.o obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o +obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c new file mode 100644 index 000000000000..55f9c62ee54b --- /dev/null +++ b/drivers/dma/altera-msgdma.c @@ -0,0 +1,934 @@ +/* + * DMA driver for Altera mSGDMA IP core + * + * Copyright (C) 2017 Stefan Roese <sr@denx.de> + * + * Based on drivers/dma/xilinx/zynqmp_dma.c, which is: + * Copyright (C) 2016 Xilinx, Inc. All rights reserved. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dmaengine.h" + +#define MSGDMA_MAX_TRANS_LEN U32_MAX +#define MSGDMA_DESC_NUM 1024 + +/** + * struct msgdma_extended_desc - implements an extended descriptor + * @read_addr_lo: data buffer source address low bits + * @write_addr_lo: data buffer destination address low bits + * @len: the number of bytes to transfer per descriptor + * @burst_seq_num: bit 31:24 write burst + * bit 23:16 read burst + * bit 15:00 sequence number + * @stride: bit 31:16 write stride + * bit 15:00 read stride + * @read_addr_hi: data buffer source address high bits + * @write_addr_hi: data buffer destination address high bits + * @control: characteristics of the transfer + */ +struct msgdma_extended_desc { + u32 read_addr_lo; + u32 write_addr_lo; + u32 len; + u32 burst_seq_num; + u32 stride; + u32 read_addr_hi; + u32 write_addr_hi; + u32 control; +}; + +/* mSGDMA descriptor control field bit definitions */ +#define MSGDMA_DESC_CTL_SET_CH(x) ((x) & 0xff) +#define MSGDMA_DESC_CTL_GEN_SOP BIT(8) +#define MSGDMA_DESC_CTL_GEN_EOP BIT(9) +#define MSGDMA_DESC_CTL_PARK_READS BIT(10) +#define MSGDMA_DESC_CTL_PARK_WRITES BIT(11) +#define MSGDMA_DESC_CTL_END_ON_EOP BIT(12) +#define MSGDMA_DESC_CTL_END_ON_LEN BIT(13) +#define MSGDMA_DESC_CTL_TR_COMP_IRQ BIT(14) +#define MSGDMA_DESC_CTL_EARLY_IRQ BIT(15) +#define MSGDMA_DESC_CTL_TR_ERR_IRQ GENMASK(23, 16) +#define MSGDMA_DESC_CTL_EARLY_DONE BIT(24) + +/* + * Writing "1" the "go" bit commits the entire descriptor into the + * descriptor FIFO(s) + */ +#define MSGDMA_DESC_CTL_GO BIT(31) + +/* Tx buffer control flags */ +#define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \ + MSGDMA_DESC_CTL_GEN_EOP | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +#define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \ + MSGDMA_DESC_CTL_END_ON_LEN | \ + MSGDMA_DESC_CTL_TR_COMP_IRQ | \ + MSGDMA_DESC_CTL_EARLY_IRQ | \ + MSGDMA_DESC_CTL_TR_ERR_IRQ | \ + MSGDMA_DESC_CTL_GO) + +/* mSGDMA extended descriptor stride definitions */ +#define MSGDMA_DESC_STRIDE_RD 0x00000001 +#define MSGDMA_DESC_STRIDE_WR 0x00010000 +#define MSGDMA_DESC_STRIDE_RW 0x00010001 + +/* mSGDMA dispatcher control and status register map */ +#define MSGDMA_CSR_STATUS 0x00 /* Read / Clear */ +#define MSGDMA_CSR_CONTROL 0x04 /* Read / Write */ +#define MSGDMA_CSR_RW_FILL_LEVEL 0x08 /* 31:16 - write fill level */ + /* 15:00 - read fill level */ +#define MSGDMA_CSR_RESP_FILL_LEVEL 0x0c /* response FIFO fill level */ +#define MSGDMA_CSR_RW_SEQ_NUM 0x10 /* 31:16 - write seq number */ + /* 15:00 - read seq number */ + +/* mSGDMA CSR status register bit definitions */ +#define MSGDMA_CSR_STAT_BUSY BIT(0) +#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY BIT(1) +#define MSGDMA_CSR_STAT_DESC_BUF_FULL BIT(2) +#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY BIT(3) +#define MSGDMA_CSR_STAT_RESP_BUF_FULL BIT(4) +#define MSGDMA_CSR_STAT_STOPPED BIT(5) +#define MSGDMA_CSR_STAT_RESETTING BIT(6) +#define MSGDMA_CSR_STAT_STOPPED_ON_ERR BIT(7) +#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY BIT(8) +#define MSGDMA_CSR_STAT_IRQ BIT(9) +#define MSGDMA_CSR_STAT_MASK GENMASK(9, 0) +#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ GENMASK(8, 0) + +#define DESC_EMPTY (MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \ + MSGDMA_CSR_STAT_RESP_BUF_EMPTY) + +/* mSGDMA CSR control register bit definitions */ +#define MSGDMA_CSR_CTL_STOP BIT(0) +#define MSGDMA_CSR_CTL_RESET BIT(1) +#define MSGDMA_CSR_CTL_STOP_ON_ERR BIT(2) +#define MSGDMA_CSR_CTL_STOP_ON_EARLY BIT(3) +#define MSGDMA_CSR_CTL_GLOBAL_INTR BIT(4) +#define MSGDMA_CSR_CTL_STOP_DESCS BIT(5) + +/* mSGDMA CSR fill level bits */ +#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v) (((v) & 0xffff0000) >> 16) +#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) +#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v) ((v) & 0x0000ffff) + +#define MSGDMA_CSR_SEQ_NUM_GET(v) (((v) & 0xffff0000) >> 16) + +/* mSGDMA response register map */ +#define MSGDMA_RESP_BYTES_TRANSFERRED 0x00 +#define MSGDMA_RESP_STATUS 0x04 + +/* mSGDMA response register bit definitions */ +#define MSGDMA_RESP_EARLY_TERM BIT(8) +#define MSGDMA_RESP_ERR_MASK 0xff + +/** + * struct msgdma_sw_desc - implements a sw descriptor + * @async_tx: support for the async_tx api + * @hw_desc: assosiated HW descriptor + * @free_list: node of the free SW descriprots list + */ +struct msgdma_sw_desc { + struct dma_async_tx_descriptor async_tx; + struct msgdma_extended_desc hw_desc; + struct list_head node; + struct list_head tx_list; +}; + +/** + * struct msgdma_device - DMA device structure + */ +struct msgdma_device { + spinlock_t lock; + struct device *dev; + struct tasklet_struct irq_tasklet; + struct list_head pending_list; + struct list_head free_list; + struct list_head active_list; + struct list_head done_list; + u32 desc_free_cnt; + bool idle; + + struct dma_device dmadev; + struct dma_chan dmachan; + dma_addr_t hw_desq; + struct msgdma_sw_desc *sw_desq; + unsigned int npendings; + + struct dma_slave_config slave_cfg; + + int irq; + + /* mSGDMA controller */ + void __iomem *csr; + + /* mSGDMA descriptors */ + void __iomem *desc; + + /* mSGDMA response */ + void __iomem *resp; +}; + +#define to_mdev(chan) container_of(chan, struct msgdma_device, dmachan) +#define tx_to_desc(tx) container_of(tx, struct msgdma_sw_desc, async_tx) + +/** + * msgdma_get_descriptor - Get the sw descriptor from the pool + * @mdev: Pointer to the Altera mSGDMA device structure + * + * Return: The sw descriptor + */ +static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node); + list_del(&desc->node); + spin_unlock_irqrestore(&mdev->lock, flags); + + INIT_LIST_HEAD(&desc->tx_list); + + return desc; +} + +/** + * msgdma_free_descriptor - Issue pending transactions + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_free_descriptor(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *child, *next; + + mdev->desc_free_cnt++; + list_add_tail(&desc->node, &mdev->free_list); + list_for_each_entry_safe(child, next, &desc->tx_list, node) { + mdev->desc_free_cnt++; + list_move_tail(&child->node, &mdev->free_list); + } +} + +/** + * msgdma_free_desc_list - Free descriptors list + * @mdev: Pointer to the Altera mSGDMA device structure + * @list: List to parse and delete the descriptor + */ +static void msgdma_free_desc_list(struct msgdma_device *mdev, + struct list_head *list) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, list, node) + msgdma_free_descriptor(mdev, desc); +} + +/** + * msgdma_desc_config - Configure the descriptor + * @desc: Hw descriptor pointer + * @dst: Destination buffer address + * @src: Source buffer address + * @len: Transfer length + */ +static void msgdma_desc_config(struct msgdma_extended_desc *desc, + dma_addr_t dst, dma_addr_t src, size_t len, + u32 stride) +{ + /* Set lower 32bits of src & dst addresses in the descriptor */ + desc->read_addr_lo = lower_32_bits(src); + desc->write_addr_lo = lower_32_bits(dst); + + /* Set upper 32bits of src & dst addresses in the descriptor */ + desc->read_addr_hi = upper_32_bits(src); + desc->write_addr_hi = upper_32_bits(dst); + + desc->len = len; + desc->stride = stride; + desc->burst_seq_num = 0; /* 0 will result in max burst length */ + + /* + * Don't set interrupt on xfer end yet, this will be done later + * for the "last" descriptor + */ + desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO | + MSGDMA_DESC_CTL_END_ON_LEN; +} + +/** + * msgdma_desc_config_eod - Mark the descriptor as end descriptor + * @desc: Hw descriptor pointer + */ +static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc) +{ + desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ; +} + +/** + * msgdma_tx_submit - Submit DMA transaction + * @tx: Async transaction descriptor pointer + * + * Return: cookie value + */ +static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct msgdma_device *mdev = to_mdev(tx->chan); + struct msgdma_sw_desc *new; + dma_cookie_t cookie; + unsigned long flags; + + new = tx_to_desc(tx); + spin_lock_irqsave(&mdev->lock, flags); + cookie = dma_cookie_assign(tx); + + list_add_tail(&new->node, &mdev->pending_list); + spin_unlock_irqrestore(&mdev->lock, flags); + + return cookie; +} + +/** + * msgdma_prep_memcpy - prepare descriptors for memcpy transaction + * @dchan: DMA channel + * @dma_dst: Destination buffer address + * @dma_src: Source buffer address + * @len: Transfer length + * @flags: transfer ack flags + * + * Return: Async transaction descriptor on success and NULL on failure + */ +static struct dma_async_tx_descriptor * +msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, + dma_addr_t dma_src, size_t len, ulong flags) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *new, *first = NULL; + struct msgdma_extended_desc *desc; + size_t copy; + u32 desc_cnt; + unsigned long irqflags; + + desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN); + + spin_lock_irqsave(&mdev->lock, irqflags); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_irqrestore(&mdev->lock, irqflags); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_irqrestore(&mdev->lock, irqflags); + + do { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN); + desc = &new->hw_desc; + msgdma_desc_config(desc, dma_dst, dma_src, copy, + MSGDMA_DESC_STRIDE_RW); + len -= copy; + dma_src += copy; + dma_dst += copy; + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + } while (len); + + msgdma_desc_config_eod(desc); + async_tx_ack(&first->async_tx); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +/** + * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction + * + * @dchan: DMA channel + * @sgl: Destination scatter list + * @sg_len: Number of entries in destination scatter list + * @dir: DMA transfer direction + * @flags: transfer ack flags + * @context: transfer context (unused) + */ +static struct dma_async_tx_descriptor * +msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) + +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct dma_slave_config *cfg = &mdev->slave_cfg; + struct msgdma_sw_desc *new, *first = NULL; + void *desc = NULL; + size_t len, avail; + dma_addr_t dma_dst, dma_src; + u32 desc_cnt = 0, i; + struct scatterlist *sg; + u32 stride; + unsigned long irqflags; + + for_each_sg(sgl, sg, sg_len, i) + desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); + + spin_lock_irqsave(&mdev->lock, irqflags); + if (desc_cnt > mdev->desc_free_cnt) { + spin_unlock_irqrestore(&mdev->lock, irqflags); + dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); + return NULL; + } + mdev->desc_free_cnt -= desc_cnt; + spin_unlock_irqrestore(&mdev->lock, irqflags); + + avail = sg_dma_len(sgl); + + /* Run until we are out of scatterlist entries */ + while (true) { + /* Allocate and populate the descriptor */ + new = msgdma_get_descriptor(mdev); + + desc = &new->hw_desc; + len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN); + + if (dir == DMA_MEM_TO_DEV) { + dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + dma_dst = cfg->dst_addr; + stride = MSGDMA_DESC_STRIDE_RD; + } else { + dma_src = cfg->src_addr; + dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail; + stride = MSGDMA_DESC_STRIDE_WR; + } + msgdma_desc_config(desc, dma_dst, dma_src, len, stride); + avail -= len; + + if (!first) + first = new; + else + list_add_tail(&new->node, &first->tx_list); + + /* Fetch the next scatterlist entry */ + if (avail == 0) { + if (sg_len == 0) + break; + sgl = sg_next(sgl); + if (sgl == NULL) + break; + sg_len--; + avail = sg_dma_len(sgl); + } + } + + msgdma_desc_config_eod(desc); + first->async_tx.flags = flags; + + return &first->async_tx; +} + +static int msgdma_dma_config(struct dma_chan *dchan, + struct dma_slave_config *config) +{ + struct msgdma_device *mdev = to_mdev(dchan); + + memcpy(&mdev->slave_cfg, config, sizeof(*config)); + + return 0; +} + +static void msgdma_reset(struct msgdma_device *mdev) +{ + u32 val; + int ret; + + /* Reset mSGDMA */ + iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS); + iowrite32(MSGDMA_CSR_CTL_RESET, mdev->csr + MSGDMA_CSR_CONTROL); + + ret = readl_poll_timeout(mdev->csr + MSGDMA_CSR_STATUS, val, + (val & MSGDMA_CSR_STAT_RESETTING) == 0, + 1, 10000); + if (ret) + dev_err(mdev->dev, "DMA channel did not reset\n"); + + /* Clear all status bits */ + iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS); + + /* Enable the DMA controller including interrupts */ + iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY | + MSGDMA_CSR_CTL_GLOBAL_INTR, mdev->csr + MSGDMA_CSR_CONTROL); + + mdev->idle = true; +}; + +static void msgdma_copy_one(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + void __iomem *hw_desc = mdev->desc; + + /* + * Check if the DESC FIFO it not full. If its full, we need to wait + * for at least one entry to become free again + */ + while (ioread32(mdev->csr + MSGDMA_CSR_STATUS) & + MSGDMA_CSR_STAT_DESC_BUF_FULL) + mdelay(1); + + /* + * The descriptor needs to get copied into the descriptor FIFO + * of the DMA controller. The descriptor will get flushed to the + * FIFO, once the last word (control word) is written. Since we + * are not 100% sure that memcpy() writes all word in the "correct" + * oder (address from low to high) on all architectures, we make + * sure this control word is written last by single coding it and + * adding some write-barriers here. + */ + memcpy((void __force *)hw_desc, &desc->hw_desc, + sizeof(desc->hw_desc) - sizeof(u32)); + + /* Write control word last to flush this descriptor into the FIFO */ + mdev->idle = false; + wmb(); + iowrite32(desc->hw_desc.control, hw_desc + + offsetof(struct msgdma_extended_desc, control)); + wmb(); +} + +/** + * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO + * @mdev: Pointer to the Altera mSGDMA device structure + * @desc: Transaction descriptor pointer + */ +static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev, + struct msgdma_sw_desc *desc) +{ + struct msgdma_sw_desc *sdesc, *next; + + msgdma_copy_one(mdev, desc); + + list_for_each_entry_safe(sdesc, next, &desc->tx_list, node) + msgdma_copy_one(mdev, sdesc); +} + +/** + * msgdma_start_transfer - Initiate the new transfer + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_start_transfer(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + if (!mdev->idle) + return; + + desc = list_first_entry_or_null(&mdev->pending_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + + list_splice_tail_init(&mdev->pending_list, &mdev->active_list); + msgdma_copy_desc_to_fifo(mdev, desc); +} + +/** + * msgdma_issue_pending - Issue pending transactions + * @chan: DMA channel pointer + */ +static void msgdma_issue_pending(struct dma_chan *chan) +{ + struct msgdma_device *mdev = to_mdev(chan); + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + msgdma_start_transfer(mdev); + spin_unlock_irqrestore(&mdev->lock, flags); +} + +/** + * msgdma_chan_desc_cleanup - Cleanup the completed descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc, *next; + + list_for_each_entry_safe(desc, next, &mdev->done_list, node) { + dma_async_tx_callback callback; + void *callback_param; + + list_del(&desc->node); + + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + if (callback) { + spin_unlock(&mdev->lock); + callback(callback_param); + spin_lock(&mdev->lock); + } + + /* Run any dependencies, then free the descriptor */ + msgdma_free_descriptor(mdev, desc); + } +} + +/** + * msgdma_complete_descriptor - Mark the active descriptor as complete + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_complete_descriptor(struct msgdma_device *mdev) +{ + struct msgdma_sw_desc *desc; + + desc = list_first_entry_or_null(&mdev->active_list, + struct msgdma_sw_desc, node); + if (!desc) + return; + list_del(&desc->node); + dma_cookie_complete(&desc->async_tx); + list_add_tail(&desc->node, &mdev->done_list); +} + +/** + * msgdma_free_descriptors - Free channel descriptors + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_free_descriptors(struct msgdma_device *mdev) +{ + msgdma_free_desc_list(mdev, &mdev->active_list); + msgdma_free_desc_list(mdev, &mdev->pending_list); + msgdma_free_desc_list(mdev, &mdev->done_list); +} + +/** + * msgdma_free_chan_resources - Free channel resources + * @dchan: DMA channel pointer + */ +static void msgdma_free_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + msgdma_free_descriptors(mdev); + spin_unlock_irqrestore(&mdev->lock, flags); + kfree(mdev->sw_desq); +} + +/** + * msgdma_alloc_chan_resources - Allocate channel resources + * @dchan: DMA channel + * + * Return: Number of descriptors on success and failure value on error + */ +static int msgdma_alloc_chan_resources(struct dma_chan *dchan) +{ + struct msgdma_device *mdev = to_mdev(dchan); + struct msgdma_sw_desc *desc; + int i; + + mdev->sw_desq = kcalloc(MSGDMA_DESC_NUM, sizeof(*desc), GFP_NOWAIT); + if (!mdev->sw_desq) + return -ENOMEM; + + mdev->idle = true; + mdev->desc_free_cnt = MSGDMA_DESC_NUM; + + INIT_LIST_HEAD(&mdev->free_list); + + for (i = 0; i < MSGDMA_DESC_NUM; i++) { + desc = mdev->sw_desq + i; + dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan); + desc->async_tx.tx_submit = msgdma_tx_submit; + list_add_tail(&desc->node, &mdev->free_list); + } + + return MSGDMA_DESC_NUM; +} + +/** + * msgdma_tasklet - Schedule completion tasklet + * @data: Pointer to the Altera sSGDMA channel structure + */ +static void msgdma_tasklet(unsigned long data) +{ + struct msgdma_device *mdev = (struct msgdma_device *)data; + u32 count; + u32 __maybe_unused size; + u32 __maybe_unused status; + unsigned long flags; + + spin_lock_irqsave(&mdev->lock, flags); + + /* Read number of responses that are available */ + count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL); + dev_dbg(mdev->dev, "%s (%d): response count=%d\n", + __func__, __LINE__, count); + + while (count--) { + /* + * Read both longwords to purge this response from the FIFO + * On Avalon-MM implementations, size and status do not + * have any real values, like transferred bytes or error + * bits. So we need to just drop these values. + */ + size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED); + status = ioread32(mdev->resp + MSGDMA_RESP_STATUS); + + msgdma_complete_descriptor(mdev); + msgdma_chan_desc_cleanup(mdev); + } + + spin_unlock_irqrestore(&mdev->lock, flags); +} + +/** + * msgdma_irq_handler - Altera mSGDMA Interrupt handler + * @irq: IRQ number + * @data: Pointer to the Altera mSGDMA device structure + * + * Return: IRQ_HANDLED/IRQ_NONE + */ +static irqreturn_t msgdma_irq_handler(int irq, void *data) +{ + struct msgdma_device *mdev = data; + u32 status; + + status = ioread32(mdev->csr + MSGDMA_CSR_STATUS); + if ((status & MSGDMA_CSR_STAT_BUSY) == 0) { + /* Start next transfer if the DMA controller is idle */ + spin_lock(&mdev->lock); + mdev->idle = true; + msgdma_start_transfer(mdev); + spin_unlock(&mdev->lock); + } + + tasklet_schedule(&mdev->irq_tasklet); + + /* Clear interrupt in mSGDMA controller */ + iowrite32(MSGDMA_CSR_STAT_IRQ, mdev->csr + MSGDMA_CSR_STATUS); + + return IRQ_HANDLED; +} + +/** + * msgdma_chan_remove - Channel remove function + * @mdev: Pointer to the Altera mSGDMA device structure + */ +static void msgdma_dev_remove(struct msgdma_device *mdev) +{ + if (!mdev) + return; + + devm_free_irq(mdev->dev, mdev->irq, mdev); + tasklet_kill(&mdev->irq_tasklet); + list_del(&mdev->dmachan.device_node); +} + +static int request_and_map(struct platform_device *pdev, const char *name, + struct resource **res, void __iomem **ptr) +{ + struct resource *region; + struct device *device = &pdev->dev; + + *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + if (*res == NULL) { + dev_err(device, "resource %s not defined\n", name); + return -ENODEV; + } + + region = devm_request_mem_region(device, (*res)->start, + resource_size(*res), dev_name(device)); + if (region == NULL) { + dev_err(device, "unable to request %s\n", name); + return -EBUSY; + } + + *ptr = devm_ioremap_nocache(device, region->start, + resource_size(region)); + if (*ptr == NULL) { + dev_err(device, "ioremap_nocache of %s failed!", name); + return -ENOMEM; + } + + return 0; +} + +/** + * msgdma_probe - Driver probe function + * @pdev: Pointer to the platform_device structure + * + * Return: '0' on success and failure value on error + */ +static int msgdma_probe(struct platform_device *pdev) +{ + struct msgdma_device *mdev; + struct dma_device *dma_dev; + struct resource *dma_res; + int ret; + + mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_NOWAIT); + if (!mdev) + return -ENOMEM; + + mdev->dev = &pdev->dev; + + /* Map CSR space */ + ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr); + if (ret) + return ret; + + /* Map (extended) descriptor space */ + ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc); + if (ret) + return ret; + + /* Map response space */ + ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp); + if (ret) + return ret; + + platform_set_drvdata(pdev, mdev); + + /* Get interrupt nr from platform data */ + mdev->irq = platform_get_irq(pdev, 0); + if (mdev->irq < 0) + return -ENXIO; + + ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler, + 0, dev_name(&pdev->dev), mdev); + if (ret) + return ret; + + tasklet_init(&mdev->irq_tasklet, msgdma_tasklet, (unsigned long)mdev); + + dma_cookie_init(&mdev->dmachan); + + spin_lock_init(&mdev->lock); + + INIT_LIST_HEAD(&mdev->active_list); + INIT_LIST_HEAD(&mdev->pending_list); + INIT_LIST_HEAD(&mdev->done_list); + INIT_LIST_HEAD(&mdev->free_list); + + dma_dev = &mdev->dmadev; + + /* Set DMA capabilities */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_MEM); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + /* Init DMA link list */ + INIT_LIST_HEAD(&dma_dev->channels); + + /* Set base routines */ + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = msgdma_issue_pending; + dma_dev->dev = &pdev->dev; + + dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES; + dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy; + dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg; + dma_dev->device_config = msgdma_dma_config; + + dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources; + dma_dev->device_free_chan_resources = msgdma_free_chan_resources; + + mdev->dmachan.device = dma_dev; + list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels); + + /* Set DMA mask to 64 bits */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_warn(&pdev->dev, "unable to set coherent mask to 64"); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) + goto fail; + } + + msgdma_reset(mdev); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto fail; + + dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n"); + + return 0; + +fail: + msgdma_dev_remove(mdev); + + return ret; +} + +/** + * msgdma_dma_remove - Driver remove function + * @pdev: Pointer to the platform_device structure + * + * Return: Always '0' + */ +static int msgdma_remove(struct platform_device *pdev) +{ + struct msgdma_device *mdev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&mdev->dmadev); + msgdma_dev_remove(mdev); + + dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n"); + + return 0; +} + +static struct platform_driver msgdma_driver = { + .driver = { + .name = "altera-msgdma", + }, + .probe = msgdma_probe, + .remove = msgdma_remove, +}; + +module_platform_driver(msgdma_driver); + +MODULE_ALIAS("platform:altera-msgdma"); +MODULE_DESCRIPTION("Altera mSGDMA driver"); +MODULE_AUTHOR("Stefan Roese <sr@denx.de>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 6bb8813ca275..b52b0d55247e 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1,9 +1,10 @@ /* * Copyright (c) 2006 ARM Ltd. * Copyright (c) 2010 ST-Ericsson SA + * Copyirght (c) 2017 Linaro Ltd. * * Author: Peter Pearse <peter.pearse@arm.com> - * Author: Linus Walleij <linus.walleij@stericsson.com> + * Author: Linus Walleij <linus.walleij@linaro.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -110,11 +111,12 @@ struct pl08x_driver_data; * @channels: the number of channels available in this variant * @signals: the number of request signals available from the hardware * @dualmaster: whether this version supports dual AHB masters or not. - * @nomadik: whether the channels have Nomadik security extension bits - * that need to be checked for permission before use and some registers are - * missing - * @pl080s: whether this version is a PL080S, which has separate register and - * LLI word for transfer size. + * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the + * channels have Nomadik security extension bits that need to be checked + * for permission before use and some registers are missing + * @pl080s: whether this variant is a Samsung PL080S, which has separate + * register and LLI word for transfer size. + * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020 * @max_transfer_size: the maximum single element transfer size for this * PL08x variant. */ @@ -125,6 +127,7 @@ struct vendor_data { bool dualmaster; bool nomadik; bool pl080s; + bool ftdmac020; u32 max_transfer_size; }; @@ -148,19 +151,34 @@ struct pl08x_bus_data { * @id: physical index to this channel * @base: memory base address for this physical channel * @reg_config: configuration address for this physical channel + * @reg_control: control address for this physical channel + * @reg_src: transfer source address register + * @reg_dst: transfer destination address register + * @reg_lli: transfer LLI address register + * @reg_busy: if the variant has a special per-channel busy register, + * this contains a pointer to it * @lock: a lock to use when altering an instance of this struct * @serving: the virtual channel currently being served by this physical * channel * @locked: channel unavailable for the system, e.g. dedicated to secure * world + * @ftdmac020: channel is on a FTDMAC020 + * @pl080s: channel is on a PL08s */ struct pl08x_phy_chan { unsigned int id; void __iomem *base; void __iomem *reg_config; + void __iomem *reg_control; + void __iomem *reg_src; + void __iomem *reg_dst; + void __iomem *reg_lli; + void __iomem *reg_busy; spinlock_t lock; struct pl08x_dma_chan *serving; bool locked; + bool ftdmac020; + bool pl080s; }; /** @@ -253,8 +271,9 @@ struct pl08x_dma_chan { /** * struct pl08x_driver_data - the local state holder for the PL08x - * @slave: slave engine for this instance + * @slave: optional slave engine for this instance * @memcpy: memcpy engine for this instance + * @has_slave: the PL08x has a slave engine (routed signals) * @base: virtual memory base (remapped) for the PL08x * @adev: the corresponding AMBA (PrimeCell) bus entry * @vd: vendor data for this PL08x variant @@ -269,6 +288,7 @@ struct pl08x_dma_chan { struct pl08x_driver_data { struct dma_device slave; struct dma_device memcpy; + bool has_slave; void __iomem *base; struct amba_device *adev; const struct vendor_data *vd; @@ -360,10 +380,24 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch) { unsigned int val; + /* If we have a special busy register, take a shortcut */ + if (ch->reg_busy) { + val = readl(ch->reg_busy); + return !!(val & BIT(ch->id)); + } val = readl(ch->reg_config); return val & PL080_CONFIG_ACTIVE; } +/* + * pl08x_write_lli() - Write an LLI into the DMA controller. + * + * The PL08x derivatives support linked lists, but the first item of the + * list containing the source, destination, control word and next LLI is + * ignored. Instead the driver has to write those values directly into the + * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE + * register need to be set up for the first transfer. + */ static void pl08x_write_lli(struct pl08x_driver_data *pl08x, struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg) { @@ -381,11 +415,112 @@ static void pl08x_write_lli(struct pl08x_driver_data *pl08x, phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST], lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg); - writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR); - writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR); - writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI); - writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL); + writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src); + writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst); + writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli); + + /* + * The FTMAC020 has a different layout in the CCTL word of the LLI + * and the CCTL register which is split in CSR and SIZE registers. + * Convert the LLI item CCTL into the proper values to write into + * the CSR and SIZE registers. + */ + if (phychan->ftdmac020) { + u32 llictl = lli[PL080_LLI_CCTL]; + u32 val = 0; + + /* Write the transfer size (12 bits) to the size register */ + writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK, + phychan->base + FTDMAC020_CH_SIZE); + /* + * Then write the control bits 28..16 to the control register + * by shuffleing the bits around to where they are in the + * main register. The mapping is as follows: + * Bit 28: TC_MSK - mask on all except last LLI + * Bit 27..25: SRC_WIDTH + * Bit 24..22: DST_WIDTH + * Bit 21..20: SRCAD_CTRL + * Bit 19..17: DSTAD_CTRL + * Bit 17: SRC_SEL + * Bit 16: DST_SEL + */ + if (llictl & FTDMAC020_LLI_TC_MSK) + val |= FTDMAC020_CH_CSR_TC_MSK; + val |= ((llictl & FTDMAC020_LLI_SRC_WIDTH_MSK) >> + (FTDMAC020_LLI_SRC_WIDTH_SHIFT - + FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_DST_WIDTH_MSK) >> + (FTDMAC020_LLI_DST_WIDTH_SHIFT - + FTDMAC020_CH_CSR_DST_WIDTH_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_SRCAD_CTL_MSK) >> + (FTDMAC020_LLI_SRCAD_CTL_SHIFT - + FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT)); + val |= ((llictl & FTDMAC020_LLI_DSTAD_CTL_MSK) >> + (FTDMAC020_LLI_DSTAD_CTL_SHIFT - + FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT)); + if (llictl & FTDMAC020_LLI_SRC_SEL) + val |= FTDMAC020_CH_CSR_SRC_SEL; + if (llictl & FTDMAC020_LLI_DST_SEL) + val |= FTDMAC020_CH_CSR_DST_SEL; + + /* + * Set up the bits that exist in the CSR but are not + * part the LLI, i.e. only gets written to the control + * register right here. + * + * FIXME: do not just handle memcpy, also handle slave DMA. + */ + switch (pl08x->pd->memcpy_burst_size) { + default: + case PL08X_BURST_SZ_1: + val |= PL080_BSIZE_1 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_4: + val |= PL080_BSIZE_4 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_8: + val |= PL080_BSIZE_8 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_16: + val |= PL080_BSIZE_16 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_32: + val |= PL080_BSIZE_32 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_64: + val |= PL080_BSIZE_64 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_128: + val |= PL080_BSIZE_128 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_256: + val |= PL080_BSIZE_256 << + FTDMAC020_CH_CSR_SRC_SIZE_SHIFT; + break; + } + + /* Protection flags */ + if (pl08x->pd->memcpy_prot_buff) + val |= FTDMAC020_CH_CSR_PROT2; + if (pl08x->pd->memcpy_prot_cache) + val |= FTDMAC020_CH_CSR_PROT3; + /* We are the kernel, so we are in privileged mode */ + val |= FTDMAC020_CH_CSR_PROT1; + + writel_relaxed(val, phychan->reg_control); + } else { + /* Bits are just identical */ + writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control); + } + /* Second control word on the PL080s */ if (pl08x->vd->pl080s) writel_relaxed(lli[PL080S_LLI_CCTL2], phychan->base + PL080S_CH_CONTROL2); @@ -423,11 +558,25 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan) cpu_relax(); /* Do not access config register until channel shows as inactive */ - val = readl(phychan->reg_config); - while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE)) + if (phychan->ftdmac020) { + val = readl(phychan->reg_config); + while (val & FTDMAC020_CH_CFG_BUSY) + val = readl(phychan->reg_config); + + val = readl(phychan->reg_control); + while (val & FTDMAC020_CH_CSR_EN) + val = readl(phychan->reg_control); + + writel(val | FTDMAC020_CH_CSR_EN, + phychan->reg_control); + } else { val = readl(phychan->reg_config); + while ((val & PL080_CONFIG_ACTIVE) || + (val & PL080_CONFIG_ENABLE)) + val = readl(phychan->reg_config); - writel(val | PL080_CONFIG_ENABLE, phychan->reg_config); + writel(val | PL080_CONFIG_ENABLE, phychan->reg_config); + } } /* @@ -445,6 +594,14 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch) u32 val; int timeout; + if (ch->ftdmac020) { + /* Use the enable bit on the FTDMAC020 */ + val = readl(ch->reg_control); + val &= ~FTDMAC020_CH_CSR_EN; + writel(val, ch->reg_control); + return; + } + /* Set the HALT bit and wait for the FIFO to drain */ val = readl(ch->reg_config); val |= PL080_CONFIG_HALT; @@ -464,6 +621,14 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) { u32 val; + /* Use the enable bit on the FTDMAC020 */ + if (ch->ftdmac020) { + val = readl(ch->reg_control); + val |= FTDMAC020_CH_CSR_EN; + writel(val, ch->reg_control); + return; + } + /* Clear the HALT bit */ val = readl(ch->reg_config); val &= ~PL080_CONFIG_HALT; @@ -479,25 +644,68 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch) static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, struct pl08x_phy_chan *ch) { - u32 val = readl(ch->reg_config); + u32 val; + /* The layout for the FTDMAC020 is different */ + if (ch->ftdmac020) { + /* Disable all interrupts */ + val = readl(ch->reg_config); + val |= (FTDMAC020_CH_CFG_INT_ABT_MASK | + FTDMAC020_CH_CFG_INT_ERR_MASK | + FTDMAC020_CH_CFG_INT_TC_MASK); + writel(val, ch->reg_config); + + /* Abort and disable channel */ + val = readl(ch->reg_control); + val &= ~FTDMAC020_CH_CSR_EN; + val |= FTDMAC020_CH_CSR_ABT; + writel(val, ch->reg_control); + + /* Clear ABT and ERR interrupt flags */ + writel(BIT(ch->id) | BIT(ch->id + 16), + pl08x->base + PL080_ERR_CLEAR); + writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR); + + return; + } + + val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | PL080_CONFIG_TC_IRQ_MASK); - writel(val, ch->reg_config); writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR); writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR); } -static inline u32 get_bytes_in_cctl(u32 cctl) +static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch) { - /* The source width defines the number of bytes */ - u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK; + u32 val; + u32 bytes; + + if (ch->ftdmac020) { + bytes = readl(ch->base + FTDMAC020_CH_SIZE); - cctl &= PL080_CONTROL_SWIDTH_MASK; + val = readl(ch->reg_control); + val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK; + val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT; + } else if (ch->pl080s) { + val = readl(ch->base + PL080S_CH_CONTROL2); + bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK; - switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) { + val = readl(ch->reg_control); + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } else { + /* Plain PL08x */ + val = readl(ch->reg_control); + bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK; + + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } + + switch (val) { case PL080_WIDTH_8BIT: break; case PL080_WIDTH_16BIT: @@ -510,14 +718,35 @@ static inline u32 get_bytes_in_cctl(u32 cctl) return bytes; } -static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1) +static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va) { - /* The source width defines the number of bytes */ - u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK; + u32 val; + u32 bytes; + + if (ch->ftdmac020) { + val = llis_va[PL080_LLI_CCTL]; + bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK; + + val = llis_va[PL080_LLI_CCTL]; + val &= FTDMAC020_LLI_SRC_WIDTH_MSK; + val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT; + } else if (ch->pl080s) { + val = llis_va[PL080S_LLI_CCTL2]; + bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK; + + val = llis_va[PL080_LLI_CCTL]; + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } else { + /* Plain PL08x */ + val = llis_va[PL080_LLI_CCTL]; + bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK; - cctl &= PL080_CONTROL_SWIDTH_MASK; + val &= PL080_CONTROL_SWIDTH_MASK; + val >>= PL080_CONTROL_SWIDTH_SHIFT; + } - switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) { + switch (val) { case PL080_WIDTH_8BIT: break; case PL080_WIDTH_16BIT: @@ -552,15 +781,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) * Follow the LLIs to get the number of remaining * bytes in the currently active transaction. */ - clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2; + clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2; /* First get the remaining bytes in the active transfer */ - if (pl08x->vd->pl080s) - bytes = get_bytes_in_cctl_pl080s( - readl(ch->base + PL080_CH_CONTROL), - readl(ch->base + PL080S_CH_CONTROL2)); - else - bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL)); + bytes = get_bytes_in_phy_channel(ch); if (!clli) return bytes; @@ -581,12 +805,7 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan) llis_va_limit = llis_va + llis_max_words; for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) { - if (pl08x->vd->pl080s) - bytes += get_bytes_in_cctl_pl080s( - llis_va[PL080_LLI_CCTL], - llis_va[PL080S_LLI_CCTL2]); - else - bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]); + bytes += get_bytes_in_lli(ch, llis_va); /* * A LLI pointer going backward terminates the LLI list @@ -705,7 +924,7 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan) break; } - if (!next) { + if (!next && pl08x->has_slave) { list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node) if (p->state == PL08X_CHAN_WAITING) { next = p; @@ -746,9 +965,30 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan) * LLI handling */ -static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded) +static inline unsigned int +pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x, + u32 cctl, + bool source) { - switch (coded) { + u32 val; + + if (pl08x->vd->ftdmac020) { + if (source) + val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >> + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + else + val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >> + FTDMAC020_LLI_DST_WIDTH_SHIFT; + } else { + if (source) + val = (cctl & PL080_CONTROL_SWIDTH_MASK) >> + PL080_CONTROL_SWIDTH_SHIFT; + else + val = (cctl & PL080_CONTROL_DWIDTH_MASK) >> + PL080_CONTROL_DWIDTH_SHIFT; + } + + switch (val) { case PL080_WIDTH_8BIT: return 1; case PL080_WIDTH_16BIT: @@ -762,49 +1002,106 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded) return 0; } -static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth, - size_t tsize) +static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x, + u32 cctl, + u8 srcwidth, u8 dstwidth, + size_t tsize) { u32 retbits = cctl; - /* Remove all src, dst and transfer size bits */ - retbits &= ~PL080_CONTROL_DWIDTH_MASK; - retbits &= ~PL080_CONTROL_SWIDTH_MASK; - retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; + /* + * Remove all src, dst and transfer size bits, then set the + * width and size according to the parameters. The bit offsets + * are different in the FTDMAC020 so we need to accound for this. + */ + if (pl08x->vd->ftdmac020) { + retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK; + retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK; + retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK; + + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + FTDMAC020_LLI_SRC_WIDTH_SHIFT; + break; + default: + BUG(); + break; + } - /* Then set the bits according to the parameters */ - switch (srcwidth) { - case 1: - retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT; - break; - case 2: - retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT; - break; - case 4: - retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT; - break; - default: - BUG(); - break; - } + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + default: + BUG(); + break; + } - switch (dstwidth) { - case 1: - retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; - break; - case 2: - retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; - break; - case 4: - retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; - break; - default: - BUG(); - break; + tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK; + retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT; + } else { + retbits &= ~PL080_CONTROL_DWIDTH_MASK; + retbits &= ~PL080_CONTROL_SWIDTH_MASK; + retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK; + + switch (srcwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + PL080_CONTROL_SWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + switch (dstwidth) { + case 1: + retbits |= PL080_WIDTH_8BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + case 2: + retbits |= PL080_WIDTH_16BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + case 4: + retbits |= PL080_WIDTH_32BIT << + PL080_CONTROL_DWIDTH_SHIFT; + break; + default: + BUG(); + break; + } + + tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK; + retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT; } - tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK; - retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT; return retbits; } @@ -825,13 +1122,35 @@ struct pl08x_lli_build_data { * - prefers the destination bus if both available * - prefers bus with fixed address (i.e. peripheral) */ -static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd, - struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl) +static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x, + struct pl08x_lli_build_data *bd, + struct pl08x_bus_data **mbus, + struct pl08x_bus_data **sbus, + u32 cctl) { - if (!(cctl & PL080_CONTROL_DST_INCR)) { + bool dst_incr; + bool src_incr; + + /* + * The FTDMAC020 only supports memory-to-memory transfer, so + * source and destination always increase. + */ + if (pl08x->vd->ftdmac020) { + dst_incr = true; + src_incr = true; + } else { + dst_incr = !!(cctl & PL080_CONTROL_DST_INCR); + src_incr = !!(cctl & PL080_CONTROL_SRC_INCR); + } + + /* + * If either bus is not advancing, i.e. it is a peripheral, that + * one becomes master + */ + if (!dst_incr) { *mbus = &bd->dstbus; *sbus = &bd->srcbus; - } else if (!(cctl & PL080_CONTROL_SRC_INCR)) { + } else if (!src_incr) { *mbus = &bd->srcbus; *sbus = &bd->dstbus; } else { @@ -869,10 +1188,16 @@ static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x, if (pl08x->vd->pl080s) llis_va[PL080S_LLI_CCTL2] = cctl2; - if (cctl & PL080_CONTROL_SRC_INCR) + if (pl08x->vd->ftdmac020) { + /* FIXME: only memcpy so far so both increase */ bd->srcbus.addr += len; - if (cctl & PL080_CONTROL_DST_INCR) bd->dstbus.addr += len; + } else { + if (cctl & PL080_CONTROL_SRC_INCR) + bd->srcbus.addr += len; + if (cctl & PL080_CONTROL_DST_INCR) + bd->dstbus.addr += len; + } BUG_ON(bd->remainder < len); @@ -883,12 +1208,12 @@ static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x, struct pl08x_lli_build_data *bd, u32 *cctl, u32 len, int num_llis, size_t *total_bytes) { - *cctl = pl08x_cctl_bits(*cctl, 1, 1, len); + *cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len); pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len); (*total_bytes) += len; } -#ifdef VERBOSE_DEBUG +#if 1 static void pl08x_dump_lli(struct pl08x_driver_data *pl08x, const u32 *llis_va, int num_llis) { @@ -953,14 +1278,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, cctl = txd->cctl; /* Find maximum width of the source bus */ - bd.srcbus.maxwidth = - pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >> - PL080_CONTROL_SWIDTH_SHIFT); + bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true); /* Find maximum width of the destination bus */ - bd.dstbus.maxwidth = - pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >> - PL080_CONTROL_DWIDTH_SHIFT); + bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false); list_for_each_entry(dsg, &txd->dsg_list, node) { total_bytes = 0; @@ -972,7 +1293,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, bd.srcbus.buswidth = bd.srcbus.maxwidth; bd.dstbus.buswidth = bd.dstbus.maxwidth; - pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl); + pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl); dev_vdbg(&pl08x->adev->dev, "src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n", @@ -1009,8 +1330,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, * supported. Thus, we can't have scattered addresses. */ if (!bd.remainder) { - u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >> - PL080_CONFIG_FLOW_CONTROL_SHIFT; + u32 fc; + + /* FTDMAC020 only does memory-to-memory */ + if (pl08x->vd->ftdmac020) + fc = PL080_FLOW_MEM2MEM; + else + fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >> + PL080_CONFIG_FLOW_CONTROL_SHIFT; if (!((fc >= PL080_FLOW_SRC2DST_DST) && (fc <= PL080_FLOW_SRC2DST_SRC))) { dev_err(&pl08x->adev->dev, "%s sg len can't be zero", @@ -1027,8 +1354,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, return 0; } - cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth, - bd.dstbus.buswidth, 0); + cctl = pl08x_lli_control_bits(pl08x, cctl, + bd.srcbus.buswidth, bd.dstbus.buswidth, + 0); pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++, 0, cctl, 0); break; @@ -1107,8 +1435,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, "size 0x%08zx (remainder 0x%08zx)\n", __func__, lli_len, bd.remainder); - cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth, - bd.dstbus.buswidth, tsize); + cctl = pl08x_lli_control_bits(pl08x, cctl, + bd.srcbus.buswidth, bd.dstbus.buswidth, + tsize); pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++, lli_len, cctl, tsize); total_bytes += lli_len; @@ -1151,7 +1480,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, /* The final LLI terminates the LLI. */ last_lli[PL080_LLI_LLI] = 0; /* The final LLI element shall also fire an interrupt. */ - last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN; + if (pl08x->vd->ftdmac020) + last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK; + else + last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN; } pl08x_dump_lli(pl08x, llis_va, num_llis); @@ -1317,14 +1649,25 @@ static const struct burst_table burst_sizes[] = { * will be routed to each port. We try to have source and destination * on separate ports, but always respect the allowable settings. */ -static u32 pl08x_select_bus(u8 src, u8 dst) +static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst) { u32 cctl = 0; + u32 dst_ahb2; + u32 src_ahb2; + + /* The FTDMAC020 use different bits to indicate src/dst bus */ + if (ftdmac020) { + dst_ahb2 = FTDMAC020_LLI_DST_SEL; + src_ahb2 = FTDMAC020_LLI_SRC_SEL; + } else { + dst_ahb2 = PL080_CONTROL_DST_AHB2; + src_ahb2 = PL080_CONTROL_SRC_AHB2; + } if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1))) - cctl |= PL080_CONTROL_DST_AHB2; + cctl |= dst_ahb2; if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2))) - cctl |= PL080_CONTROL_SRC_AHB2; + cctl |= src_ahb2; return cctl; } @@ -1412,14 +1755,134 @@ static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan) { struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT); - if (txd) { + if (txd) INIT_LIST_HEAD(&txd->dsg_list); + return txd; +} - /* Always enable error and terminal interrupts */ - txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK; +static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x) +{ + u32 cctl = 0; + + /* Conjure cctl */ + switch (pl08x->pd->memcpy_burst_size) { + default: + dev_err(&pl08x->adev->dev, + "illegal burst size for memcpy, set to 1\n"); + /* Fall through */ + case PL08X_BURST_SZ_1: + cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_4: + cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_8: + cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_16: + cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_32: + cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_64: + cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_128: + cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case PL08X_BURST_SZ_256: + cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; } - return txd; + + switch (pl08x->pd->memcpy_bus_width) { + default: + dev_err(&pl08x->adev->dev, + "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case PL08X_BUS_WIDTH_8_BITS: + cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_16_BITS: + cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_32_BITS: + cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* Protection flags */ + if (pl08x->pd->memcpy_prot_buff) + cctl |= PL080_CONTROL_PROT_BUFF; + if (pl08x->pd->memcpy_prot_cache) + cctl |= PL080_CONTROL_PROT_CACHE; + + /* We are the kernel, so we are in privileged mode */ + cctl |= PL080_CONTROL_PROT_SYS; + + /* Both to be incremented or the code will break */ + cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; + + if (pl08x->vd->dualmaster) + cctl |= pl08x_select_bus(false, + pl08x->mem_buses, + pl08x->mem_buses); + + return cctl; +} + +static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x) +{ + u32 cctl = 0; + + /* Conjure cctl */ + switch (pl08x->pd->memcpy_bus_width) { + default: + dev_err(&pl08x->adev->dev, + "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case PL08X_BUS_WIDTH_8_BITS: + cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_16_BITS: + cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + case PL08X_BUS_WIDTH_32_BITS: + cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT | + PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT; + break; + } + + /* + * By default mask the TC IRQ on all LLIs, it will be unmasked on + * the last LLI item by other code. + */ + cctl |= FTDMAC020_LLI_TC_MSK; + + /* + * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL + * and FTDMAC020_LLI_DSTAD_CTL as zero + */ + if (pl08x->vd->dualmaster) + cctl |= pl08x_select_bus(true, + pl08x->mem_buses, + pl08x->mem_buses); + + return cctl; } /* @@ -1452,18 +1915,16 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy( dsg->src_addr = src; dsg->dst_addr = dest; dsg->len = len; - - /* Set platform data for m2m */ - txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; - txd->cctl = pl08x->pd->memcpy_channel.cctl_memcpy & - ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2); - - /* Both to be incremented or the code will break */ - txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR; - - if (pl08x->vd->dualmaster) - txd->cctl |= pl08x_select_bus(pl08x->mem_buses, - pl08x->mem_buses); + if (pl08x->vd->ftdmac020) { + /* Writing CCFG zero ENABLES all interrupts */ + txd->ccfg = 0; + txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x); + } else { + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK | + PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->cctl = pl08x_memcpy_cctl(pl08x); + } ret = pl08x_fill_llis_for_desc(plchan->host, txd); if (!ret) { @@ -1527,7 +1988,7 @@ static struct pl08x_txd *pl08x_init_txd( return NULL; } - txd->cctl = cctl | pl08x_select_bus(src_buses, dst_buses); + txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses); if (plchan->cfg.device_fc) tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER : @@ -1536,7 +1997,9 @@ static struct pl08x_txd *pl08x_init_txd( tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER : PL080_FLOW_PER2MEM; - txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT; + txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK | + PL080_CONFIG_TC_IRQ_MASK | + tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT; ret = pl08x_request_mux(plchan); if (ret < 0) { @@ -1813,6 +2276,11 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) /* The Nomadik variant does not have the config register */ if (pl08x->vd->nomadik) return; + /* The FTDMAC020 variant does this in another register */ + if (pl08x->vd->ftdmac020) { + writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR); + return; + } writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG); } @@ -1925,9 +2393,16 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x, chan->signal = i; pl08x_dma_slave_init(chan); } else { - chan->cd = &pl08x->pd->memcpy_channel; + chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL); + if (!chan->cd) { + kfree(chan); + return -ENOMEM; + } + chan->cd->bus_id = "memcpy"; + chan->cd->periph_buses = pl08x->pd->mem_buses; chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i); if (!chan->name) { + kfree(chan->cd); kfree(chan); return -ENOMEM; } @@ -2009,12 +2484,15 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data) pl08x_state_str(chan->state)); } - seq_printf(s, "\nPL08x virtual slave channels:\n"); - seq_printf(s, "CHANNEL:\tSTATE:\n"); - seq_printf(s, "--------\t------\n"); - list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { - seq_printf(s, "%s\t\t%s\n", chan->name, - pl08x_state_str(chan->state)); + if (pl08x->has_slave) { + seq_printf(s, "\nPL08x virtual slave channels:\n"); + seq_printf(s, "CHANNEL:\tSTATE:\n"); + seq_printf(s, "--------\t------\n"); + list_for_each_entry(chan, &pl08x->slave.channels, + vc.chan.device_node) { + seq_printf(s, "%s\t\t%s\n", chan->name, + pl08x_state_str(chan->state)); + } } return 0; @@ -2052,6 +2530,10 @@ static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, { struct pl08x_dma_chan *chan; + /* Trying to get a slave channel from something with no slave support */ + if (!pl08x->has_slave) + return NULL; + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { if (chan->signal == id) return &chan->vc.chan; @@ -2099,7 +2581,6 @@ static int pl08x_of_probe(struct amba_device *adev, { struct pl08x_platform_data *pd; struct pl08x_channel_data *chanp = NULL; - u32 cctl_memcpy = 0; u32 val; int ret; int i; @@ -2139,36 +2620,28 @@ static int pl08x_of_probe(struct amba_device *adev, dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); /* Fall through */ case 1: - cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_1; break; case 4: - cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_4; break; case 8: - cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_8; break; case 16: - cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_16; break; case 32: - cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_32; break; case 64: - cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_64; break; case 128: - cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_128; break; case 256: - cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | - PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + pd->memcpy_burst_size = PL08X_BURST_SZ_256; break; } @@ -2182,48 +2655,40 @@ static int pl08x_of_probe(struct amba_device *adev, dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); /* Fall through */ case 8: - cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | - PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS; break; case 16: - cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | - PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS; break; case 32: - cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | - PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS; break; } - /* This is currently the only thing making sense */ - cctl_memcpy |= PL080_CONTROL_PROT_SYS; - - /* Set up memcpy channel */ - pd->memcpy_channel.bus_id = "memcpy"; - pd->memcpy_channel.cctl_memcpy = cctl_memcpy; - /* Use the buses that can access memory, obviously */ - pd->memcpy_channel.periph_buses = pd->mem_buses; - /* * Allocate channel data for all possible slave channels (one * for each possible signal), channels will then be allocated * for a device and have it's AHB interfaces set up at * translation time. */ - chanp = devm_kcalloc(&adev->dev, - pl08x->vd->signals, - sizeof(struct pl08x_channel_data), - GFP_KERNEL); - if (!chanp) - return -ENOMEM; + if (pl08x->vd->signals) { + chanp = devm_kcalloc(&adev->dev, + pl08x->vd->signals, + sizeof(struct pl08x_channel_data), + GFP_KERNEL); + if (!chanp) + return -ENOMEM; - pd->slave_channels = chanp; - for (i = 0; i < pl08x->vd->signals; i++) { - /* chanp->periph_buses will be assigned at translation */ - chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); - chanp++; + pd->slave_channels = chanp; + for (i = 0; i < pl08x->vd->signals; i++) { + /* + * chanp->periph_buses will be assigned at translation + */ + chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i); + chanp++; + } + pd->num_slave_channels = pl08x->vd->signals; } - pd->num_slave_channels = pl08x->vd->signals; pl08x->pd = pd; @@ -2242,7 +2707,7 @@ static inline int pl08x_of_probe(struct amba_device *adev, static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) { struct pl08x_driver_data *pl08x; - const struct vendor_data *vd = id->data; + struct vendor_data *vd = id->data; struct device_node *np = adev->dev.of_node; u32 tsfr_size; int ret = 0; @@ -2268,6 +2733,34 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) pl08x->adev = adev; pl08x->vd = vd; + pl08x->base = ioremap(adev->res.start, resource_size(&adev->res)); + if (!pl08x->base) { + ret = -ENOMEM; + goto out_no_ioremap; + } + + if (vd->ftdmac020) { + u32 val; + + val = readl(pl08x->base + FTDMAC020_REVISION); + dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n", + (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff); + val = readl(pl08x->base + FTDMAC020_FEATURE); + dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, " + "%s built-in bridge, %s, %s linked lists\n", + (val >> 12) & 0x0f, + (val & BIT(10)) ? "no" : "has", + (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0", + (val & BIT(8)) ? "supports" : "does not support"); + + /* Vendor data from feature register */ + if (!(val & BIT(8))) + dev_warn(&pl08x->adev->dev, + "linked lists not supported, required\n"); + vd->channels = (val >> 12) & 0x0f; + vd->dualmaster = !!(val & BIT(9)); + } + /* Initialize memcpy engine */ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask); pl08x->memcpy.dev = &adev->dev; @@ -2284,25 +2777,38 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS; pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM); pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + if (vd->ftdmac020) + pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; - /* Initialize slave engine */ - dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); - dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask); - pl08x->slave.dev = &adev->dev; - pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources; - pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt; - pl08x->slave.device_tx_status = pl08x_dma_tx_status; - pl08x->slave.device_issue_pending = pl08x_issue_pending; - pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg; - pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic; - pl08x->slave.device_config = pl08x_config; - pl08x->slave.device_pause = pl08x_pause; - pl08x->slave.device_resume = pl08x_resume; - pl08x->slave.device_terminate_all = pl08x_terminate_all; - pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS; - pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS; - pl08x->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - pl08x->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + + /* + * Initialize slave engine, if the block has no signals, that means + * we have no slave support. + */ + if (vd->signals) { + pl08x->has_slave = true; + dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask); + pl08x->slave.dev = &adev->dev; + pl08x->slave.device_free_chan_resources = + pl08x_free_chan_resources; + pl08x->slave.device_prep_dma_interrupt = + pl08x_prep_dma_interrupt; + pl08x->slave.device_tx_status = pl08x_dma_tx_status; + pl08x->slave.device_issue_pending = pl08x_issue_pending; + pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg; + pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic; + pl08x->slave.device_config = pl08x_config; + pl08x->slave.device_pause = pl08x_pause; + pl08x->slave.device_resume = pl08x_resume; + pl08x->slave.device_terminate_all = pl08x_terminate_all; + pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.directions = + BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + pl08x->slave.residue_granularity = + DMA_RESIDUE_GRANULARITY_SEGMENT; + } /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); @@ -2344,19 +2850,18 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_lli_pool; } - pl08x->base = ioremap(adev->res.start, resource_size(&adev->res)); - if (!pl08x->base) { - ret = -ENOMEM; - goto out_no_ioremap; - } - /* Turn on the PL08x */ pl08x_ensure_on(pl08x); - /* Attach the interrupt handler */ - writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); + /* Clear any pending interrupts */ + if (vd->ftdmac020) + /* This variant has error IRQs in bits 16-19 */ + writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR); + else + writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR); writel(0x000000FF, pl08x->base + PL080_TC_CLEAR); + /* Attach the interrupt handler */ ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x); if (ret) { dev_err(&adev->dev, "%s failed to request interrupt %d\n", @@ -2377,7 +2882,25 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) ch->id = i; ch->base = pl08x->base + PL080_Cx_BASE(i); - ch->reg_config = ch->base + vd->config_offset; + if (vd->ftdmac020) { + /* FTDMA020 has a special channel busy register */ + ch->reg_busy = ch->base + FTDMAC020_CH_BUSY; + ch->reg_config = ch->base + FTDMAC020_CH_CFG; + ch->reg_control = ch->base + FTDMAC020_CH_CSR; + ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR; + ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR; + ch->reg_lli = ch->base + FTDMAC020_CH_LLP; + ch->ftdmac020 = true; + } else { + ch->reg_config = ch->base + vd->config_offset; + ch->reg_control = ch->base + PL080_CH_CONTROL; + ch->reg_src = ch->base + PL080_CH_SRC_ADDR; + ch->reg_dst = ch->base + PL080_CH_DST_ADDR; + ch->reg_lli = ch->base + PL080_CH_LLI; + } + if (vd->pl080s) + ch->pl080s = true; + spin_lock_init(&ch->lock); /* @@ -2410,13 +2933,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } /* Register slave channels */ - ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave, - pl08x->pd->num_slave_channels, true); - if (ret < 0) { - dev_warn(&pl08x->adev->dev, - "%s failed to enumerate slave channels - %d\n", - __func__, ret); - goto out_no_slave; + if (pl08x->has_slave) { + ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave, + pl08x->pd->num_slave_channels, true); + if (ret < 0) { + dev_warn(&pl08x->adev->dev, + "%s failed to enumerate slave channels - %d\n", + __func__, ret); + goto out_no_slave; + } } ret = dma_async_device_register(&pl08x->memcpy); @@ -2427,12 +2952,14 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) goto out_no_memcpy_reg; } - ret = dma_async_device_register(&pl08x->slave); - if (ret) { - dev_warn(&pl08x->adev->dev, + if (pl08x->has_slave) { + ret = dma_async_device_register(&pl08x->slave); + if (ret) { + dev_warn(&pl08x->adev->dev, "%s failed to register slave as an async device - %d\n", __func__, ret); - goto out_no_slave_reg; + goto out_no_slave_reg; + } } amba_set_drvdata(adev, pl08x); @@ -2446,7 +2973,8 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) out_no_slave_reg: dma_async_device_unregister(&pl08x->memcpy); out_no_memcpy_reg: - pl08x_free_virtual_channels(&pl08x->slave); + if (pl08x->has_slave) + pl08x_free_virtual_channels(&pl08x->slave); out_no_slave: pl08x_free_virtual_channels(&pl08x->memcpy); out_no_memcpy: @@ -2454,11 +2982,11 @@ out_no_memcpy: out_no_phychans: free_irq(adev->irq[0], pl08x); out_no_irq: - iounmap(pl08x->base); -out_no_ioremap: dma_pool_destroy(pl08x->pool); out_no_lli_pool: out_no_platdata: + iounmap(pl08x->base); +out_no_ioremap: kfree(pl08x); out_no_pl08x: amba_release_regions(adev); @@ -2499,7 +3027,13 @@ static struct vendor_data vendor_pl081 = { .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, }; -static struct amba_id pl08x_ids[] = { +static struct vendor_data vendor_ftdmac020 = { + .config_offset = PL080_CH_CONFIG, + .ftdmac020 = true, + .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK, +}; + +static const struct amba_id pl08x_ids[] = { /* Samsung PL080S variant */ { .id = 0x0a141080, @@ -2524,6 +3058,12 @@ static struct amba_id pl08x_ids[] = { .mask = 0x00ffffff, .data = &vendor_nomadik, }, + /* Faraday Technology FTDMAC020 */ + { + .id = 0x0003b080, + .mask = 0x000fffff, + .data = &vendor_ftdmac020, + }, { 0, 0 }, }; diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 1baf3404a365..fbab271b3bf9 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1203,138 +1203,6 @@ err: } /** - * atc_prep_dma_sg - prepare memory to memory scather-gather operation - * @chan: the channel to prepare operation on - * @dst_sg: destination scatterlist - * @dst_nents: number of destination scatterlist entries - * @src_sg: source scatterlist - * @src_nents: number of source scatterlist entries - * @flags: tx descriptor status flags - */ -static struct dma_async_tx_descriptor * -atc_prep_dma_sg(struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags) -{ - struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_desc *desc = NULL; - struct at_desc *first = NULL; - struct at_desc *prev = NULL; - unsigned int src_width; - unsigned int dst_width; - size_t xfer_count; - u32 ctrla; - u32 ctrlb; - size_t dst_len = 0, src_len = 0; - dma_addr_t dst = 0, src = 0; - size_t len = 0, total_len = 0; - - if (unlikely(dst_nents == 0 || src_nents == 0)) - return NULL; - - if (unlikely(dst_sg == NULL || src_sg == NULL)) - return NULL; - - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN - | ATC_SRC_ADDR_MODE_INCR - | ATC_DST_ADDR_MODE_INCR - | ATC_FC_MEM2MEM; - - /* - * loop until there is either no more source or no more destination - * scatterlist entry - */ - while (true) { - - /* prepare the next transfer */ - if (dst_len == 0) { - - /* no more destination scatterlist entries */ - if (!dst_sg || !dst_nents) - break; - - dst = sg_dma_address(dst_sg); - dst_len = sg_dma_len(dst_sg); - - dst_sg = sg_next(dst_sg); - dst_nents--; - } - - if (src_len == 0) { - - /* no more source scatterlist entries */ - if (!src_sg || !src_nents) - break; - - src = sg_dma_address(src_sg); - src_len = sg_dma_len(src_sg); - - src_sg = sg_next(src_sg); - src_nents--; - } - - len = min_t(size_t, src_len, dst_len); - if (len == 0) - continue; - - /* take care for the alignment */ - src_width = dst_width = atc_get_xfer_width(src, dst, len); - - ctrla = ATC_SRC_WIDTH(src_width) | - ATC_DST_WIDTH(dst_width); - - /* - * The number of transfers to set up refer to the source width - * that depends on the alignment. - */ - xfer_count = len >> src_width; - if (xfer_count > ATC_BTSIZE_MAX) { - xfer_count = ATC_BTSIZE_MAX; - len = ATC_BTSIZE_MAX << src_width; - } - - /* create the transfer */ - desc = atc_desc_get(atchan); - if (!desc) - goto err_desc_get; - - desc->lli.saddr = src; - desc->lli.daddr = dst; - desc->lli.ctrla = ctrla | xfer_count; - desc->lli.ctrlb = ctrlb; - - desc->txd.cookie = 0; - desc->len = len; - - atc_desc_chain(&first, &prev, desc); - - /* update the lengths and addresses for the next loop cycle */ - dst_len -= len; - src_len -= len; - dst += len; - src += len; - - total_len += len; - } - - /* First descriptor of the chain embedds additional information */ - first->txd.cookie = -EBUSY; - first->total_len = total_len; - - /* set end-of-link to the last link descriptor of list*/ - set_desc_eol(desc); - - first->txd.flags = flags; /* client is in control of this ack */ - - return &first->txd; - -err_desc_get: - atc_desc_put(atchan, first); - return NULL; -} - -/** * atc_dma_cyclic_check_values * Check for too big/unaligned periods and unaligned DMA buffer */ @@ -1933,14 +1801,12 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); - dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask); dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); - dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); /* get DMA parameters from controller type */ plat_dat = at_dma_get_driver_data(pdev); @@ -2078,16 +1944,12 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; } - if (dma_has_cap(DMA_SG, atdma->dma_common.cap_mask)) - atdma->dma_common.device_prep_dma_sg = atc_prep_dma_sg; - dma_writel(atdma, EN, AT_DMA_ENABLE); - dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s%s), %d channels\n", + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", - dma_has_cap(DMA_SG, atdma->dma_common.cap_mask) ? "sg-cpy " : "", plat_dat->nr_channels); dma_async_device_register(&atdma->dma_common); diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7d4e0bcda9af..c00e3923d7d8 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -875,7 +875,7 @@ at_xdmac_interleaved_queue_desc(struct dma_chan *chan, dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { dev_dbg(chan2dev(chan), - "%s: chunk too big (%d, max size %lu)...\n", + "%s: chunk too big (%zu, max size %lu)...\n", __func__, chunk->size, AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); return NULL; @@ -956,7 +956,7 @@ at_xdmac_prep_interleaved(struct dma_chan *chan, if ((xt->numf > 1) && (xt->frame_size > 1)) return NULL; - dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n", __func__, &xt->src_start, &xt->dst_start, xt->numf, xt->frame_size, flags); @@ -990,7 +990,7 @@ at_xdmac_prep_interleaved(struct dma_chan *chan, dst_skip = chunk->size + dst_icg; dev_dbg(chan2dev(chan), - "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + "%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n", __func__, chunk->size, src_icg, dst_icg); desc = at_xdmac_interleaved_queue_desc(chan, atchan, @@ -1207,7 +1207,7 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); struct at_xdmac_desc *desc; - dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n", __func__, &dest, len, value, flags); if (unlikely(!len)) @@ -1883,8 +1883,11 @@ static int atmel_xdmac_resume(struct device *dev) struct at_xdmac_chan *atchan; struct dma_chan *chan, *_chan; int i; + int ret; - clk_prepare_enable(atxdmac->clk); + ret = clk_prepare_enable(atxdmac->clk); + if (ret) + return ret; /* Clear pending interrupts. */ for (i = 0; i < atxdmac->dma.chancnt; i++) { diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c new file mode 100644 index 000000000000..6c2c44724637 --- /dev/null +++ b/drivers/dma/bcm-sba-raid.c @@ -0,0 +1,1831 @@ +/* + * Copyright (C) 2017 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Broadcom SBA RAID Driver + * + * The Broadcom stream buffer accelerator (SBA) provides offloading + * capabilities for RAID operations. The SBA offload engine is accessible + * via Broadcom SoC specific ring manager. Two or more offload engines + * can share same Broadcom SoC specific ring manager due to this Broadcom + * SoC specific ring manager driver is implemented as a mailbox controller + * driver and offload engine drivers are implemented as mallbox clients. + * + * Typically, Broadcom SoC specific ring manager will implement larger + * number of hardware rings over one or more SBA hardware devices. By + * design, the internal buffer size of SBA hardware device is limited + * but all offload operations supported by SBA can be broken down into + * multiple small size requests and executed parallely on multiple SBA + * hardware devices for achieving high through-put. + * + * The Broadcom SBA RAID driver does not require any register programming + * except submitting request to SBA hardware device via mailbox channels. + * This driver implements a DMA device with one DMA channel using a set + * of mailbox channels provided by Broadcom SoC specific ring manager + * driver. To exploit parallelism (as described above), all DMA request + * coming to SBA RAID DMA channel are broken down to smaller requests + * and submitted to multiple mailbox channels in round-robin fashion. + * For having more SBA DMA channels, we can create more SBA device nodes + * in Broadcom SoC specific DTS based on number of hardware rings supported + * by Broadcom SoC ring manager. + */ + +#include <linux/bitops.h> +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/list.h> +#include <linux/mailbox_client.h> +#include <linux/mailbox/brcm-message.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/slab.h> +#include <linux/raid/pq.h> + +#include "dmaengine.h" + +/* ====== Driver macros and defines ===== */ + +#define SBA_TYPE_SHIFT 48 +#define SBA_TYPE_MASK GENMASK(1, 0) +#define SBA_TYPE_A 0x0 +#define SBA_TYPE_B 0x2 +#define SBA_TYPE_C 0x3 +#define SBA_USER_DEF_SHIFT 32 +#define SBA_USER_DEF_MASK GENMASK(15, 0) +#define SBA_R_MDATA_SHIFT 24 +#define SBA_R_MDATA_MASK GENMASK(7, 0) +#define SBA_C_MDATA_MS_SHIFT 18 +#define SBA_C_MDATA_MS_MASK GENMASK(1, 0) +#define SBA_INT_SHIFT 17 +#define SBA_INT_MASK BIT(0) +#define SBA_RESP_SHIFT 16 +#define SBA_RESP_MASK BIT(0) +#define SBA_C_MDATA_SHIFT 8 +#define SBA_C_MDATA_MASK GENMASK(7, 0) +#define SBA_C_MDATA_BNUMx_SHIFT(__bnum) (2 * (__bnum)) +#define SBA_C_MDATA_BNUMx_MASK GENMASK(1, 0) +#define SBA_C_MDATA_DNUM_SHIFT 5 +#define SBA_C_MDATA_DNUM_MASK GENMASK(4, 0) +#define SBA_C_MDATA_LS(__v) ((__v) & 0xff) +#define SBA_C_MDATA_MS(__v) (((__v) >> 8) & 0x3) +#define SBA_CMD_SHIFT 0 +#define SBA_CMD_MASK GENMASK(3, 0) +#define SBA_CMD_ZERO_BUFFER 0x4 +#define SBA_CMD_ZERO_ALL_BUFFERS 0x8 +#define SBA_CMD_LOAD_BUFFER 0x9 +#define SBA_CMD_XOR 0xa +#define SBA_CMD_GALOIS_XOR 0xb +#define SBA_CMD_WRITE_BUFFER 0xc +#define SBA_CMD_GALOIS 0xe + +#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192 + +/* Driver helper macros */ +#define to_sba_request(tx) \ + container_of(tx, struct sba_request, tx) +#define to_sba_device(dchan) \ + container_of(dchan, struct sba_device, dma_chan) + +/* ===== Driver data structures ===== */ + +enum sba_request_flags { + SBA_REQUEST_STATE_FREE = 0x001, + SBA_REQUEST_STATE_ALLOCED = 0x002, + SBA_REQUEST_STATE_PENDING = 0x004, + SBA_REQUEST_STATE_ACTIVE = 0x008, + SBA_REQUEST_STATE_ABORTED = 0x010, + SBA_REQUEST_STATE_MASK = 0x0ff, + SBA_REQUEST_FENCE = 0x100, +}; + +struct sba_request { + /* Global state */ + struct list_head node; + struct sba_device *sba; + u32 flags; + /* Chained requests management */ + struct sba_request *first; + struct list_head next; + atomic_t next_pending_count; + /* BRCM message data */ + struct brcm_message msg; + struct dma_async_tx_descriptor tx; + /* SBA commands */ + struct brcm_sba_command cmds[0]; +}; + +enum sba_version { + SBA_VER_1 = 0, + SBA_VER_2 +}; + +struct sba_device { + /* Underlying device */ + struct device *dev; + /* DT configuration parameters */ + enum sba_version ver; + /* Derived configuration parameters */ + u32 max_req; + u32 hw_buf_size; + u32 hw_resp_size; + u32 max_pq_coefs; + u32 max_pq_srcs; + u32 max_cmd_per_req; + u32 max_xor_srcs; + u32 max_resp_pool_size; + u32 max_cmds_pool_size; + /* Maibox client and Mailbox channels */ + struct mbox_client client; + int mchans_count; + atomic_t mchans_current; + struct mbox_chan **mchans; + struct device *mbox_dev; + /* DMA device and DMA channel */ + struct dma_device dma_dev; + struct dma_chan dma_chan; + /* DMA channel resources */ + void *resp_base; + dma_addr_t resp_dma_base; + void *cmds_base; + dma_addr_t cmds_dma_base; + spinlock_t reqs_lock; + bool reqs_fence; + struct list_head reqs_alloc_list; + struct list_head reqs_pending_list; + struct list_head reqs_active_list; + struct list_head reqs_aborted_list; + struct list_head reqs_free_list; + /* DebugFS directory entries */ + struct dentry *root; + struct dentry *stats; +}; + +/* ====== Command helper routines ===== */ + +static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask) +{ + cmd &= ~((u64)mask << shift); + cmd |= ((u64)(val & mask) << shift); + return cmd; +} + +static inline u32 __pure sba_cmd_load_c_mdata(u32 b0) +{ + return b0 & SBA_C_MDATA_BNUMx_MASK; +} + +static inline u32 __pure sba_cmd_write_c_mdata(u32 b0) +{ + return b0 & SBA_C_MDATA_BNUMx_MASK; +} + +static inline u32 __pure sba_cmd_xor_c_mdata(u32 b1, u32 b0) +{ + return (b0 & SBA_C_MDATA_BNUMx_MASK) | + ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)); +} + +static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0) +{ + return (b0 & SBA_C_MDATA_BNUMx_MASK) | + ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)) | + ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT); +} + +/* ====== General helper routines ===== */ + +static void sba_peek_mchans(struct sba_device *sba) +{ + int mchan_idx; + + for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++) + mbox_client_peek_data(sba->mchans[mchan_idx]); +} + +static struct sba_request *sba_alloc_request(struct sba_device *sba) +{ + bool found = false; + unsigned long flags; + struct sba_request *req = NULL; + + spin_lock_irqsave(&sba->reqs_lock, flags); + list_for_each_entry(req, &sba->reqs_free_list, node) { + if (async_tx_test_ack(&req->tx)) { + list_move_tail(&req->node, &sba->reqs_alloc_list); + found = true; + break; + } + } + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + if (!found) { + /* + * We have no more free requests so, we peek + * mailbox channels hoping few active requests + * would have completed which will create more + * room for new requests. + */ + sba_peek_mchans(sba); + return NULL; + } + + req->flags = SBA_REQUEST_STATE_ALLOCED; + req->first = req; + INIT_LIST_HEAD(&req->next); + atomic_set(&req->next_pending_count, 1); + + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan); + async_tx_ack(&req->tx); + + return req; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_pending_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_PENDING; + list_move_tail(&req->node, &sba->reqs_pending_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +/* Note: Must be called with sba->reqs_lock held */ +static bool _sba_active_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; + if (sba->reqs_fence) + return false; + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_ACTIVE; + list_move_tail(&req->node, &sba->reqs_active_list); + if (req->flags & SBA_REQUEST_FENCE) + sba->reqs_fence = true; + return true; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_abort_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_ABORTED; + list_move_tail(&req->node, &sba->reqs_aborted_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_free_request(struct sba_device *sba, + struct sba_request *req) +{ + lockdep_assert_held(&sba->reqs_lock); + req->flags &= ~SBA_REQUEST_STATE_MASK; + req->flags |= SBA_REQUEST_STATE_FREE; + list_move_tail(&req->node, &sba->reqs_free_list); + if (list_empty(&sba->reqs_active_list)) + sba->reqs_fence = false; +} + +static void sba_free_chained_requests(struct sba_request *req) +{ + unsigned long flags; + struct sba_request *nreq; + struct sba_device *sba = req->sba; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + _sba_free_request(sba, req); + list_for_each_entry(nreq, &req->next, next) + _sba_free_request(sba, nreq); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_chain_request(struct sba_request *first, + struct sba_request *req) +{ + unsigned long flags; + struct sba_device *sba = req->sba; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + list_add_tail(&req->next, &first->next); + req->first = first; + atomic_inc(&first->next_pending_count); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_cleanup_nonpending_requests(struct sba_device *sba) +{ + unsigned long flags; + struct sba_request *req, *req1; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Freeup all alloced request */ + list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node) + _sba_free_request(sba, req); + + /* Set all active requests as aborted */ + list_for_each_entry_safe(req, req1, &sba->reqs_active_list, node) + _sba_abort_request(sba, req); + + /* + * Note: We expect that aborted request will be eventually + * freed by sba_receive_message() + */ + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static void sba_cleanup_pending_requests(struct sba_device *sba) +{ + unsigned long flags; + struct sba_request *req, *req1; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Freeup all pending request */ + list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node) + _sba_free_request(sba, req); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static int sba_send_mbox_request(struct sba_device *sba, + struct sba_request *req) +{ + int mchans_idx, ret = 0; + + /* Select mailbox channel in round-robin fashion */ + mchans_idx = atomic_inc_return(&sba->mchans_current); + mchans_idx = mchans_idx % sba->mchans_count; + + /* Send message for the request */ + req->msg.error = 0; + ret = mbox_send_message(sba->mchans[mchans_idx], &req->msg); + if (ret < 0) { + dev_err(sba->dev, "send message failed with error %d", ret); + return ret; + } + + /* Check error returned by mailbox controller */ + ret = req->msg.error; + if (ret < 0) { + dev_err(sba->dev, "message error %d", ret); + } + + /* Signal txdone for mailbox channel */ + mbox_client_txdone(sba->mchans[mchans_idx], ret); + + return ret; +} + +/* Note: Must be called with sba->reqs_lock held */ +static void _sba_process_pending_requests(struct sba_device *sba) +{ + int ret; + u32 count; + struct sba_request *req; + + /* + * Process few pending requests + * + * For now, we process (<number_of_mailbox_channels> * 8) + * number of requests at a time. + */ + count = sba->mchans_count * 8; + while (!list_empty(&sba->reqs_pending_list) && count) { + /* Get the first pending request */ + req = list_first_entry(&sba->reqs_pending_list, + struct sba_request, node); + + /* Try to make request active */ + if (!_sba_active_request(sba, req)) + break; + + /* Send request to mailbox channel */ + ret = sba_send_mbox_request(sba, req); + if (ret < 0) { + _sba_pending_request(sba, req); + break; + } + + count--; + } +} + +static void sba_process_received_request(struct sba_device *sba, + struct sba_request *req) +{ + unsigned long flags; + struct dma_async_tx_descriptor *tx; + struct sba_request *nreq, *first = req->first; + + /* Process only after all chained requests are received */ + if (!atomic_dec_return(&first->next_pending_count)) { + tx = &first->tx; + + WARN_ON(tx->cookie < 0); + if (tx->cookie > 0) { + dma_cookie_complete(tx); + dmaengine_desc_get_callback_invoke(tx, NULL); + dma_descriptor_unmap(tx); + tx->callback = NULL; + tx->callback_result = NULL; + } + + dma_run_dependencies(tx); + + spin_lock_irqsave(&sba->reqs_lock, flags); + + /* Free all requests chained to first request */ + list_for_each_entry(nreq, &first->next, next) + _sba_free_request(sba, nreq); + INIT_LIST_HEAD(&first->next); + + /* Free the first request */ + _sba_free_request(sba, first); + + /* Process pending requests */ + _sba_process_pending_requests(sba); + + spin_unlock_irqrestore(&sba->reqs_lock, flags); + } +} + +static void sba_write_stats_in_seqfile(struct sba_device *sba, + struct seq_file *file) +{ + unsigned long flags; + struct sba_request *req; + u32 free_count = 0, alloced_count = 0; + u32 pending_count = 0, active_count = 0, aborted_count = 0; + + spin_lock_irqsave(&sba->reqs_lock, flags); + + list_for_each_entry(req, &sba->reqs_free_list, node) + if (async_tx_test_ack(&req->tx)) + free_count++; + + list_for_each_entry(req, &sba->reqs_alloc_list, node) + alloced_count++; + + list_for_each_entry(req, &sba->reqs_pending_list, node) + pending_count++; + + list_for_each_entry(req, &sba->reqs_active_list, node) + active_count++; + + list_for_each_entry(req, &sba->reqs_aborted_list, node) + aborted_count++; + + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + seq_printf(file, "maximum requests = %d\n", sba->max_req); + seq_printf(file, "free requests = %d\n", free_count); + seq_printf(file, "alloced requests = %d\n", alloced_count); + seq_printf(file, "pending requests = %d\n", pending_count); + seq_printf(file, "active requests = %d\n", active_count); + seq_printf(file, "aborted requests = %d\n", aborted_count); +} + +/* ====== DMAENGINE callbacks ===== */ + +static void sba_free_chan_resources(struct dma_chan *dchan) +{ + /* + * Channel resources are pre-alloced so we just free-up + * whatever we can so that we can re-use pre-alloced + * channel resources next time. + */ + sba_cleanup_nonpending_requests(to_sba_device(dchan)); +} + +static int sba_device_terminate_all(struct dma_chan *dchan) +{ + /* Cleanup all pending requests */ + sba_cleanup_pending_requests(to_sba_device(dchan)); + + return 0; +} + +static void sba_issue_pending(struct dma_chan *dchan) +{ + unsigned long flags; + struct sba_device *sba = to_sba_device(dchan); + + /* Process pending requests */ + spin_lock_irqsave(&sba->reqs_lock, flags); + _sba_process_pending_requests(sba); + spin_unlock_irqrestore(&sba->reqs_lock, flags); +} + +static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx) +{ + unsigned long flags; + dma_cookie_t cookie; + struct sba_device *sba; + struct sba_request *req, *nreq; + + if (unlikely(!tx)) + return -EINVAL; + + sba = to_sba_device(tx->chan); + req = to_sba_request(tx); + + /* Assign cookie and mark all chained requests pending */ + spin_lock_irqsave(&sba->reqs_lock, flags); + cookie = dma_cookie_assign(tx); + _sba_pending_request(sba, req); + list_for_each_entry(nreq, &req->next, next) + _sba_pending_request(sba, nreq); + spin_unlock_irqrestore(&sba->reqs_lock, flags); + + return cookie; +} + +static enum dma_status sba_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + enum dma_status ret; + struct sba_device *sba = to_sba_device(dchan); + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + sba_peek_mchans(sba); + + return dma_cookie_status(dchan, cookie, txstate); +} + +static void sba_fillup_interrupt_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg) +{ + u64 cmd; + u32 c_mdata; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load dummy data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = resp_dma; + cmdsp->data_len = req->sba->hw_resp_size; + cmdsp++; + + /* Type-A command to write buf0 to dummy location */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = resp_dma; + cmdsp->data_len = req->sba->hw_resp_size; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags) +{ + struct sba_request *req = NULL; + struct sba_device *sba = to_sba_device(dchan); + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + + /* + * Force fence so that no requests are submitted + * until DMA callback for this request is invoked. + */ + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_interrupt_msg(req, req->cmds, &req->msg); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return &req->tx; +} + +static void sba_fillup_memcpy_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t dst, dma_addr_t src) +{ + u64 cmd; + u32 c_mdata; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = dst + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_memcpy_req(struct sba_device *sba, + dma_addr_t off, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_memcpy_msg(req, req->cmds, &req->msg, + off, len, dst, src); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + size_t req_len; + dma_addr_t off = 0; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + req = sba_prep_dma_memcpy_req(sba, off, dst, src, + req_len, flags); + if (!req) { + if (first) + sba_free_chained_requests(first); + return NULL; + } + + if (first) + sba_chain_request(first, req); + else + first = req; + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; +} + +static void sba_fillup_xor_msg(struct sba_request *req, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t dst, dma_addr_t *src, u32 src_cnt) +{ + u64 cmd; + u32 c_mdata; + unsigned int i; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + /* Type-B command to load data into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[0] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Type-B commands to xor data with buf0 and put it back in buf0 */ + for (i = 1; i < src_cnt; i++) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[i] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = dst + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_xor_req(struct sba_device *sba, + dma_addr_t off, dma_addr_t dst, dma_addr_t *src, + u32 src_cnt, size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request message */ + sba_fillup_xor_msg(req, req->cmds, &req->msg, + off, len, dst, src, src_cnt); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_xor(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src, + u32 src_cnt, size_t len, unsigned long flags) +{ + size_t req_len; + dma_addr_t off = 0; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Sanity checks */ + if (unlikely(src_cnt > sba->max_xor_srcs)) + return NULL; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + req = sba_prep_dma_xor_req(sba, off, dst, src, src_cnt, + req_len, flags); + if (!req) { + if (first) + sba_free_chained_requests(first); + return NULL; + } + + if (first) + sba_chain_request(first, req); + else + first = req; + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; +} + +static void sba_fillup_pq_msg(struct sba_request *req, + bool pq_continue, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t *dst_p, dma_addr_t *dst_q, + const u8 *scf, dma_addr_t *src, u32 src_cnt) +{ + u64 cmd; + u32 c_mdata; + unsigned int i; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + if (pq_continue) { + /* Type-B command to load old P into buf0 */ + if (dst_p) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-B command to load old Q into buf1 */ + if (dst_q) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + } else { + /* Type-A command to zero all buffers */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + } + + /* Type-B commands for generate P onto buf0 and Q onto buf1 */ + for (i = 0; i < src_cnt; i++) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(raid6_gflog[scf[i]], 1, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src[i] + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + if (dst_p) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf1 */ + if (dst_q) { + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off, + dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src, + u32 src_cnt, const u8 *scf, size_t len, unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request messages */ + sba_fillup_pq_msg(req, dmaf_continue(flags), + req->cmds, &req->msg, + off, len, dst_p, dst_q, scf, src, src_cnt); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static void sba_fillup_pq_single_msg(struct sba_request *req, + bool pq_continue, + struct brcm_sba_command *cmds, + struct brcm_message *msg, + dma_addr_t msg_offset, size_t msg_len, + dma_addr_t *dst_p, dma_addr_t *dst_q, + dma_addr_t src, u8 scf) +{ + u64 cmd; + u32 c_mdata; + u8 pos, dpos = raid6_gflog[scf]; + dma_addr_t resp_dma = req->tx.phys; + struct brcm_sba_command *cmdsp = cmds; + + if (!dst_p) + goto skip_p; + + if (pq_continue) { + /* Type-B command to load old P into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + /* + * Type-B commands to xor data with buf0 and put it + * back in buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } else { + /* Type-B command to load old P into buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_load_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_p + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + +skip_p: + if (!dst_q) + goto skip_q; + + /* Type-A command to zero all buffers */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + + if (dpos == 255) + goto skip_q_computation; + pos = (dpos < req->sba->max_pq_coefs) ? + dpos : (req->sba->max_pq_coefs - 1); + + /* + * Type-B command to generate initial Q from data + * and store output into buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(pos, 0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = src + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + + dpos -= pos; + + /* Multiple Type-A command to generate final Q */ + while (dpos) { + pos = (dpos < req->sba->max_pq_coefs) ? + dpos : (req->sba->max_pq_coefs - 1); + + /* + * Type-A command to generate Q with buf0 and + * buf1 store result in buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_pq_c_mdata(pos, 0, 1); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata), + SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + cmdsp++; + + dpos -= pos; + } + +skip_q_computation: + if (pq_continue) { + /* + * Type-B command to XOR previous output with + * buf0 and write it into buf0 + */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_B, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + c_mdata = sba_cmd_xor_c_mdata(0, 0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_XOR, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_B; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + } + + /* Type-A command to write buf0 */ + cmd = sba_cmd_enc(0x0, SBA_TYPE_A, + SBA_TYPE_SHIFT, SBA_TYPE_MASK); + cmd = sba_cmd_enc(cmd, msg_len, + SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK); + cmd = sba_cmd_enc(cmd, 0x1, + SBA_RESP_SHIFT, SBA_RESP_MASK); + c_mdata = sba_cmd_write_c_mdata(0); + cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata), + SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK); + cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER, + SBA_CMD_SHIFT, SBA_CMD_MASK); + cmdsp->cmd = cmd; + *cmdsp->cmd_dma = cpu_to_le64(cmd); + cmdsp->flags = BRCM_SBA_CMD_TYPE_A; + if (req->sba->hw_resp_size) { + cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP; + cmdsp->resp = resp_dma; + cmdsp->resp_len = req->sba->hw_resp_size; + } + cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT; + cmdsp->data = *dst_q + msg_offset; + cmdsp->data_len = msg_len; + cmdsp++; + +skip_q: + /* Fillup brcm_message */ + msg->type = BRCM_MESSAGE_SBA; + msg->sba.cmds = cmds; + msg->sba.cmds_count = cmdsp - cmds; + msg->ctx = req; + msg->error = 0; +} + +static struct sba_request * +sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off, + dma_addr_t *dst_p, dma_addr_t *dst_q, + dma_addr_t src, u8 scf, size_t len, + unsigned long flags) +{ + struct sba_request *req = NULL; + + /* Alloc new request */ + req = sba_alloc_request(sba); + if (!req) + return NULL; + if (flags & DMA_PREP_FENCE) + req->flags |= SBA_REQUEST_FENCE; + + /* Fillup request messages */ + sba_fillup_pq_single_msg(req, dmaf_continue(flags), + req->cmds, &req->msg, off, len, + dst_p, dst_q, src, scf); + + /* Init async_tx descriptor */ + req->tx.flags = flags; + req->tx.cookie = -EBUSY; + + return req; +} + +static struct dma_async_tx_descriptor * +sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src, + u32 src_cnt, const u8 *scf, size_t len, unsigned long flags) +{ + u32 i, dst_q_index; + size_t req_len; + bool slow = false; + dma_addr_t off = 0; + dma_addr_t *dst_p = NULL, *dst_q = NULL; + struct sba_device *sba = to_sba_device(dchan); + struct sba_request *first = NULL, *req; + + /* Sanity checks */ + if (unlikely(src_cnt > sba->max_pq_srcs)) + return NULL; + for (i = 0; i < src_cnt; i++) + if (sba->max_pq_coefs <= raid6_gflog[scf[i]]) + slow = true; + + /* Figure-out P and Q destination addresses */ + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + dst_p = &dst[0]; + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + dst_q = &dst[1]; + + /* Create chained requests where each request is upto hw_buf_size */ + while (len) { + req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size; + + if (slow) { + dst_q_index = src_cnt; + + if (dst_q) { + for (i = 0; i < src_cnt; i++) { + if (*dst_q == src[i]) { + dst_q_index = i; + break; + } + } + } + + if (dst_q_index < src_cnt) { + i = dst_q_index; + req = sba_prep_dma_pq_single_req(sba, + off, dst_p, dst_q, src[i], scf[i], + req_len, flags | DMA_PREP_FENCE); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + + flags |= DMA_PREP_CONTINUE; + } + + for (i = 0; i < src_cnt; i++) { + if (dst_q_index == i) + continue; + + req = sba_prep_dma_pq_single_req(sba, + off, dst_p, dst_q, src[i], scf[i], + req_len, flags | DMA_PREP_FENCE); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + + flags |= DMA_PREP_CONTINUE; + } + } else { + req = sba_prep_dma_pq_req(sba, off, + dst_p, dst_q, src, src_cnt, + scf, req_len, flags); + if (!req) + goto fail; + + if (first) + sba_chain_request(first, req); + else + first = req; + } + + off += req_len; + len -= req_len; + } + + return (first) ? &first->tx : NULL; + +fail: + if (first) + sba_free_chained_requests(first); + return NULL; +} + +/* ====== Mailbox callbacks ===== */ + +static void sba_receive_message(struct mbox_client *cl, void *msg) +{ + struct brcm_message *m = msg; + struct sba_request *req = m->ctx; + struct sba_device *sba = req->sba; + + /* Error count if message has error */ + if (m->error < 0) + dev_err(sba->dev, "%s got message with error %d", + dma_chan_name(&sba->dma_chan), m->error); + + /* Process received request */ + sba_process_received_request(sba, req); +} + +/* ====== Debugfs callbacks ====== */ + +static int sba_debugfs_stats_show(struct seq_file *file, void *offset) +{ + struct platform_device *pdev = to_platform_device(file->private); + struct sba_device *sba = platform_get_drvdata(pdev); + + /* Write stats in file */ + sba_write_stats_in_seqfile(sba, file); + + return 0; +} + +/* ====== Platform driver routines ===== */ + +static int sba_prealloc_channel_resources(struct sba_device *sba) +{ + int i, j, ret = 0; + struct sba_request *req = NULL; + + sba->resp_base = dma_alloc_coherent(sba->mbox_dev, + sba->max_resp_pool_size, + &sba->resp_dma_base, GFP_KERNEL); + if (!sba->resp_base) + return -ENOMEM; + + sba->cmds_base = dma_alloc_coherent(sba->mbox_dev, + sba->max_cmds_pool_size, + &sba->cmds_dma_base, GFP_KERNEL); + if (!sba->cmds_base) { + ret = -ENOMEM; + goto fail_free_resp_pool; + } + + spin_lock_init(&sba->reqs_lock); + sba->reqs_fence = false; + INIT_LIST_HEAD(&sba->reqs_alloc_list); + INIT_LIST_HEAD(&sba->reqs_pending_list); + INIT_LIST_HEAD(&sba->reqs_active_list); + INIT_LIST_HEAD(&sba->reqs_aborted_list); + INIT_LIST_HEAD(&sba->reqs_free_list); + + for (i = 0; i < sba->max_req; i++) { + req = devm_kzalloc(sba->dev, + sizeof(*req) + + sba->max_cmd_per_req * sizeof(req->cmds[0]), + GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto fail_free_cmds_pool; + } + INIT_LIST_HEAD(&req->node); + req->sba = sba; + req->flags = SBA_REQUEST_STATE_FREE; + INIT_LIST_HEAD(&req->next); + atomic_set(&req->next_pending_count, 0); + for (j = 0; j < sba->max_cmd_per_req; j++) { + req->cmds[j].cmd = 0; + req->cmds[j].cmd_dma = sba->cmds_base + + (i * sba->max_cmd_per_req + j) * sizeof(u64); + req->cmds[j].cmd_dma_addr = sba->cmds_dma_base + + (i * sba->max_cmd_per_req + j) * sizeof(u64); + req->cmds[j].flags = 0; + } + memset(&req->msg, 0, sizeof(req->msg)); + dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan); + async_tx_ack(&req->tx); + req->tx.tx_submit = sba_tx_submit; + req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size; + list_add_tail(&req->node, &sba->reqs_free_list); + } + + return 0; + +fail_free_cmds_pool: + dma_free_coherent(sba->mbox_dev, + sba->max_cmds_pool_size, + sba->cmds_base, sba->cmds_dma_base); +fail_free_resp_pool: + dma_free_coherent(sba->mbox_dev, + sba->max_resp_pool_size, + sba->resp_base, sba->resp_dma_base); + return ret; +} + +static void sba_freeup_channel_resources(struct sba_device *sba) +{ + dmaengine_terminate_all(&sba->dma_chan); + dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size, + sba->cmds_base, sba->cmds_dma_base); + dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size, + sba->resp_base, sba->resp_dma_base); + sba->resp_base = NULL; + sba->resp_dma_base = 0; +} + +static int sba_async_register(struct sba_device *sba) +{ + int ret; + struct dma_device *dma_dev = &sba->dma_dev; + + /* Initialize DMA channel cookie */ + sba->dma_chan.device = dma_dev; + dma_cookie_init(&sba->dma_chan); + + /* Initialize DMA device capability mask */ + dma_cap_zero(dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_XOR, dma_dev->cap_mask); + dma_cap_set(DMA_PQ, dma_dev->cap_mask); + + /* + * Set mailbox channel device as the base device of + * our dma_device because the actual memory accesses + * will be done by mailbox controller + */ + dma_dev->dev = sba->mbox_dev; + + /* Set base prep routines */ + dma_dev->device_free_chan_resources = sba_free_chan_resources; + dma_dev->device_terminate_all = sba_device_terminate_all; + dma_dev->device_issue_pending = sba_issue_pending; + dma_dev->device_tx_status = sba_tx_status; + + /* Set interrupt routine */ + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = sba_prep_dma_interrupt; + + /* Set memcpy routine */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = sba_prep_dma_memcpy; + + /* Set xor routine and capability */ + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_xor = sba_prep_dma_xor; + dma_dev->max_xor = sba->max_xor_srcs; + } + + /* Set pq routine and capability */ + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_dev->device_prep_dma_pq = sba_prep_dma_pq; + dma_set_maxpq(dma_dev, sba->max_pq_srcs, 0); + } + + /* Initialize DMA device channel list */ + INIT_LIST_HEAD(&dma_dev->channels); + list_add_tail(&sba->dma_chan.device_node, &dma_dev->channels); + + /* Register with Linux async DMA framework*/ + ret = dma_async_device_register(dma_dev); + if (ret) { + dev_err(sba->dev, "async device register error %d", ret); + return ret; + } + + dev_info(sba->dev, "%s capabilities: %s%s%s%s\n", + dma_chan_name(&sba->dma_chan), + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "interrupt " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : ""); + + return 0; +} + +static int sba_probe(struct platform_device *pdev) +{ + int i, ret = 0, mchans_count; + struct sba_device *sba; + struct platform_device *mbox_pdev; + struct of_phandle_args args; + + /* Allocate main SBA struct */ + sba = devm_kzalloc(&pdev->dev, sizeof(*sba), GFP_KERNEL); + if (!sba) + return -ENOMEM; + + sba->dev = &pdev->dev; + platform_set_drvdata(pdev, sba); + + /* Number of channels equals number of mailbox channels */ + ret = of_count_phandle_with_args(pdev->dev.of_node, + "mboxes", "#mbox-cells"); + if (ret <= 0) + return -ENODEV; + mchans_count = ret; + + /* Determine SBA version from DT compatible string */ + if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba")) + sba->ver = SBA_VER_1; + else if (of_device_is_compatible(sba->dev->of_node, + "brcm,iproc-sba-v2")) + sba->ver = SBA_VER_2; + else + return -ENODEV; + + /* Derived Configuration parameters */ + switch (sba->ver) { + case SBA_VER_1: + sba->hw_buf_size = 4096; + sba->hw_resp_size = 8; + sba->max_pq_coefs = 6; + sba->max_pq_srcs = 6; + break; + case SBA_VER_2: + sba->hw_buf_size = 4096; + sba->hw_resp_size = 8; + sba->max_pq_coefs = 30; + /* + * We can support max_pq_srcs == max_pq_coefs because + * we are limited by number of SBA commands that we can + * fit in one message for underlying ring manager HW. + */ + sba->max_pq_srcs = 12; + break; + default: + return -EINVAL; + } + sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL * mchans_count; + sba->max_cmd_per_req = sba->max_pq_srcs + 3; + sba->max_xor_srcs = sba->max_cmd_per_req - 1; + sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size; + sba->max_cmds_pool_size = sba->max_req * + sba->max_cmd_per_req * sizeof(u64); + + /* Setup mailbox client */ + sba->client.dev = &pdev->dev; + sba->client.rx_callback = sba_receive_message; + sba->client.tx_block = false; + sba->client.knows_txdone = true; + sba->client.tx_tout = 0; + + /* Allocate mailbox channel array */ + sba->mchans = devm_kcalloc(&pdev->dev, mchans_count, + sizeof(*sba->mchans), GFP_KERNEL); + if (!sba->mchans) + return -ENOMEM; + + /* Request mailbox channels */ + sba->mchans_count = 0; + for (i = 0; i < mchans_count; i++) { + sba->mchans[i] = mbox_request_channel(&sba->client, i); + if (IS_ERR(sba->mchans[i])) { + ret = PTR_ERR(sba->mchans[i]); + goto fail_free_mchans; + } + sba->mchans_count++; + } + atomic_set(&sba->mchans_current, 0); + + /* Find-out underlying mailbox device */ + ret = of_parse_phandle_with_args(pdev->dev.of_node, + "mboxes", "#mbox-cells", 0, &args); + if (ret) + goto fail_free_mchans; + mbox_pdev = of_find_device_by_node(args.np); + of_node_put(args.np); + if (!mbox_pdev) { + ret = -ENODEV; + goto fail_free_mchans; + } + sba->mbox_dev = &mbox_pdev->dev; + + /* All mailbox channels should be of same ring manager device */ + for (i = 1; i < mchans_count; i++) { + ret = of_parse_phandle_with_args(pdev->dev.of_node, + "mboxes", "#mbox-cells", i, &args); + if (ret) + goto fail_free_mchans; + mbox_pdev = of_find_device_by_node(args.np); + of_node_put(args.np); + if (sba->mbox_dev != &mbox_pdev->dev) { + ret = -EINVAL; + goto fail_free_mchans; + } + } + + /* Prealloc channel resource */ + ret = sba_prealloc_channel_resources(sba); + if (ret) + goto fail_free_mchans; + + /* Check availability of debugfs */ + if (!debugfs_initialized()) + goto skip_debugfs; + + /* Create debugfs root entry */ + sba->root = debugfs_create_dir(dev_name(sba->dev), NULL); + if (IS_ERR_OR_NULL(sba->root)) { + dev_err(sba->dev, "failed to create debugfs root entry\n"); + sba->root = NULL; + goto skip_debugfs; + } + + /* Create debugfs stats entry */ + sba->stats = debugfs_create_devm_seqfile(sba->dev, "stats", sba->root, + sba_debugfs_stats_show); + if (IS_ERR_OR_NULL(sba->stats)) + dev_err(sba->dev, "failed to create debugfs stats file\n"); +skip_debugfs: + + /* Register DMA device with Linux async framework */ + ret = sba_async_register(sba); + if (ret) + goto fail_free_resources; + + /* Print device info */ + dev_info(sba->dev, "%s using SBAv%d and %d mailbox channels", + dma_chan_name(&sba->dma_chan), sba->ver+1, + sba->mchans_count); + + return 0; + +fail_free_resources: + debugfs_remove_recursive(sba->root); + sba_freeup_channel_resources(sba); +fail_free_mchans: + for (i = 0; i < sba->mchans_count; i++) + mbox_free_channel(sba->mchans[i]); + return ret; +} + +static int sba_remove(struct platform_device *pdev) +{ + int i; + struct sba_device *sba = platform_get_drvdata(pdev); + + dma_async_device_unregister(&sba->dma_dev); + + debugfs_remove_recursive(sba->root); + + sba_freeup_channel_resources(sba); + + for (i = 0; i < sba->mchans_count; i++) + mbox_free_channel(sba->mchans[i]); + + return 0; +} + +static const struct of_device_id sba_of_match[] = { + { .compatible = "brcm,iproc-sba", }, + { .compatible = "brcm,iproc-sba-v2", }, + {}, +}; +MODULE_DEVICE_TABLE(of, sba_of_match); + +static struct platform_driver sba_driver = { + .probe = sba_probe, + .remove = sba_remove, + .driver = { + .name = "bcm-sba-raid", + .of_match_table = sba_of_match, + }, +}; +module_platform_driver(sba_driver); + +MODULE_DESCRIPTION("Broadcom SBA RAID driver"); +MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/bestcomm/Makefile b/drivers/dma/bestcomm/Makefile index aed2df2a6580..8d1b33a2f0a1 100644 --- a/drivers/dma/bestcomm/Makefile +++ b/drivers/dma/bestcomm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for BestComm & co # diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index d9118ec23025..b451354735d3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -923,30 +923,85 @@ int dma_async_device_register(struct dma_device *device) return -ENODEV; /* validate device routines */ - BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) && - !device->device_prep_dma_memcpy); - BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && - !device->device_prep_dma_xor); - BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && - !device->device_prep_dma_xor_val); - BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && - !device->device_prep_dma_pq); - BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && - !device->device_prep_dma_pq_val); - BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && - !device->device_prep_dma_memset); - BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && - !device->device_prep_dma_interrupt); - BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && - !device->device_prep_dma_sg); - BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) && - !device->device_prep_dma_cyclic); - BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && - !device->device_prep_interleaved_dma); - - BUG_ON(!device->device_tx_status); - BUG_ON(!device->device_issue_pending); - BUG_ON(!device->dev); + if (!device->dev) { + pr_err("DMAdevice must have dev\n"); + return -EIO; + } + + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_MEMCPY"); + return -EIO; + } + + if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_XOR"); + return -EIO; + } + + if (dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_XOR_VAL"); + return -EIO; + } + + if (dma_has_cap(DMA_PQ, device->cap_mask) && !device->device_prep_dma_pq) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_PQ"); + return -EIO; + } + + if (dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_PQ_VAL"); + return -EIO; + } + + if (dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_MEMSET"); + return -EIO; + } + + if (dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_INTERRUPT"); + return -EIO; + } + + if (dma_has_cap(DMA_CYCLIC, device->cap_mask) && !device->device_prep_dma_cyclic) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_CYCLIC"); + return -EIO; + } + + if (dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && !device->device_prep_interleaved_dma) { + dev_err(device->dev, + "Device claims capability %s, but op is not defined\n", + "DMA_INTERLEAVE"); + return -EIO; + } + + + if (!device->device_tx_status) { + dev_err(device->dev, "Device tx_status is not defined\n"); + return -EIO; + } + + + if (!device->device_issue_pending) { + dev_err(device->dev, "Device issue_pending is not defined\n"); + return -EIO; + } /* note: this only matters in the * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h index 882ff9448c3b..501c0b063f85 100644 --- a/drivers/dma/dmaengine.h +++ b/drivers/dma/dmaengine.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * The contents of this file are private to DMA engine drivers, and is not * part of the API to be used by DMA engine users. diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index a07ef3d6b3ec..34ff53290b03 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -52,15 +52,10 @@ module_param(iterations, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(iterations, "Iterations before stopping test (default: infinite)"); -static unsigned int sg_buffers = 1; -module_param(sg_buffers, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(sg_buffers, - "Number of scatter gather buffers (default: 1)"); - static unsigned int dmatest; module_param(dmatest, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dmatest, - "dmatest 0-memcpy 1-slave_sg (default: 0)"); + "dmatest 0-memcpy 1-memset (default: 0)"); static unsigned int xor_sources = 3; module_param(xor_sources, uint, S_IRUGO | S_IWUSR); @@ -158,6 +153,7 @@ MODULE_PARM_DESC(run, "Run the test (default: false)"); #define PATTERN_COPY 0x40 #define PATTERN_OVERWRITE 0x20 #define PATTERN_COUNT_MASK 0x1f +#define PATTERN_MEMSET_IDX 0x01 struct dmatest_thread { struct list_head node; @@ -239,46 +235,62 @@ static unsigned long dmatest_random(void) return buf; } +static inline u8 gen_inv_idx(u8 index, bool is_memset) +{ + u8 val = is_memset ? PATTERN_MEMSET_IDX : index; + + return ~val & PATTERN_COUNT_MASK; +} + +static inline u8 gen_src_value(u8 index, bool is_memset) +{ + return PATTERN_SRC | gen_inv_idx(index, is_memset); +} + +static inline u8 gen_dst_value(u8 index, bool is_memset) +{ + return PATTERN_DST | gen_inv_idx(index, is_memset); +} + static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len, - unsigned int buf_size) + unsigned int buf_size, bool is_memset) { unsigned int i; u8 *buf; for (; (buf = *bufs); bufs++) { for (i = 0; i < start; i++) - buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_src_value(i, is_memset); for ( ; i < start + len; i++) - buf[i] = PATTERN_SRC | PATTERN_COPY - | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_src_value(i, is_memset) | PATTERN_COPY; for ( ; i < buf_size; i++) - buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_src_value(i, is_memset); buf++; } } static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len, - unsigned int buf_size) + unsigned int buf_size, bool is_memset) { unsigned int i; u8 *buf; for (; (buf = *bufs); bufs++) { for (i = 0; i < start; i++) - buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_dst_value(i, is_memset); for ( ; i < start + len; i++) - buf[i] = PATTERN_DST | PATTERN_OVERWRITE - | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_dst_value(i, is_memset) | + PATTERN_OVERWRITE; for ( ; i < buf_size; i++) - buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + buf[i] = gen_dst_value(i, is_memset); } } static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, - unsigned int counter, bool is_srcbuf) + unsigned int counter, bool is_srcbuf, bool is_memset) { u8 diff = actual ^ pattern; - u8 expected = pattern | (~counter & PATTERN_COUNT_MASK); + u8 expected = pattern | gen_inv_idx(counter, is_memset); const char *thread_name = current->comm; if (is_srcbuf) @@ -298,7 +310,7 @@ static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, static unsigned int dmatest_verify(u8 **bufs, unsigned int start, unsigned int end, unsigned int counter, u8 pattern, - bool is_srcbuf) + bool is_srcbuf, bool is_memset) { unsigned int i; unsigned int error_count = 0; @@ -311,11 +323,12 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, counter = counter_orig; for (i = start; i < end; i++) { actual = buf[i]; - expected = pattern | (~counter & PATTERN_COUNT_MASK); + expected = pattern | gen_inv_idx(counter, is_memset); if (actual != expected) { if (error_count < MAX_ERROR_COUNT) dmatest_mismatch(actual, pattern, i, - counter, is_srcbuf); + counter, is_srcbuf, + is_memset); error_count++; } counter++; @@ -435,6 +448,7 @@ static int dmatest_func(void *data) s64 runtime = 0; unsigned long long total_len = 0; u8 align = 0; + bool is_memset = false; set_freezable(); @@ -448,9 +462,10 @@ static int dmatest_func(void *data) if (thread->type == DMA_MEMCPY) { align = dev->copy_align; src_cnt = dst_cnt = 1; - } else if (thread->type == DMA_SG) { - align = dev->copy_align; - src_cnt = dst_cnt = sg_buffers; + } else if (thread->type == DMA_MEMSET) { + align = dev->fill_align; + src_cnt = dst_cnt = 1; + is_memset = true; } else if (thread->type == DMA_XOR) { /* force odd to ensure dst = src */ src_cnt = min_odd(params->xor_sources | 1, dev->max_xor); @@ -530,8 +545,6 @@ static int dmatest_func(void *data) dma_addr_t srcs[src_cnt]; dma_addr_t *dsts; unsigned int src_off, dst_off, len; - struct scatterlist tx_sg[src_cnt]; - struct scatterlist rx_sg[src_cnt]; total_tests++; @@ -571,9 +584,9 @@ static int dmatest_func(void *data) dst_off = (dst_off >> align) << align; dmatest_init_srcs(thread->srcs, src_off, len, - params->buf_size); + params->buf_size, is_memset); dmatest_init_dsts(thread->dsts, dst_off, len, - params->buf_size); + params->buf_size, is_memset); diff = ktime_sub(ktime_get(), start); filltime = ktime_add(filltime, diff); @@ -627,22 +640,15 @@ static int dmatest_func(void *data) um->bidi_cnt++; } - sg_init_table(tx_sg, src_cnt); - sg_init_table(rx_sg, src_cnt); - for (i = 0; i < src_cnt; i++) { - sg_dma_address(&rx_sg[i]) = srcs[i]; - sg_dma_address(&tx_sg[i]) = dsts[i] + dst_off; - sg_dma_len(&tx_sg[i]) = len; - sg_dma_len(&rx_sg[i]) = len; - } - if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dsts[0] + dst_off, srcs[0], len, flags); - else if (thread->type == DMA_SG) - tx = dev->device_prep_dma_sg(chan, tx_sg, src_cnt, - rx_sg, src_cnt, flags); + else if (thread->type == DMA_MEMSET) + tx = dev->device_prep_dma_memset(chan, + dsts[0] + dst_off, + *(thread->srcs[0] + src_off), + len, flags); else if (thread->type == DMA_XOR) tx = dev->device_prep_dma_xor(chan, dsts[0] + dst_off, @@ -722,23 +728,25 @@ static int dmatest_func(void *data) start = ktime_get(); pr_debug("%s: verifying source buffer...\n", current->comm); error_count = dmatest_verify(thread->srcs, 0, src_off, - 0, PATTERN_SRC, true); + 0, PATTERN_SRC, true, is_memset); error_count += dmatest_verify(thread->srcs, src_off, src_off + len, src_off, - PATTERN_SRC | PATTERN_COPY, true); + PATTERN_SRC | PATTERN_COPY, true, is_memset); error_count += dmatest_verify(thread->srcs, src_off + len, params->buf_size, src_off + len, - PATTERN_SRC, true); + PATTERN_SRC, true, is_memset); pr_debug("%s: verifying dest buffer...\n", current->comm); error_count += dmatest_verify(thread->dsts, 0, dst_off, - 0, PATTERN_DST, false); + 0, PATTERN_DST, false, is_memset); + error_count += dmatest_verify(thread->dsts, dst_off, dst_off + len, src_off, - PATTERN_SRC | PATTERN_COPY, false); + PATTERN_SRC | PATTERN_COPY, false, is_memset); + error_count += dmatest_verify(thread->dsts, dst_off + len, params->buf_size, dst_off + len, - PATTERN_DST, false); + PATTERN_DST, false, is_memset); diff = ktime_sub(ktime_get(), start); comparetime = ktime_add(comparetime, diff); @@ -821,8 +829,8 @@ static int dmatest_add_threads(struct dmatest_info *info, if (type == DMA_MEMCPY) op = "copy"; - else if (type == DMA_SG) - op = "sg"; + else if (type == DMA_MEMSET) + op = "set"; else if (type == DMA_XOR) op = "xor"; else if (type == DMA_PQ) @@ -883,9 +891,9 @@ static int dmatest_add_channel(struct dmatest_info *info, } } - if (dma_has_cap(DMA_SG, dma_dev->cap_mask)) { + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { if (dmatest == 1) { - cnt = dmatest_add_threads(info, dtc, DMA_SG); + cnt = dmatest_add_threads(info, dtc, DMA_MEMSET); thread_count += cnt > 0 ? cnt : 0; } } @@ -961,8 +969,8 @@ static void run_threaded_test(struct dmatest_info *info) params->noverify = noverify; request_channels(info, DMA_MEMCPY); + request_channels(info, DMA_MEMSET); request_channels(info, DMA_XOR); - request_channels(info, DMA_SG); request_channels(info, DMA_PQ); } diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index 5a37b9fcf40d..04b9728c1d26 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -6,17 +6,12 @@ config DW_DMAC_CORE tristate select DMA_ENGINE -config DW_DMAC_BIG_ENDIAN_IO - bool - config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" select DW_DMAC_CORE - select DW_DMAC_BIG_ENDIAN_IO if AVR32 - default y if CPU_AT32AP7000 help Support the Synopsys DesignWare AHB DMA controller. This - can be integrated in chips such as the Atmel AT32ap7000. + can be integrated in chips such as the Intel Cherrytrail. config DW_DMAC_PCI tristate "Synopsys DesignWare AHB DMA PCI driver" diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile index 3eebd1ce2c6b..2b949c2e4504 100644 --- a/drivers/dma/dw/Makefile +++ b/drivers/dma/dw/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o dw_dmac_core-objs := core.o diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index e500950dad82..f43e6dafe446 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -561,92 +561,14 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) dwc_descriptor_complete(dwc, bad_desc, true); } -/* --------------------- Cyclic DMA API extensions -------------------- */ - -dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - return channel_readl(dwc, SAR); -} -EXPORT_SYMBOL(dw_dma_get_src_addr); - -dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - return channel_readl(dwc, DAR); -} -EXPORT_SYMBOL(dw_dma_get_dst_addr); - -/* Called with dwc->lock held and all DMAC interrupts disabled */ -static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_block, u32 status_err, u32 status_xfer) -{ - unsigned long flags; - - if (status_block & dwc->mask) { - void (*callback)(void *param); - void *callback_param; - - dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", - channel_readl(dwc, LLP)); - dma_writel(dw, CLEAR.BLOCK, dwc->mask); - - callback = dwc->cdesc->period_callback; - callback_param = dwc->cdesc->period_callback_param; - - if (callback) - callback(callback_param); - } - - /* - * Error and transfer complete are highly unlikely, and will most - * likely be due to a configuration error by the user. - */ - if (unlikely(status_err & dwc->mask) || - unlikely(status_xfer & dwc->mask)) { - unsigned int i; - - dev_err(chan2dev(&dwc->chan), - "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", - status_xfer ? "xfer" : "error"); - - spin_lock_irqsave(&dwc->lock, flags); - - dwc_dump_chan_regs(dwc); - - dwc_chan_disable(dw, dwc); - - /* Make sure DMA does not restart by loading a new list */ - channel_writel(dwc, LLP, 0); - channel_writel(dwc, CTL_LO, 0); - channel_writel(dwc, CTL_HI, 0); - - dma_writel(dw, CLEAR.BLOCK, dwc->mask); - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); - - for (i = 0; i < dwc->cdesc->periods; i++) - dwc_dump_lli(dwc, dwc->cdesc->desc[i]); - - spin_unlock_irqrestore(&dwc->lock, flags); - } - - /* Re-enable interrupts */ - channel_set_bit(dw, MASK.BLOCK, dwc->mask); -} - -/* ------------------------------------------------------------------------- */ - static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; - u32 status_block; u32 status_xfer; u32 status_err; unsigned int i; - status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -655,8 +577,7 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_block, status_err, - status_xfer); + dev_vdbg(dw->dma.dev, "Cyclic xfer is not implemented\n"); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if (status_xfer & (1 << i)) @@ -1264,255 +1185,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -/* --------------------- Cyclic DMA API extensions -------------------- */ - -/** - * dw_dma_cyclic_start - start the cyclic DMA transfer - * @chan: the DMA channel to start - * - * Must be called with soft interrupts disabled. Returns zero on success or - * -errno on failure. - */ -int dw_dma_cyclic_start(struct dma_chan *chan) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(chan->device); - unsigned long flags; - - if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { - dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n"); - return -ENODEV; - } - - spin_lock_irqsave(&dwc->lock, flags); - - /* Enable interrupts to perform cyclic transfer */ - channel_set_bit(dw, MASK.BLOCK, dwc->mask); - - dwc_dostart(dwc, dwc->cdesc->desc[0]); - - spin_unlock_irqrestore(&dwc->lock, flags); - - return 0; -} -EXPORT_SYMBOL(dw_dma_cyclic_start); - -/** - * dw_dma_cyclic_stop - stop the cyclic DMA transfer - * @chan: the DMA channel to stop - * - * Must be called with soft interrupts disabled. - */ -void dw_dma_cyclic_stop(struct dma_chan *chan) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - unsigned long flags; - - spin_lock_irqsave(&dwc->lock, flags); - - dwc_chan_disable(dw, dwc); - - spin_unlock_irqrestore(&dwc->lock, flags); -} -EXPORT_SYMBOL(dw_dma_cyclic_stop); - -/** - * dw_dma_cyclic_prep - prepare the cyclic DMA transfer - * @chan: the DMA channel to prepare - * @buf_addr: physical DMA address where the buffer starts - * @buf_len: total number of bytes for the entire buffer - * @period_len: number of bytes for each period - * @direction: transfer direction, to or from device - * - * Must be called before trying to start the transfer. Returns a valid struct - * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful. - */ -struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, - dma_addr_t buf_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dma_slave_config *sconfig = &dwc->dma_sconfig; - struct dw_cyclic_desc *cdesc; - struct dw_cyclic_desc *retval = NULL; - struct dw_desc *desc; - struct dw_desc *last = NULL; - u8 lms = DWC_LLP_LMS(dwc->dws.m_master); - unsigned long was_cyclic; - unsigned int reg_width; - unsigned int periods; - unsigned int i; - unsigned long flags; - - spin_lock_irqsave(&dwc->lock, flags); - if (dwc->nollp) { - spin_unlock_irqrestore(&dwc->lock, flags); - dev_dbg(chan2dev(&dwc->chan), - "channel doesn't support LLP transfers\n"); - return ERR_PTR(-EINVAL); - } - - if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) { - spin_unlock_irqrestore(&dwc->lock, flags); - dev_dbg(chan2dev(&dwc->chan), - "queue and/or active list are not empty\n"); - return ERR_PTR(-EBUSY); - } - - was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags); - spin_unlock_irqrestore(&dwc->lock, flags); - if (was_cyclic) { - dev_dbg(chan2dev(&dwc->chan), - "channel already prepared for cyclic DMA\n"); - return ERR_PTR(-EBUSY); - } - - retval = ERR_PTR(-EINVAL); - - if (unlikely(!is_slave_direction(direction))) - goto out_err; - - dwc->direction = direction; - - if (direction == DMA_MEM_TO_DEV) - reg_width = __ffs(sconfig->dst_addr_width); - else - reg_width = __ffs(sconfig->src_addr_width); - - periods = buf_len / period_len; - - /* Check for too big/unaligned periods and unaligned DMA buffer. */ - if (period_len > (dwc->block_size << reg_width)) - goto out_err; - if (unlikely(period_len & ((1 << reg_width) - 1))) - goto out_err; - if (unlikely(buf_addr & ((1 << reg_width) - 1))) - goto out_err; - - retval = ERR_PTR(-ENOMEM); - - cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); - if (!cdesc) - goto out_err; - - cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL); - if (!cdesc->desc) - goto out_err_alloc; - - for (i = 0; i < periods; i++) { - desc = dwc_desc_get(dwc); - if (!desc) - goto out_err_desc_get; - - switch (direction) { - case DMA_MEM_TO_DEV: - lli_write(desc, dar, sconfig->dst_addr); - lli_write(desc, sar, buf_addr + period_len * i); - lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_FIX - | DWC_CTLL_SRC_INC - | DWC_CTLL_INT_EN)); - - lli_set(desc, ctllo, sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_M2P) : - DWC_CTLL_FC(DW_DMA_FC_D_M2P)); - - break; - case DMA_DEV_TO_MEM: - lli_write(desc, dar, buf_addr + period_len * i); - lli_write(desc, sar, sconfig->src_addr); - lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) - | DWC_CTLL_SRC_WIDTH(reg_width) - | DWC_CTLL_DST_WIDTH(reg_width) - | DWC_CTLL_DST_INC - | DWC_CTLL_SRC_FIX - | DWC_CTLL_INT_EN)); - - lli_set(desc, ctllo, sconfig->device_fc ? - DWC_CTLL_FC(DW_DMA_FC_P_P2M) : - DWC_CTLL_FC(DW_DMA_FC_D_P2M)); - - break; - default: - break; - } - - lli_write(desc, ctlhi, period_len >> reg_width); - cdesc->desc[i] = desc; - - if (last) - lli_write(last, llp, desc->txd.phys | lms); - - last = desc; - } - - /* Let's make a cyclic list */ - lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); - - dev_dbg(chan2dev(&dwc->chan), - "cyclic prepared buf %pad len %zu period %zu periods %d\n", - &buf_addr, buf_len, period_len, periods); - - cdesc->periods = periods; - dwc->cdesc = cdesc; - - return cdesc; - -out_err_desc_get: - while (i--) - dwc_desc_put(dwc, cdesc->desc[i]); -out_err_alloc: - kfree(cdesc); -out_err: - clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); - return (struct dw_cyclic_desc *)retval; -} -EXPORT_SYMBOL(dw_dma_cyclic_prep); - -/** - * dw_dma_cyclic_free - free a prepared cyclic DMA transfer - * @chan: the DMA channel to free - */ -void dw_dma_cyclic_free(struct dma_chan *chan) -{ - struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); - struct dw_cyclic_desc *cdesc = dwc->cdesc; - unsigned int i; - unsigned long flags; - - dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); - - if (!cdesc) - return; - - spin_lock_irqsave(&dwc->lock, flags); - - dwc_chan_disable(dw, dwc); - - dma_writel(dw, CLEAR.BLOCK, dwc->mask); - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); - - spin_unlock_irqrestore(&dwc->lock, flags); - - for (i = 0; i < cdesc->periods; i++) - dwc_desc_put(dwc, cdesc->desc[i]); - - kfree(cdesc->desc); - kfree(cdesc); - - dwc->cdesc = NULL; - - clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); -} -EXPORT_SYMBOL(dw_dma_cyclic_free); - -/*----------------------------------------------------------------------*/ - int dw_dma_probe(struct dw_dma_chip *chip) { struct dw_dma_platform_data *pdata; @@ -1642,7 +1314,7 @@ int dw_dma_probe(struct dw_dma_chip *chip) if (autocfg) { unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; - unsigned int dwc_params = dma_readl_native(addr); + unsigned int dwc_params = readl(addr); dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, dwc_params); diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index c639c60b825a..bc31fe802061 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -306,8 +306,12 @@ static int dw_resume_early(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct dw_dma_chip *chip = platform_get_drvdata(pdev); + int ret; + + ret = clk_prepare_enable(chip->clk); + if (ret) + return ret; - clk_prepare_enable(chip->clk); return dw_dma_enable(chip); } diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 32a328721c88..09e7dfdbb790 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -116,20 +116,6 @@ struct dw_dma_regs { DW_REG(GLOBAL_CFG); }; -/* - * Big endian I/O access when reading and writing to the DMA controller - * registers. This is needed on some platforms, like the Atmel AVR32 - * architecture. - */ - -#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO -#define dma_readl_native ioread32be -#define dma_writel_native iowrite32be -#else -#define dma_readl_native readl -#define dma_writel_native writel -#endif - /* Bitfields in DW_PARAMS */ #define DW_PARAMS_NR_CHAN 8 /* number of channels */ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ @@ -280,7 +266,6 @@ struct dw_dma_chan { unsigned long flags; struct list_head active_list; struct list_head queue; - struct dw_cyclic_desc *cdesc; unsigned int descs_allocated; @@ -302,9 +287,9 @@ __dwc_regs(struct dw_dma_chan *dwc) } #define channel_readl(dwc, name) \ - dma_readl_native(&(__dwc_regs(dwc)->name)) + readl(&(__dwc_regs(dwc)->name)) #define channel_writel(dwc, name, val) \ - dma_writel_native((val), &(__dwc_regs(dwc)->name)) + writel((val), &(__dwc_regs(dwc)->name)) static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) { @@ -333,9 +318,9 @@ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) } #define dma_readl(dw, name) \ - dma_readl_native(&(__dw_regs(dw)->name)) + readl(&(__dw_regs(dw)->name)) #define dma_writel(dw, name, val) \ - dma_writel_native((val), &(__dw_regs(dw)->name)) + writel((val), &(__dw_regs(dw)->name)) #define idma32_readq(dw, name) \ hi_lo_readq(&(__dw_regs(dw)->name)) @@ -352,43 +337,30 @@ static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) return container_of(ddev, struct dw_dma, dma); } -#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO -typedef __be32 __dw32; -#else -typedef __le32 __dw32; -#endif - /* LLI == Linked List Item; a.k.a. DMA block descriptor */ struct dw_lli { /* values that are not changed by hardware */ - __dw32 sar; - __dw32 dar; - __dw32 llp; /* chain to next lli */ - __dw32 ctllo; + __le32 sar; + __le32 dar; + __le32 llp; /* chain to next lli */ + __le32 ctllo; /* values that may get written back: */ - __dw32 ctlhi; + __le32 ctlhi; /* sstat and dstat can snapshot peripheral register state. * silicon config may discard either or both... */ - __dw32 sstat; - __dw32 dstat; + __le32 sstat; + __le32 dstat; }; struct dw_desc { /* FIRST values the hardware uses */ struct dw_lli lli; -#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO -#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) -#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) -#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) -#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) -#else #define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) #define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) #define lli_read(d, reg) le32_to_cpu((d)->lli.reg) #define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) -#endif /* THEN values for driver housekeeping */ struct list_head desc_node; diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 3879f80a4815..a7ea20e7b8e9 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1143,11 +1143,24 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); - unsigned int width, pset_len; + unsigned int width, pset_len, array_size; if (unlikely(!echan || !len)) return NULL; + /* Align the array size (acnt block) with the transfer properties */ + switch (__ffs((src | dest | len))) { + case 0: + array_size = SZ_32K - 1; + break; + case 1: + array_size = SZ_32K - 2; + break; + default: + array_size = SZ_32K - 4; + break; + } + if (len < SZ_64K) { /* * Transfer size less than 64K can be handled with one paRAM @@ -1169,7 +1182,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( * When the full_length is multibple of 32767 one slot can be * used to complete the transfer. */ - width = SZ_32K - 1; + width = array_size; pset_len = rounddown(len, width); /* One slot is enough for lengths multiple of (SZ_32K -1) */ if (unlikely(pset_len == len)) @@ -1217,7 +1230,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( } dest += pset_len; src += pset_len; - pset_len = width = len % (SZ_32K - 1); + pset_len = width = len % array_size; ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, width, pset_len, DMA_MEM_TO_MEM); diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c index 90d29f90acfb..493dc6c59d1d 100644 --- a/drivers/dma/fsl_raid.c +++ b/drivers/dma/fsl_raid.c @@ -877,7 +877,7 @@ static int fsl_re_remove(struct platform_device *ofdev) return 0; } -static struct of_device_id fsl_re_ids[] = { +static const struct of_device_id fsl_re_ids[] = { { .compatible = "fsl,raideng-v1.0", }, {} }; diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 51c75bf2b9b6..3eaece888e75 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -269,6 +269,7 @@ static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size) case 2: case 4: case 8: + mode &= ~FSL_DMA_MR_SAHTS_MASK; mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14); break; } @@ -301,6 +302,7 @@ static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size) case 2: case 4: case 8: + mode &= ~FSL_DMA_MR_DAHTS_MASK; mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16); break; } @@ -327,7 +329,8 @@ static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size) BUG_ON(size > 1024); mode = get_mr(chan); - mode |= (__ilog2(size) << 24) & 0x0f000000; + mode &= ~FSL_DMA_MR_BWC_MASK; + mode |= (__ilog2(size) << 24) & FSL_DMA_MR_BWC_MASK; set_mr(chan, mode); } @@ -822,122 +825,6 @@ fail: return NULL; } -static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags) -{ - struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; - struct fsldma_chan *chan = to_fsl_chan(dchan); - size_t dst_avail, src_avail; - dma_addr_t dst, src; - size_t len; - - /* basic sanity checks */ - if (dst_nents == 0 || src_nents == 0) - return NULL; - - if (dst_sg == NULL || src_sg == NULL) - return NULL; - - /* - * TODO: should we check that both scatterlists have the same - * TODO: number of bytes in total? Is that really an error? - */ - - /* get prepared for the loop */ - dst_avail = sg_dma_len(dst_sg); - src_avail = sg_dma_len(src_sg); - - /* run until we are out of scatterlist entries */ - while (true) { - - /* create the largest transaction possible */ - len = min_t(size_t, src_avail, dst_avail); - len = min_t(size_t, len, FSL_DMA_BCR_MAX_CNT); - if (len == 0) - goto fetch; - - dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail; - src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail; - - /* allocate and populate the descriptor */ - new = fsl_dma_alloc_descriptor(chan); - if (!new) { - chan_err(chan, "%s\n", msg_ld_oom); - goto fail; - } - - set_desc_cnt(chan, &new->hw, len); - set_desc_src(chan, &new->hw, src); - set_desc_dst(chan, &new->hw, dst); - - if (!first) - first = new; - else - set_desc_next(chan, &prev->hw, new->async_tx.phys); - - new->async_tx.cookie = 0; - async_tx_ack(&new->async_tx); - prev = new; - - /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); - - /* update metadata */ - dst_avail -= len; - src_avail -= len; - -fetch: - /* fetch the next dst scatterlist entry */ - if (dst_avail == 0) { - - /* no more entries: we're done */ - if (dst_nents == 0) - break; - - /* fetch the next entry: if there are no more: done */ - dst_sg = sg_next(dst_sg); - if (dst_sg == NULL) - break; - - dst_nents--; - dst_avail = sg_dma_len(dst_sg); - } - - /* fetch the next src scatterlist entry */ - if (src_avail == 0) { - - /* no more entries: we're done */ - if (src_nents == 0) - break; - - /* fetch the next entry: if there are no more: done */ - src_sg = sg_next(src_sg); - if (src_sg == NULL) - break; - - src_nents--; - src_avail = sg_dma_len(src_sg); - } - } - - new->async_tx.flags = flags; /* client is in control of this ack */ - new->async_tx.cookie = -EBUSY; - - /* Set End-of-link to the last link descriptor of new list */ - set_ld_eol(chan, new); - - return &first->async_tx; - -fail: - if (!first) - return NULL; - - fsldma_free_desc_list_reverse(chan, &first->tx_list); - return NULL; -} - static int fsl_dma_device_terminate_all(struct dma_chan *dchan) { struct fsldma_chan *chan; @@ -1354,12 +1241,10 @@ static int fsldma_of_probe(struct platform_device *op) fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0); dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); - dma_cap_set(DMA_SG, fdev->common.cap_mask); dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; - fdev->common.device_prep_dma_sg = fsl_dma_prep_sg; fdev->common.device_tx_status = fsl_tx_status; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; fdev->common.device_config = fsl_dma_device_config; diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 31bffccdcc75..4787d485dd76 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -36,6 +36,10 @@ #define FSL_DMA_MR_DAHE 0x00002000 #define FSL_DMA_MR_SAHE 0x00001000 +#define FSL_DMA_MR_SAHTS_MASK 0x0000C000 +#define FSL_DMA_MR_DAHTS_MASK 0x00030000 +#define FSL_DMA_MR_BWC_MASK 0x0f000000 + /* * Bandwidth/pause control determines how many bytes a given * channel is allowed to transfer before the DMA engine pauses diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index ab0fb804fb1e..f681df8f0ed3 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -888,7 +888,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( sg_init_table(imxdmac->sg_list, periods); for (i = 0; i < periods; i++) { - imxdmac->sg_list[i].page_link = 0; + sg_assign_page(&imxdmac->sg_list[i], NULL); imxdmac->sg_list[i].offset = 0; imxdmac->sg_list[i].dma_address = dma_addr; sg_dma_len(&imxdmac->sg_list[i]) = period_len; @@ -896,10 +896,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( } /* close the loop */ - imxdmac->sg_list[periods].offset = 0; - sg_dma_len(&imxdmac->sg_list[periods]) = 0; - imxdmac->sg_list[periods].page_link = - ((unsigned long)imxdmac->sg_list | 0x01) & ~0x02; + sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list); desc->type = IMXDMA_DESC_CYCLIC; desc->sg = imxdmac->sg_list; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 085993cb2ccc..a67ec1bdc4e0 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1323,7 +1323,7 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( } if (period_len > 0xffff) { - dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %d > %d\n", + dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n", channel, period_len, 0xffff); goto err_out; } @@ -1347,7 +1347,7 @@ static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( if (i + 1 == num_periods) param |= BD_WRAP; - dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n", + dev_dbg(sdma->dev, "entry %d: count: %zu dma: %#llx %s%s\n", i, period_len, (u64)dma_addr, param & BD_WRAP ? "wrap" : "", param & BD_INTR ? " intr" : ""); @@ -1755,19 +1755,26 @@ static int sdma_probe(struct platform_device *pdev) if (IS_ERR(sdma->clk_ahb)) return PTR_ERR(sdma->clk_ahb); - clk_prepare(sdma->clk_ipg); - clk_prepare(sdma->clk_ahb); + ret = clk_prepare(sdma->clk_ipg); + if (ret) + return ret; + + ret = clk_prepare(sdma->clk_ahb); + if (ret) + goto err_clk; ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, "sdma", sdma); if (ret) - return ret; + goto err_irq; sdma->irq = irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) - return -ENOMEM; + if (!sdma->script_addrs) { + ret = -ENOMEM; + goto err_irq; + } /* initially no scripts available */ saddr_arr = (s32 *)sdma->script_addrs; @@ -1882,6 +1889,10 @@ err_register: dma_async_device_unregister(&sdma->dma_device); err_init: kfree(sdma->script_addrs); +err_irq: + clk_unprepare(sdma->clk_ahb); +err_clk: + clk_unprepare(sdma->clk_ipg); return ret; } @@ -1893,6 +1904,8 @@ static int sdma_remove(struct platform_device *pdev) devm_free_irq(&pdev->dev, sdma->irq, sdma); dma_async_device_unregister(&sdma->dma_device); kfree(sdma->script_addrs); + clk_unprepare(sdma->clk_ahb); + clk_unprepare(sdma->clk_ipg); /* Kill the tasklet */ for (i = 0; i < MAX_DMA_CHANNELS; i++) { struct sdma_channel *sdmac = &sdma->channel[i]; diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c index 0b9b6b07db9e..eab2fdda29ec 100644 --- a/drivers/dma/ioat/dca.c +++ b/drivers/dma/ioat/dca.c @@ -336,10 +336,10 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) } if (dca3_tag_map_invalid(ioatdca->tag_map)) { - WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, - "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", - dev_driver_string(&pdev->dev), - dev_name(&pdev->dev)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + pr_warn_once("%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", + dev_driver_string(&pdev->dev), + dev_name(&pdev->dev)); free_dca_provider(dca); return NULL; } diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index a371b07a0981..f70cc74032ea 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -644,9 +644,13 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } - /* 5 microsecond delay per pending descriptor */ - writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), - ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); + /* microsecond delay by sysfs variable per pending descriptor */ + if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) { + writew(min((ioat_chan->intr_coalesce * (active - i)), + IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); + ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce; + } } static void ioat_cleanup(struct ioatdma_chan *ioat_chan) diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index a9bc1a15b0d1..56200eefcf5e 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -142,11 +142,14 @@ struct ioatdma_chan { spinlock_t prep_lock; struct ioat_descs descs[2]; int desc_chunks; + int intr_coalesce; + int prev_intr_coalesce; }; struct ioat_sysfs_entry { struct attribute attr; ssize_t (*show)(struct dma_chan *, char *); + ssize_t (*store)(struct dma_chan *, const char *, size_t); }; /** diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 6ad4384b3fa8..93e006c3441d 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -39,7 +39,7 @@ MODULE_VERSION(IOAT_DMA_VERSION); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); -static struct pci_device_id ioat_pci_tbl[] = { +static const struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v3 platforms */ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, @@ -839,8 +839,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) goto free_resources; } - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_srcs[i] = DMA_ERROR_CODE; for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); @@ -910,8 +908,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) xor_val_result = 1; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); @@ -965,8 +961,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) op = IOAT_OP_XOR_VAL; xor_val_result = 0; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); @@ -1017,18 +1011,14 @@ static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) goto free_resources; dma_unmap: if (op == IOAT_OP_XOR) { - if (dest_dma != DMA_ERROR_CODE) - dma_unmap_page(dev, dest_dma, PAGE_SIZE, - DMA_FROM_DEVICE); - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); + while (--i >= 0) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); } else if (op == IOAT_OP_XOR_VAL) { - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); + while (--i >= 0) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); } free_resources: dma->device_free_chan_resources(dma_chan); diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c index cb4a857ee21b..3ac677f29e8f 100644 --- a/drivers/dma/ioat/sysfs.c +++ b/drivers/dma/ioat/sysfs.c @@ -64,8 +64,24 @@ ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) return entry->show(&ioat_chan->dma_chan, page); } +static ssize_t +ioat_attr_store(struct kobject *kobj, struct attribute *attr, +const char *page, size_t count) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->store) + return -EIO; + return entry->store(&ioat_chan->dma_chan, page, count); +} + const struct sysfs_ops ioat_sysfs_ops = { .show = ioat_attr_show, + .store = ioat_attr_store, }; void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) @@ -121,11 +137,37 @@ static ssize_t ring_active_show(struct dma_chan *c, char *page) } static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); +static ssize_t intr_coalesce_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat_chan->intr_coalesce); +} + +static ssize_t intr_coalesce_store(struct dma_chan *c, const char *page, +size_t count) +{ + int intr_coalesce = 0; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (sscanf(page, "%du", &intr_coalesce) != -1) { + if ((intr_coalesce < 0) || + (intr_coalesce > IOAT_INTRDELAY_MASK)) + return -EINVAL; + ioat_chan->intr_coalesce = intr_coalesce; + } + + return count; +} + +static struct ioat_sysfs_entry intr_coalesce_attr = __ATTR_RW(intr_coalesce); + static struct attribute *ioat_attrs[] = { &ring_size_attr.attr, &ring_active_attr.attr, &ioat_cap_attr.attr, &ioat_version_attr.attr, + &intr_coalesce_attr.attr, NULL, }; diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 01e25c68dd5a..01d2a750a621 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -223,7 +223,6 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id) if (c && (tc1 & BIT(i))) { spin_lock_irqsave(&c->vc.lock, flags); vchan_cookie_complete(&p->ds_run->vd); - WARN_ON_ONCE(p->ds_done); p->ds_done = p->ds_run; p->ds_run = NULL; spin_unlock_irqrestore(&c->vc.lock, flags); @@ -274,13 +273,14 @@ static int k3_dma_start_txd(struct k3_dma_chan *c) */ list_del(&ds->vd.node); - WARN_ON_ONCE(c->phy->ds_run); - WARN_ON_ONCE(c->phy->ds_done); c->phy->ds_run = ds; + c->phy->ds_done = NULL; /* start dma */ k3_dma_set_desc(c->phy, &ds->desc_hw[0]); return 0; } + c->phy->ds_run = NULL; + c->phy->ds_done = NULL; return -EAGAIN; } @@ -722,11 +722,7 @@ static int k3_dma_terminate_all(struct dma_chan *chan) k3_dma_free_desc(&p->ds_run->vd); p->ds_run = NULL; } - if (p->ds_done) { - k3_dma_free_desc(&p->ds_done->vd); - p->ds_done = NULL; - } - + p->ds_done = NULL; } spin_unlock_irqrestore(&c->vc.lock, flags); vchan_dma_desc_free_list(&c->vc, &head); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 25bc5b103aa2..1993889003fd 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -68,36 +68,6 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -/* Populate the descriptor */ -static void mv_xor_config_sg_ll_desc(struct mv_xor_desc_slot *desc, - dma_addr_t dma_src, dma_addr_t dma_dst, - u32 len, struct mv_xor_desc_slot *prev) -{ - struct mv_xor_desc *hw_desc = desc->hw_desc; - - hw_desc->status = XOR_DESC_DMA_OWNED; - hw_desc->phy_next_desc = 0; - /* Configure for XOR with only one src address -> MEMCPY */ - hw_desc->desc_command = XOR_DESC_OPERATION_XOR | (0x1 << 0); - hw_desc->phy_dest_addr = dma_dst; - hw_desc->phy_src_addr[0] = dma_src; - hw_desc->byte_count = len; - - if (prev) { - struct mv_xor_desc *hw_prev = prev->hw_desc; - - hw_prev->phy_next_desc = desc->async_tx.phys; - } -} - -static void mv_xor_desc_config_eod(struct mv_xor_desc_slot *desc) -{ - struct mv_xor_desc *hw_desc = desc->hw_desc; - - /* Enable end-of-descriptor interrupt */ - hw_desc->desc_command |= XOR_DESC_EOD_INT_EN; -} - static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; @@ -662,132 +632,6 @@ mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags); } -/** - * mv_xor_prep_dma_sg - prepare descriptors for a memory sg transaction - * @chan: DMA channel - * @dst_sg: Destination scatter list - * @dst_sg_len: Number of entries in destination scatter list - * @src_sg: Source scatter list - * @src_sg_len: Number of entries in source scatter list - * @flags: transfer ack flags - * - * Return: Async transaction descriptor on success and NULL on failure - */ -static struct dma_async_tx_descriptor * -mv_xor_prep_dma_sg(struct dma_chan *chan, struct scatterlist *dst_sg, - unsigned int dst_sg_len, struct scatterlist *src_sg, - unsigned int src_sg_len, unsigned long flags) -{ - struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); - struct mv_xor_desc_slot *new; - struct mv_xor_desc_slot *first = NULL; - struct mv_xor_desc_slot *prev = NULL; - size_t len, dst_avail, src_avail; - dma_addr_t dma_dst, dma_src; - int desc_cnt = 0; - int ret; - - dev_dbg(mv_chan_to_devp(mv_chan), - "%s dst_sg_len: %d src_sg_len: %d flags: %ld\n", - __func__, dst_sg_len, src_sg_len, flags); - - dst_avail = sg_dma_len(dst_sg); - src_avail = sg_dma_len(src_sg); - - /* Run until we are out of scatterlist entries */ - while (true) { - /* Allocate and populate the descriptor */ - desc_cnt++; - new = mv_chan_alloc_slot(mv_chan); - if (!new) { - dev_err(mv_chan_to_devp(mv_chan), - "Out of descriptors (desc_cnt=%d)!\n", - desc_cnt); - goto err; - } - - len = min_t(size_t, src_avail, dst_avail); - len = min_t(size_t, len, MV_XOR_MAX_BYTE_COUNT); - if (len == 0) - goto fetch; - - if (len < MV_XOR_MIN_BYTE_COUNT) { - dev_err(mv_chan_to_devp(mv_chan), - "Transfer size of %zu too small!\n", len); - goto err; - } - - dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - - dst_avail; - dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - - src_avail; - - /* Check if a new window needs to get added for 'dst' */ - ret = mv_xor_add_io_win(mv_chan, dma_dst); - if (ret) - goto err; - - /* Check if a new window needs to get added for 'src' */ - ret = mv_xor_add_io_win(mv_chan, dma_src); - if (ret) - goto err; - - /* Populate the descriptor */ - mv_xor_config_sg_ll_desc(new, dma_src, dma_dst, len, prev); - prev = new; - dst_avail -= len; - src_avail -= len; - - if (!first) - first = new; - else - list_move_tail(&new->node, &first->sg_tx_list); - -fetch: - /* Fetch the next dst scatterlist entry */ - if (dst_avail == 0) { - if (dst_sg_len == 0) - break; - - /* Fetch the next entry: if there are no more: done */ - dst_sg = sg_next(dst_sg); - if (dst_sg == NULL) - break; - - dst_sg_len--; - dst_avail = sg_dma_len(dst_sg); - } - - /* Fetch the next src scatterlist entry */ - if (src_avail == 0) { - if (src_sg_len == 0) - break; - - /* Fetch the next entry: if there are no more: done */ - src_sg = sg_next(src_sg); - if (src_sg == NULL) - break; - - src_sg_len--; - src_avail = sg_dma_len(src_sg); - } - } - - /* Set the EOD flag in the last descriptor */ - mv_xor_desc_config_eod(new); - first->async_tx.flags = flags; - - return &first->async_tx; - -err: - /* Cleanup: Move all descriptors back into the free list */ - spin_lock_bh(&mv_chan->lock); - mv_desc_clean_slot(first, mv_chan); - spin_unlock_bh(&mv_chan->lock); - - return NULL; -} - static void mv_xor_free_chan_resources(struct dma_chan *chan) { struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); @@ -1254,8 +1098,6 @@ mv_xor_channel_add(struct mv_xor_device *xordev, dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt; if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy; - if (dma_has_cap(DMA_SG, dma_dev->cap_mask)) - dma_dev->device_prep_dma_sg = mv_xor_prep_dma_sg; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->max_xor = 8; dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; @@ -1305,11 +1147,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev, goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", - dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "sg " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); dma_async_device_register(dma_dev); @@ -1552,7 +1393,6 @@ static int mv_xor_probe(struct platform_device *pdev) dma_cap_zero(cap_mask); dma_cap_set(DMA_MEMCPY, cap_mask); - dma_cap_set(DMA_SG, cap_mask); dma_cap_set(DMA_XOR, cap_mask); dma_cap_set(DMA_INTERRUPT, cap_mask); diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index f3e211f8f6c5..f652a0e0f5a2 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -42,6 +42,7 @@ #define MV_XOR_V2_DMA_IMSG_THRD_OFF 0x018 #define MV_XOR_V2_DMA_IMSG_THRD_MASK 0x7FFF #define MV_XOR_V2_DMA_IMSG_THRD_SHIFT 0x0 +#define MV_XOR_V2_DMA_IMSG_TIMER_EN BIT(18) #define MV_XOR_V2_DMA_DESQ_AWATTR_OFF 0x01C /* Same flags as MV_XOR_V2_DMA_DESQ_ARATTR_OFF */ #define MV_XOR_V2_DMA_DESQ_ALLOC_OFF 0x04C @@ -55,6 +56,9 @@ #define MV_XOR_V2_DMA_DESQ_STOP_OFF 0x800 #define MV_XOR_V2_DMA_DESQ_DEALLOC_OFF 0x804 #define MV_XOR_V2_DMA_DESQ_ADD_OFF 0x808 +#define MV_XOR_V2_DMA_IMSG_TMOT 0x810 +#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK 0x1FFF +#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT 0 /* XOR Global registers */ #define MV_XOR_V2_GLOB_BW_CTRL 0x4 @@ -90,6 +94,13 @@ */ #define MV_XOR_V2_DESC_NUM 1024 +/* + * Threshold values for descriptors and timeout, determined by + * experimentation as giving a good level of performance. + */ +#define MV_XOR_V2_DONE_IMSG_THRD 0x14 +#define MV_XOR_V2_TIMER_THRD 0xB0 + /** * struct mv_xor_v2_descriptor - DMA HW descriptor * @desc_id: used by S/W and is not affected by H/W. @@ -246,6 +257,29 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev) return MV_XOR_V2_EXT_DESC_SIZE; } +/* + * Set the IMSG threshold + */ +static inline +void mv_xor_v2_enable_imsg_thrd(struct mv_xor_v2_device *xor_dev) +{ + u32 reg; + + /* Configure threshold of number of descriptors, and enable timer */ + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); + reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); + reg |= (MV_XOR_V2_DONE_IMSG_THRD << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); + reg |= MV_XOR_V2_DMA_IMSG_TIMER_EN; + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); + + /* Configure Timer Threshold */ + reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT); + reg &= (~MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK << + MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT); + reg |= (MV_XOR_V2_TIMER_THRD << MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT); + writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT); +} + static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) { struct mv_xor_v2_device *xor_dev = data; @@ -501,9 +535,6 @@ static void mv_xor_v2_issue_pending(struct dma_chan *chan) mv_xor_v2_add_desc_to_desq(xor_dev, xor_dev->npendings); xor_dev->npendings = 0; - /* Activate the channel */ - writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); - spin_unlock_bh(&xor_dev->lock); } @@ -665,6 +696,27 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) return 0; } +static int mv_xor_v2_suspend(struct platform_device *dev, pm_message_t state) +{ + struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev); + + /* Set this bit to disable to stop the XOR unit. */ + writel(0x1, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); + + return 0; +} + +static int mv_xor_v2_resume(struct platform_device *dev) +{ + struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev); + + mv_xor_v2_set_desc_size(xor_dev); + mv_xor_v2_enable_imsg_thrd(xor_dev); + mv_xor_v2_descq_init(xor_dev); + + return 0; +} + static int mv_xor_v2_probe(struct platform_device *pdev) { struct mv_xor_v2_device *xor_dev; @@ -795,6 +847,8 @@ static int mv_xor_v2_probe(struct platform_device *pdev) list_add_tail(&xor_dev->dmachan.device_node, &dma_dev->channels); + mv_xor_v2_enable_imsg_thrd(xor_dev); + mv_xor_v2_descq_init(xor_dev); ret = dma_async_device_register(dma_dev); @@ -844,6 +898,8 @@ MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids); static struct platform_driver mv_xor_v2_driver = { .probe = mv_xor_v2_probe, + .suspend = mv_xor_v2_suspend, + .resume = mv_xor_v2_resume, .remove = mv_xor_v2_remove, .driver = { .name = "mv_xor_v2", diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index e217268c7098..41d167921fab 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -617,7 +617,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( if (period_len > MAX_XFER_BYTES) { dev_err(mxs_dma->dma_device.dev, - "maximum period size exceeded: %d > %d\n", + "maximum period size exceeded: %zu > %d\n", period_len, MAX_XFER_BYTES); goto err_out; } diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 3f45b9bdf201..d3f918a9ee76 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -1005,21 +1005,6 @@ static struct dma_async_tx_descriptor *nbpf_prep_memcpy( DMA_MEM_TO_MEM, flags); } -static struct dma_async_tx_descriptor *nbpf_prep_memcpy_sg( - struct dma_chan *dchan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags) -{ - struct nbpf_channel *chan = nbpf_to_chan(dchan); - - if (dst_nents != src_nents) - return NULL; - - return nbpf_prep_sg(chan, src_sg, dst_sg, src_nents, - DMA_MEM_TO_MEM, flags); -} - static struct dma_async_tx_descriptor *nbpf_prep_slave_sg( struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) @@ -1417,13 +1402,11 @@ static int nbpf_probe(struct platform_device *pdev) dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); - dma_cap_set(DMA_SG, dma_dev->cap_mask); /* Common and MEMCPY operations */ dma_dev->device_alloc_chan_resources = nbpf_alloc_chan_resources; dma_dev->device_free_chan_resources = nbpf_free_chan_resources; - dma_dev->device_prep_dma_sg = nbpf_prep_memcpy_sg; dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy; dma_dev->device_tx_status = nbpf_tx_status; dma_dev->device_issue_pending = nbpf_issue_pending; diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index faae0bfe1109..91fd395c90c4 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -38,8 +38,8 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) if (ofdma->of_node == dma_spec->np) return ofdma; - pr_debug("%s: can't find DMA controller %s\n", __func__, - dma_spec->np->full_name); + pr_debug("%s: can't find DMA controller %pOF\n", __func__, + dma_spec->np); return NULL; } @@ -255,8 +255,8 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, count = of_property_count_strings(np, "dma-names"); if (count < 0) { - pr_err("%s: dma-names property of node '%s' missing or empty\n", - __func__, np->full_name); + pr_err("%s: dma-names property of node '%pOF' missing or empty\n", + __func__, np); return ERR_PTR(-ENODEV); } diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index daf479cce691..8c1665c8fe33 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -916,12 +916,6 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( return NULL; } - /* When the port_window is used, one frame must cover the window */ - if (port_window) { - burst = port_window; - port_window_bytes = port_window * es_bytes[es]; - } - /* Now allocate and setup the descriptor. */ d = kzalloc(sizeof(*d) + sglen * sizeof(d->sg[0]), GFP_ATOMIC); if (!d) @@ -931,6 +925,21 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( d->dev_addr = dev_addr; d->es = es; + /* When the port_window is used, one frame must cover the window */ + if (port_window) { + burst = port_window; + port_window_bytes = port_window * es_bytes[es]; + + d->ei = 1; + /* + * One frame covers the port_window and by configure + * the source frame index to be -1 * (port_window - 1) + * we instruct the sDMA that after a frame is processed + * it should move back to the start of the window. + */ + d->fi = -(port_window_bytes - 1); + } + d->ccr = c->ccr | CCR_SYNC_FRAME; if (dir == DMA_DEV_TO_MEM) { d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED; @@ -955,14 +964,6 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( d->ccr |= CCR_SRC_AMODE_POSTINC; if (port_window) { d->ccr |= CCR_DST_AMODE_DBLIDX; - d->ei = 1; - /* - * One frame covers the port_window and by configure - * the source frame index to be -1 * (port_window - 1) - * we instruct the sDMA that after a frame is processed - * it should move back to the start of the window. - */ - d->fi = -(port_window_bytes - 1); if (port_window_bytes >= 64) d->csdp |= CSDP_DST_BURST_64; @@ -1018,16 +1019,6 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( osg->addr = sg_dma_address(sgent); osg->en = en; osg->fn = sg_dma_len(sgent) / frame_bytes; - if (port_window && dir == DMA_DEV_TO_MEM) { - osg->ei = 1; - /* - * One frame covers the port_window and by configure - * the source frame index to be -1 * (port_window - 1) - * we instruct the sDMA that after a frame is processed - * it should move back to the start of the window. - */ - osg->fi = -(port_window_bytes - 1); - } if (d->using_ll) { osg->t2_desc = dma_pool_alloc(od->desc_pool, GFP_ATOMIC, diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e90a7a0d760a..f122c2a7b9f0 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -443,7 +443,10 @@ struct dma_pl330_chan { /* For D-to-M and M-to-D channels */ int burst_sz; /* the peripheral fifo width */ int burst_len; /* the number of burst */ - dma_addr_t fifo_addr; + phys_addr_t fifo_addr; + /* DMA-mapped view of the FIFO; may differ if an IOMMU is present */ + dma_addr_t fifo_dma; + enum dma_data_direction dir; /* for cyclic capability */ bool cyclic; @@ -538,11 +541,6 @@ struct _xfer_spec { struct dma_pl330_desc *desc; }; -static inline bool _queue_empty(struct pl330_thread *thrd) -{ - return thrd->req[0].desc == NULL && thrd->req[1].desc == NULL; -} - static inline bool _queue_full(struct pl330_thread *thrd) { return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL; @@ -564,23 +562,6 @@ static inline u32 get_revision(u32 periph_id) return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK; } -static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[], - enum pl330_dst da, u16 val) -{ - if (dry_run) - return SZ_DMAADDH; - - buf[0] = CMD_DMAADDH; - buf[0] |= (da << 1); - buf[1] = val; - buf[2] = val >> 8; - - PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n", - da == 1 ? "DA" : "SA", val); - - return SZ_DMAADDH; -} - static inline u32 _emit_END(unsigned dry_run, u8 buf[]) { if (dry_run) @@ -738,18 +719,6 @@ static inline u32 _emit_MOV(unsigned dry_run, u8 buf[], return SZ_DMAMOV; } -static inline u32 _emit_NOP(unsigned dry_run, u8 buf[]) -{ - if (dry_run) - return SZ_DMANOP; - - buf[0] = CMD_DMANOP; - - PL330_DBGCMD_DUMP(SZ_DMANOP, "\tDMANOP\n"); - - return SZ_DMANOP; -} - static inline u32 _emit_RMB(unsigned dry_run, u8 buf[]) { if (dry_run) @@ -817,39 +786,6 @@ static inline u32 _emit_STP(unsigned dry_run, u8 buf[], return SZ_DMASTP; } -static inline u32 _emit_STZ(unsigned dry_run, u8 buf[]) -{ - if (dry_run) - return SZ_DMASTZ; - - buf[0] = CMD_DMASTZ; - - PL330_DBGCMD_DUMP(SZ_DMASTZ, "\tDMASTZ\n"); - - return SZ_DMASTZ; -} - -static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev, - unsigned invalidate) -{ - if (dry_run) - return SZ_DMAWFE; - - buf[0] = CMD_DMAWFE; - - ev &= 0x1f; - ev <<= 3; - buf[1] = ev; - - if (invalidate) - buf[1] |= (1 << 1); - - PL330_DBGCMD_DUMP(SZ_DMAWFE, "\tDMAWFE %u%s\n", - ev >> 3, invalidate ? ", I" : ""); - - return SZ_DMAWFE; -} - static inline u32 _emit_WFP(unsigned dry_run, u8 buf[], enum pl330_cond cond, u8 peri) { @@ -2120,11 +2056,60 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan) return 1; } +/* + * We need the data direction between the DMAC (the dma-mapping "device") and + * the FIFO (the dmaengine "dev"), from the FIFO's point of view. Confusing! + */ +static enum dma_data_direction +pl330_dma_slave_map_dir(enum dma_transfer_direction dir) +{ + switch (dir) { + case DMA_MEM_TO_DEV: + return DMA_FROM_DEVICE; + case DMA_DEV_TO_MEM: + return DMA_TO_DEVICE; + case DMA_DEV_TO_DEV: + return DMA_BIDIRECTIONAL; + default: + return DMA_NONE; + } +} + +static void pl330_unprep_slave_fifo(struct dma_pl330_chan *pch) +{ + if (pch->dir != DMA_NONE) + dma_unmap_resource(pch->chan.device->dev, pch->fifo_dma, + 1 << pch->burst_sz, pch->dir, 0); + pch->dir = DMA_NONE; +} + + +static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch, + enum dma_transfer_direction dir) +{ + struct device *dev = pch->chan.device->dev; + enum dma_data_direction dma_dir = pl330_dma_slave_map_dir(dir); + + /* Already mapped for this config? */ + if (pch->dir == dma_dir) + return true; + + pl330_unprep_slave_fifo(pch); + pch->fifo_dma = dma_map_resource(dev, pch->fifo_addr, + 1 << pch->burst_sz, dma_dir, 0); + if (dma_mapping_error(dev, pch->fifo_dma)) + return false; + + pch->dir = dma_dir; + return true; +} + static int pl330_config(struct dma_chan *chan, struct dma_slave_config *slave_config) { struct dma_pl330_chan *pch = to_pchan(chan); + pl330_unprep_slave_fifo(pch); if (slave_config->direction == DMA_MEM_TO_DEV) { if (slave_config->dst_addr) pch->fifo_addr = slave_config->dst_addr; @@ -2235,6 +2220,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan) spin_unlock_irqrestore(&pl330->lock, flags); pm_runtime_mark_last_busy(pch->dmac->ddma.dev); pm_runtime_put_autosuspend(pch->dmac->ddma.dev); + pl330_unprep_slave_fifo(pch); } static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch, @@ -2564,6 +2550,9 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( return NULL; } + if (!pl330_prep_slave_fifo(pch, direction)) + return NULL; + for (i = 0; i < len / period_len; i++) { desc = pl330_get_desc(pch); if (!desc) { @@ -2593,12 +2582,12 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( desc->rqcfg.src_inc = 1; desc->rqcfg.dst_inc = 0; src = dma_addr; - dst = pch->fifo_addr; + dst = pch->fifo_dma; break; case DMA_DEV_TO_MEM: desc->rqcfg.src_inc = 0; desc->rqcfg.dst_inc = 1; - src = pch->fifo_addr; + src = pch->fifo_dma; dst = dma_addr; break; default: @@ -2711,12 +2700,12 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct dma_pl330_chan *pch = to_pchan(chan); struct scatterlist *sg; int i; - dma_addr_t addr; if (unlikely(!pch || !sgl || !sg_len)) return NULL; - addr = pch->fifo_addr; + if (!pl330_prep_slave_fifo(pch, direction)) + return NULL; first = NULL; @@ -2742,13 +2731,13 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (direction == DMA_MEM_TO_DEV) { desc->rqcfg.src_inc = 1; desc->rqcfg.dst_inc = 0; - fill_px(&desc->px, - addr, sg_dma_address(sg), sg_dma_len(sg)); + fill_px(&desc->px, pch->fifo_dma, sg_dma_address(sg), + sg_dma_len(sg)); } else { desc->rqcfg.src_inc = 0; desc->rqcfg.dst_inc = 1; - fill_px(&desc->px, - sg_dma_address(sg), addr, sg_dma_len(sg)); + fill_px(&desc->px, sg_dma_address(sg), pch->fifo_dma, + sg_dma_len(sg)); } desc->rqcfg.brst_size = pch->burst_sz; @@ -2906,6 +2895,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pch->thread = NULL; pch->chan.device = pd; pch->dmac = pl330; + pch->dir = DMA_NONE; /* Add the channel to the DMAC list */ list_add_tail(&pch->chan.device_node, &pd->channels); @@ -3033,7 +3023,7 @@ static int pl330_remove(struct amba_device *adev) return 0; } -static struct amba_id pl330_ids[] = { +static const struct amba_id pl330_ids[] = { { .id = 0x00041330, .mask = 0x000fffff, diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index b1535b1fe95c..4cf0d4d0cecf 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -4040,9 +4040,9 @@ static int ppc440spe_adma_probe(struct platform_device *ofdev) /* it is DMA0 or DMA1 */ idx = of_get_property(np, "cell-index", &len); if (!idx || (len != sizeof(u32))) { - dev_err(&ofdev->dev, "Device node %s has missing " + dev_err(&ofdev->dev, "Device node %pOF has missing " "or invalid cell-index property\n", - np->full_name); + np); return -EINVAL; } id = *idx; @@ -4307,7 +4307,7 @@ static int ppc440spe_adma_remove(struct platform_device *ofdev) * "poly" allows setting/checking used polynomial (for PPC440SPe only). */ -static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf) +static ssize_t devices_show(struct device_driver *dev, char *buf) { ssize_t size = 0; int i; @@ -4321,16 +4321,17 @@ static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf) } return size; } +static DRIVER_ATTR_RO(devices); -static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf) +static ssize_t enable_show(struct device_driver *dev, char *buf) { return snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 capabilities are %sABLED.\n", ppc440spe_r6_enabled ? "EN" : "DIS"); } -static ssize_t store_ppc440spe_r6enable(struct device_driver *dev, - const char *buf, size_t count) +static ssize_t enable_store(struct device_driver *dev, const char *buf, + size_t count) { unsigned long val; @@ -4357,8 +4358,9 @@ static ssize_t store_ppc440spe_r6enable(struct device_driver *dev, } return count; } +static DRIVER_ATTR_RW(enable); -static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf) +static ssize_t poly_store(struct device_driver *dev, char *buf) { ssize_t size = 0; u32 reg; @@ -4377,8 +4379,8 @@ static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf) return size; } -static ssize_t store_ppc440spe_r6poly(struct device_driver *dev, - const char *buf, size_t count) +static ssize_t poly_store(struct device_driver *dev, const char *buf, + size_t count) { unsigned long reg, val; @@ -4404,12 +4406,7 @@ static ssize_t store_ppc440spe_r6poly(struct device_driver *dev, return count; } - -static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL); -static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable, - store_ppc440spe_r6enable); -static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly, - store_ppc440spe_r6poly); +static DRIVER_ATTR_RW(poly); /* * Common initialisation for RAID engines; allocate memory for @@ -4448,8 +4445,7 @@ static int ppc440spe_configure_raid_devices(void) dcr_base = dcr_resource_start(np, 0); dcr_len = dcr_resource_len(np, 0); if (!dcr_base && !dcr_len) { - pr_err("%s: can't get DCR registers base/len!\n", - np->full_name); + pr_err("%pOF: can't get DCR registers base/len!\n", np); of_node_put(np); iounmap(i2o_reg); return -ENODEV; @@ -4457,7 +4453,7 @@ static int ppc440spe_configure_raid_devices(void) i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); if (!DCR_MAP_OK(i2o_dcr_host)) { - pr_err("%s: failed to map DCRs!\n", np->full_name); + pr_err("%pOF: failed to map DCRs!\n", np); of_node_put(np); iounmap(i2o_reg); return -ENODEV; @@ -4518,15 +4514,14 @@ static int ppc440spe_configure_raid_devices(void) dcr_base = dcr_resource_start(np, 0); dcr_len = dcr_resource_len(np, 0); if (!dcr_base && !dcr_len) { - pr_err("%s: can't get DCR registers base/len!\n", - np->full_name); + pr_err("%pOF: can't get DCR registers base/len!\n", np); ret = -ENODEV; goto out_mq; } ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) { - pr_err("%s: failed to map DCRs!\n", np->full_name); + pr_err("%pOF: failed to map DCRs!\n", np); ret = -ENODEV; goto out_mq; } diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile index 4bfc38b45220..1ae92da88b0c 100644 --- a/drivers/dma/qcom/Makefile +++ b/drivers/dma/qcom/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 03c4eb3fd314..6d89fb6a6a92 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -65,6 +65,7 @@ struct bam_desc_hw { #define DESC_FLAG_EOT BIT(14) #define DESC_FLAG_EOB BIT(13) #define DESC_FLAG_NWD BIT(12) +#define DESC_FLAG_CMD BIT(11) struct bam_async_desc { struct virt_dma_desc vd; @@ -645,6 +646,9 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, unsigned int curr_offset = 0; do { + if (flags & DMA_PREP_CMD) + desc->flags |= cpu_to_le16(DESC_FLAG_CMD); + desc->addr = cpu_to_le32(sg_dma_address(sg) + curr_offset); @@ -960,7 +964,7 @@ static void bam_start_dma(struct bam_chan *bchan) /* set any special flags on the last descriptor */ if (async_desc->num_desc == async_desc->xfer_len) - desc[async_desc->xfer_len - 1].flags = + desc[async_desc->xfer_len - 1].flags |= cpu_to_le16(async_desc->flags); else desc[async_desc->xfer_len - 1].flags |= diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index 5072a7d306d4..e3669850aef4 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine interface * - * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -210,6 +210,7 @@ static int hidma_chan_init(struct hidma_dev *dmadev, u32 dma_sig) INIT_LIST_HEAD(&mchan->prepared); INIT_LIST_HEAD(&mchan->active); INIT_LIST_HEAD(&mchan->completed); + INIT_LIST_HEAD(&mchan->queued); spin_lock_init(&mchan->lock); list_add_tail(&mchan->chan.device_node, &ddev->channels); @@ -230,9 +231,15 @@ static void hidma_issue_pending(struct dma_chan *dmach) struct hidma_chan *mchan = to_hidma_chan(dmach); struct hidma_dev *dmadev = mchan->dmadev; unsigned long flags; + struct hidma_desc *qdesc, *next; int status; spin_lock_irqsave(&mchan->lock, flags); + list_for_each_entry_safe(qdesc, next, &mchan->queued, node) { + hidma_ll_queue_request(dmadev->lldev, qdesc->tre_ch); + list_move_tail(&qdesc->node, &mchan->active); + } + if (!mchan->running) { struct hidma_desc *desc = list_first_entry(&mchan->active, struct hidma_desc, @@ -315,17 +322,18 @@ static dma_cookie_t hidma_tx_submit(struct dma_async_tx_descriptor *txd) pm_runtime_put_autosuspend(dmadev->ddev.dev); return -ENODEV; } + pm_runtime_mark_last_busy(dmadev->ddev.dev); + pm_runtime_put_autosuspend(dmadev->ddev.dev); mdesc = container_of(txd, struct hidma_desc, desc); spin_lock_irqsave(&mchan->lock, irqflags); - /* Move descriptor to active */ - list_move_tail(&mdesc->node, &mchan->active); + /* Move descriptor to queued */ + list_move_tail(&mdesc->node, &mchan->queued); /* Update cookie */ cookie = dma_cookie_assign(txd); - hidma_ll_queue_request(dmadev->lldev, mdesc->tre_ch); spin_unlock_irqrestore(&mchan->lock, irqflags); return cookie; @@ -403,7 +411,40 @@ hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src, return NULL; hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch, - src, dest, len, flags); + src, dest, len, flags, + HIDMA_TRE_MEMCPY); + + /* Place descriptor in prepared list */ + spin_lock_irqsave(&mchan->lock, irqflags); + list_add_tail(&mdesc->node, &mchan->prepared); + spin_unlock_irqrestore(&mchan->lock, irqflags); + + return &mdesc->desc; +} + +static struct dma_async_tx_descriptor * +hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct hidma_chan *mchan = to_hidma_chan(dmach); + struct hidma_desc *mdesc = NULL; + struct hidma_dev *mdma = mchan->dmadev; + unsigned long irqflags; + + /* Get free descriptor */ + spin_lock_irqsave(&mchan->lock, irqflags); + if (!list_empty(&mchan->free)) { + mdesc = list_first_entry(&mchan->free, struct hidma_desc, node); + list_del(&mdesc->node); + } + spin_unlock_irqrestore(&mchan->lock, irqflags); + + if (!mdesc) + return NULL; + + hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch, + value, dest, len, flags, + HIDMA_TRE_MEMSET); /* Place descriptor in prepared list */ spin_lock_irqsave(&mchan->lock, irqflags); @@ -431,6 +472,7 @@ static int hidma_terminate_channel(struct dma_chan *chan) list_splice_init(&mchan->active, &list); list_splice_init(&mchan->prepared, &list); list_splice_init(&mchan->completed, &list); + list_splice_init(&mchan->queued, &list); spin_unlock_irqrestore(&mchan->lock, irqflags); /* this suspends the existing transfer */ @@ -767,6 +809,7 @@ static int hidma_probe(struct platform_device *pdev) pm_runtime_get_sync(dmadev->ddev.dev); dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask); + dma_cap_set(DMA_MEMSET, dmadev->ddev.cap_mask); if (WARN_ON(!pdev->dev.dma_mask)) { rc = -ENXIO; goto dmafree; @@ -777,6 +820,7 @@ static int hidma_probe(struct platform_device *pdev) dmadev->dev_trca = trca; dmadev->trca_resource = trca_resource; dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy; + dmadev->ddev.device_prep_dma_memset = hidma_prep_dma_memset; dmadev->ddev.device_alloc_chan_resources = hidma_alloc_chan_resources; dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources; dmadev->ddev.device_tx_status = hidma_tx_status; @@ -795,8 +839,11 @@ static int hidma_probe(struct platform_device *pdev) device_property_read_u32(&pdev->dev, "desc-count", &dmadev->nr_descriptors); - if (!dmadev->nr_descriptors && nr_desc_prm) + if (nr_desc_prm) { + dev_info(&pdev->dev, "overriding number of descriptors as %d\n", + nr_desc_prm); dmadev->nr_descriptors = nr_desc_prm; + } if (!dmadev->nr_descriptors) dmadev->nr_descriptors = HIDMA_NR_DEFAULT_DESC; diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h index c7d014235c32..5f9966e82c0b 100644 --- a/drivers/dma/qcom/hidma.h +++ b/drivers/dma/qcom/hidma.h @@ -28,6 +28,11 @@ #define HIDMA_TRE_DEST_LOW_IDX 4 #define HIDMA_TRE_DEST_HI_IDX 5 +enum tre_type { + HIDMA_TRE_MEMCPY = 3, + HIDMA_TRE_MEMSET = 4, +}; + struct hidma_tre { atomic_t allocated; /* if this channel is allocated */ bool queued; /* flag whether this is pending */ @@ -104,6 +109,7 @@ struct hidma_chan { struct dma_chan chan; struct list_head free; struct list_head prepared; + struct list_head queued; struct list_head active; struct list_head completed; @@ -149,7 +155,7 @@ void hidma_ll_start(struct hidma_lldev *llhndl); int hidma_ll_disable(struct hidma_lldev *lldev); int hidma_ll_enable(struct hidma_lldev *llhndl); void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch, - dma_addr_t src, dma_addr_t dest, u32 len, u32 flags); + dma_addr_t src, dma_addr_t dest, u32 len, u32 flags, u32 txntype); void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi); int hidma_ll_setup(struct hidma_lldev *lldev); struct hidma_lldev *hidma_ll_init(struct device *dev, u32 max_channels, diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c index 1530a661518d..4999e266b2de 100644 --- a/drivers/dma/qcom/hidma_ll.c +++ b/drivers/dma/qcom/hidma_ll.c @@ -105,10 +105,6 @@ enum ch_state { HIDMA_CH_STOPPED = 4, }; -enum tre_type { - HIDMA_TRE_MEMCPY = 3, -}; - enum err_code { HIDMA_EVRE_STATUS_COMPLETE = 1, HIDMA_EVRE_STATUS_ERROR = 4, @@ -174,8 +170,7 @@ int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name, tre->err_info = 0; tre->lldev = lldev; tre_local = &tre->tre_local[0]; - tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY; - tre_local[HIDMA_TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8; + tre_local[HIDMA_TRE_CFG_IDX] = (lldev->chidx & 0xFF) << 8; tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */ *tre_ch = i; if (callback) @@ -607,7 +602,7 @@ int hidma_ll_disable(struct hidma_lldev *lldev) void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, dma_addr_t src, dma_addr_t dest, u32 len, - u32 flags) + u32 flags, u32 txntype) { struct hidma_tre *tre; u32 *tre_local; @@ -626,6 +621,8 @@ void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch, } tre_local = &tre->tre_local[0]; + tre_local[HIDMA_TRE_CFG_IDX] &= ~GENMASK(7, 0); + tre_local[HIDMA_TRE_CFG_IDX] |= txntype; tre_local[HIDMA_TRE_LEN_IDX] = len; tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src); tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src); diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index f847d32cc4b5..7335e2eb9b72 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -1,7 +1,7 @@ /* * Qualcomm Technologies HIDMA DMA engine Management interface * - * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -28,7 +28,7 @@ #include "hidma_mgmt.h" -#define HIDMA_QOS_N_OFFSET 0x300 +#define HIDMA_QOS_N_OFFSET 0x700 #define HIDMA_CFG_OFFSET 0x400 #define HIDMA_MAX_BUS_REQ_LEN_OFFSET 0x41C #define HIDMA_MAX_XACTIONS_OFFSET 0x420 @@ -49,6 +49,26 @@ #define HIDMA_AUTOSUSPEND_TIMEOUT 2000 #define HIDMA_MAX_CHANNEL_WEIGHT 15 +static unsigned int max_write_request; +module_param(max_write_request, uint, 0644); +MODULE_PARM_DESC(max_write_request, + "maximum write burst (default: ACPI/DT value)"); + +static unsigned int max_read_request; +module_param(max_read_request, uint, 0644); +MODULE_PARM_DESC(max_read_request, + "maximum read burst (default: ACPI/DT value)"); + +static unsigned int max_wr_xactions; +module_param(max_wr_xactions, uint, 0644); +MODULE_PARM_DESC(max_wr_xactions, + "maximum number of write transactions (default: ACPI/DT value)"); + +static unsigned int max_rd_xactions; +module_param(max_rd_xactions, uint, 0644); +MODULE_PARM_DESC(max_rd_xactions, + "maximum number of read transactions (default: ACPI/DT value)"); + int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev) { unsigned int i; @@ -207,12 +227,27 @@ static int hidma_mgmt_probe(struct platform_device *pdev) goto out; } + if (max_write_request && + (max_write_request != mgmtdev->max_write_request)) { + dev_info(&pdev->dev, "overriding max-write-burst-bytes: %d\n", + max_write_request); + mgmtdev->max_write_request = max_write_request; + } else + max_write_request = mgmtdev->max_write_request; + rc = device_property_read_u32(&pdev->dev, "max-read-burst-bytes", &mgmtdev->max_read_request); if (rc) { dev_err(&pdev->dev, "max-read-burst-bytes missing\n"); goto out; } + if (max_read_request && + (max_read_request != mgmtdev->max_read_request)) { + dev_info(&pdev->dev, "overriding max-read-burst-bytes: %d\n", + max_read_request); + mgmtdev->max_read_request = max_read_request; + } else + max_read_request = mgmtdev->max_read_request; rc = device_property_read_u32(&pdev->dev, "max-write-transactions", &mgmtdev->max_wr_xactions); @@ -220,6 +255,13 @@ static int hidma_mgmt_probe(struct platform_device *pdev) dev_err(&pdev->dev, "max-write-transactions missing\n"); goto out; } + if (max_wr_xactions && + (max_wr_xactions != mgmtdev->max_wr_xactions)) { + dev_info(&pdev->dev, "overriding max-write-transactions: %d\n", + max_wr_xactions); + mgmtdev->max_wr_xactions = max_wr_xactions; + } else + max_wr_xactions = mgmtdev->max_wr_xactions; rc = device_property_read_u32(&pdev->dev, "max-read-transactions", &mgmtdev->max_rd_xactions); @@ -227,6 +269,13 @@ static int hidma_mgmt_probe(struct platform_device *pdev) dev_err(&pdev->dev, "max-read-transactions missing\n"); goto out; } + if (max_rd_xactions && + (max_rd_xactions != mgmtdev->max_rd_xactions)) { + dev_info(&pdev->dev, "overriding max-read-transactions: %d\n", + max_rd_xactions); + mgmtdev->max_rd_xactions = max_rd_xactions; + } else + max_rd_xactions = mgmtdev->max_rd_xactions; mgmtdev->priority = devm_kcalloc(&pdev->dev, mgmtdev->dma_channels, @@ -309,7 +358,7 @@ static int __init hidma_mgmt_of_populate_channels(struct device_node *np) struct platform_device_info pdevinfo; struct of_phandle_args out_irq; struct device_node *child; - struct resource *res; + struct resource *res = NULL; const __be32 *cell; int ret = 0, size, i, num; u64 addr, addr_size; diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index f1e2fd64f279..7d7c9491ade1 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # DMA Engine Helpers # diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index bd261c9e9664..2b2c7db3e480 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -144,6 +144,7 @@ struct rcar_dmac_chan_map { * @chan: base DMA channel object * @iomem: channel I/O memory base * @index: index of this channel in the controller + * @irq: channel IRQ * @src: slave memory address and size on the source side * @dst: slave memory address and size on the destination side * @mid_rid: hardware MID/RID for the DMA client using this channel @@ -161,6 +162,7 @@ struct rcar_dmac_chan { struct dma_chan chan; void __iomem *iomem; unsigned int index; + int irq; struct rcar_dmac_chan_slave src; struct rcar_dmac_chan_slave dst; @@ -1008,7 +1010,11 @@ static void rcar_dmac_free_chan_resources(struct dma_chan *chan) rcar_dmac_chan_halt(rchan); spin_unlock_irq(&rchan->lock); - /* Now no new interrupts will occur */ + /* + * Now no new interrupts will occur, but one might already be + * running. Wait for it to finish before freeing resources. + */ + synchronize_irq(rchan->irq); if (rchan->mid_rid >= 0) { /* The caller is holding dma_list_mutex */ @@ -1366,6 +1372,13 @@ done: spin_unlock_irqrestore(&rchan->lock, flags); } +static void rcar_dmac_device_synchronize(struct dma_chan *chan) +{ + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + synchronize_irq(rchan->irq); +} + /* ----------------------------------------------------------------------------- * IRQ handling */ @@ -1650,7 +1663,6 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, struct dma_chan *chan = &rchan->chan; char pdev_irqname[5]; char *irqname; - int irq; int ret; rchan->index = index; @@ -1667,8 +1679,8 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, /* Request the channel interrupt. */ sprintf(pdev_irqname, "ch%u", index); - irq = platform_get_irq_byname(pdev, pdev_irqname); - if (irq < 0) { + rchan->irq = platform_get_irq_byname(pdev, pdev_irqname); + if (rchan->irq < 0) { dev_err(dmac->dev, "no IRQ specified for channel %u\n", index); return -ENODEV; } @@ -1678,14 +1690,6 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, if (!irqname) return -ENOMEM; - ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel, - rcar_dmac_isr_channel_thread, 0, - irqname, rchan); - if (ret) { - dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret); - return ret; - } - /* * Initialize the DMA engine channel and add it to the DMA engine * channels list. @@ -1695,6 +1699,16 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, list_add_tail(&chan->device_node, &dmac->engine.channels); + ret = devm_request_threaded_irq(dmac->dev, rchan->irq, + rcar_dmac_isr_channel, + rcar_dmac_isr_channel_thread, 0, + irqname, rchan); + if (ret) { + dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", + rchan->irq, ret); + return ret; + } + return 0; } @@ -1780,14 +1794,6 @@ static int rcar_dmac_probe(struct platform_device *pdev) if (!irqname) return -ENOMEM; - ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0, - irqname, dmac); - if (ret) { - dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n", - irq, ret); - return ret; - } - /* Enable runtime PM and initialize the device. */ pm_runtime_enable(&pdev->dev); ret = pm_runtime_get_sync(&pdev->dev); @@ -1804,8 +1810,32 @@ static int rcar_dmac_probe(struct platform_device *pdev) goto error; } - /* Initialize the channels. */ - INIT_LIST_HEAD(&dmac->engine.channels); + /* Initialize engine */ + engine = &dmac->engine; + + dma_cap_set(DMA_MEMCPY, engine->cap_mask); + dma_cap_set(DMA_SLAVE, engine->cap_mask); + + engine->dev = &pdev->dev; + engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE); + + engine->src_addr_widths = widths; + engine->dst_addr_widths = widths; + engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources; + engine->device_free_chan_resources = rcar_dmac_free_chan_resources; + engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy; + engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg; + engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic; + engine->device_config = rcar_dmac_device_config; + engine->device_terminate_all = rcar_dmac_chan_terminate_all; + engine->device_tx_status = rcar_dmac_tx_status; + engine->device_issue_pending = rcar_dmac_issue_pending; + engine->device_synchronize = rcar_dmac_device_synchronize; + + INIT_LIST_HEAD(&engine->channels); for (i = 0; i < dmac->n_channels; ++i) { ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], @@ -1814,6 +1844,14 @@ static int rcar_dmac_probe(struct platform_device *pdev) goto error; } + ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0, + irqname, dmac); + if (ret) { + dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n", + irq, ret); + return ret; + } + /* Register the DMAC as a DMA provider for DT. */ ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate, NULL); @@ -1825,28 +1863,6 @@ static int rcar_dmac_probe(struct platform_device *pdev) * * Default transfer size of 32 bytes requires 32-byte alignment. */ - engine = &dmac->engine; - dma_cap_set(DMA_MEMCPY, engine->cap_mask); - dma_cap_set(DMA_SLAVE, engine->cap_mask); - - engine->dev = &pdev->dev; - engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE); - - engine->src_addr_widths = widths; - engine->dst_addr_widths = widths; - engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); - engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - - engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources; - engine->device_free_chan_resources = rcar_dmac_free_chan_resources; - engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy; - engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg; - engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic; - engine->device_config = rcar_dmac_device_config; - engine->device_terminate_all = rcar_dmac_chan_terminate_all; - engine->device_tx_status = rcar_dmac_tx_status; - engine->device_issue_pending = rcar_dmac_issue_pending; - ret = dma_async_device_register(engine); if (ret < 0) goto error; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index a6620b671d1d..c2b089af0420 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -79,7 +79,7 @@ static int dma40_memcpy_channels[] = { }; /* Default configuration for physcial memcpy */ -static struct stedma40_chan_cfg dma40_memcpy_conf_phy = { +static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = { .mode = STEDMA40_MODE_PHYSICAL, .dir = DMA_MEM_TO_MEM, @@ -93,7 +93,7 @@ static struct stedma40_chan_cfg dma40_memcpy_conf_phy = { }; /* Default configuration for logical memcpy */ -static struct stedma40_chan_cfg dma40_memcpy_conf_log = { +static const struct stedma40_chan_cfg dma40_memcpy_conf_log = { .mode = STEDMA40_MODE_LOGICAL, .dir = DMA_MEM_TO_MEM, @@ -2485,19 +2485,6 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, } static struct dma_async_tx_descriptor * -d40_prep_memcpy_sg(struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long dma_flags) -{ - if (dst_nents != src_nents) - return NULL; - - return d40_prep_sg(chan, src_sg, dst_sg, src_nents, - DMA_MEM_TO_MEM, dma_flags); -} - -static struct dma_async_tx_descriptor * d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long dma_flags, void *context) @@ -2528,10 +2515,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, dma_addr += period_len; } - sg[periods].offset = 0; - sg_dma_len(&sg[periods]) = 0; - sg[periods].page_link = - ((unsigned long)sg | 0x01) & ~0x02; + sg_chain(sg, periods + 1, sg); txd = d40_prep_sg(chan, sg, sg, periods, direction, DMA_PREP_INTERRUPT); @@ -2824,9 +2808,6 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } - if (dma_has_cap(DMA_SG, dev->cap_mask)) - dev->device_prep_dma_sg = d40_prep_memcpy_sg; - if (dma_has_cap(DMA_CYCLIC, dev->cap_mask)) dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic; @@ -2868,7 +2849,6 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_memcpy.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); - dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask); d40_ops_init(base, &base->dma_memcpy); @@ -2886,7 +2866,6 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_both.cap_mask); dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); - dma_cap_set(DMA_SG, base->dma_both.cap_mask); dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); d40_ops_init(base, &base->dma_both); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index a2358780ab2c..bcd496edc70f 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -101,6 +101,17 @@ struct sun6i_dma_config { u32 nr_max_channels; u32 nr_max_requests; u32 nr_max_vchans; + /* + * In the datasheets/user manuals of newer Allwinner SoCs, a special + * bit (bit 2 at register 0x20) is present. + * It's named "DMA MCLK interface circuit auto gating bit" in the + * documents, and the footnote of this register says that this bit + * should be set up when initializing the DMA controller. + * Allwinner A23/A33 user manuals do not have this bit documented, + * however these SoCs really have and need this bit, as seen in the + * BSP kernel source code. + */ + bool gate_needed; }; /* @@ -1009,6 +1020,7 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_channels = 8, .nr_max_requests = 24, .nr_max_vchans = 37, + .gate_needed = true, }; static struct sun6i_dma_config sun8i_a83t_dma_cfg = { @@ -1028,11 +1040,24 @@ static struct sun6i_dma_config sun8i_h3_dma_cfg = { .nr_max_vchans = 34, }; +/* + * The V3s have only 8 physical channels, a maximum DRQ port id of 23, + * and a total of 24 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_v3s_dma_cfg = { + .nr_max_channels = 8, + .nr_max_requests = 23, + .nr_max_vchans = 24, + .gate_needed = true, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, { .compatible = "allwinner,sun8i-a83t-dma", .data = &sun8i_a83t_dma_cfg }, { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, + { .compatible = "allwinner,sun8i-v3s-dma", .data = &sun8i_v3s_dma_cfg }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun6i_dma_match); @@ -1174,13 +1199,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) goto err_dma_unregister; } - /* - * sun8i variant requires us to toggle a dma gating register, - * as seen in Allwinner's SDK. This register is not documented - * in the A23 user manual. - */ - if (of_device_is_compatible(pdev->dev.of_node, - "allwinner,sun8i-a23-dma")) + if (sdc->cfg->gate_needed) writel(SUN8I_DMA_GATE_ENABLE, sdc->base + SUN8I_DMA_GATE); return 0; diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 3722b9d8d9fe..b9d75a54c896 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1494,35 +1494,7 @@ static int tegra_dma_remove(struct platform_device *pdev) static int tegra_dma_runtime_suspend(struct device *dev) { struct tegra_dma *tdma = dev_get_drvdata(dev); - - clk_disable_unprepare(tdma->dma_clk); - return 0; -} - -static int tegra_dma_runtime_resume(struct device *dev) -{ - struct tegra_dma *tdma = dev_get_drvdata(dev); - int ret; - - ret = clk_prepare_enable(tdma->dma_clk); - if (ret < 0) { - dev_err(dev, "clk_enable failed: %d\n", ret); - return ret; - } - return 0; -} - -#ifdef CONFIG_PM_SLEEP -static int tegra_dma_pm_suspend(struct device *dev) -{ - struct tegra_dma *tdma = dev_get_drvdata(dev); int i; - int ret; - - /* Enable clock before accessing register */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) - return ret; tdma->reg_gen = tdma_read(tdma, TEGRA_APBDMA_GENERAL); for (i = 0; i < tdma->chip_data->nr_channels; i++) { @@ -1543,21 +1515,21 @@ static int tegra_dma_pm_suspend(struct device *dev) TEGRA_APBDMA_CHAN_WCOUNT); } - /* Disable clock */ - pm_runtime_put(dev); + clk_disable_unprepare(tdma->dma_clk); + return 0; } -static int tegra_dma_pm_resume(struct device *dev) +static int tegra_dma_runtime_resume(struct device *dev) { struct tegra_dma *tdma = dev_get_drvdata(dev); - int i; - int ret; + int i, ret; - /* Enable clock before accessing register */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) + ret = clk_prepare_enable(tdma->dma_clk); + if (ret < 0) { + dev_err(dev, "clk_enable failed: %d\n", ret); return ret; + } tdma_write(tdma, TEGRA_APBDMA_GENERAL, tdma->reg_gen); tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0); @@ -1582,16 +1554,14 @@ static int tegra_dma_pm_resume(struct device *dev) (ch_reg->csr & ~TEGRA_APBDMA_CSR_ENB)); } - /* Disable clock */ - pm_runtime_put(dev); return 0; } -#endif static const struct dev_pm_ops tegra_dma_dev_pm_ops = { SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static const struct of_device_id tegra_dma_of_match[] = { diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index b10cbaa82ff5..b26256f23d67 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -717,8 +717,8 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); tdc->irq = of_irq_get(pdev->dev.of_node, i); - if (tdc->irq < 0) { - ret = tdc->irq; + if (tdc->irq <= 0) { + ret = tdc->irq ?: -ENXIO; goto irq_dispose; } diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 2403475a37cf..f1d04b70ee67 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -262,13 +262,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_lock(&xbar->mutex); map->xbar_out = find_first_zero_bit(xbar->dma_inuse, xbar->dma_requests); - mutex_unlock(&xbar->mutex); if (map->xbar_out == xbar->dma_requests) { + mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); map->xbar_in = (u16)dma_spec->args[0]; @@ -308,7 +309,7 @@ static const struct of_device_id ti_dra7_master_match[] = { static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p) { for (; len > 0; len--) - clear_bit(offset + (len - 1), p); + set_bit(offset + (len - 1), p); } static int ti_dra7_xbar_probe(struct platform_device *pdev) diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 8b693b712d0f..1d5988849aa6 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -391,11 +391,6 @@ static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(__le64 *ext8) -{ - *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); -} - static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { switch (idx) { @@ -425,48 +420,6 @@ static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, XGENE_DMA_DESC_HOENQ_NUM_POS); } -static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, - struct xgene_dma_desc_sw *desc_sw, - dma_addr_t dst, dma_addr_t src, - size_t len) -{ - struct xgene_dma_desc_hw *desc1, *desc2; - int i; - - /* Get 1st descriptor */ - desc1 = &desc_sw->desc1; - xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); - - /* Set destination address */ - desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); - desc1->m3 |= cpu_to_le64(dst); - - /* Set 1st source address */ - xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - - if (!len) - return; - - /* - * We need to split this source buffer, - * and need to use 2nd descriptor - */ - desc2 = &desc_sw->desc2; - desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); - - /* Set 2nd to 5th source address */ - for (i = 0; i < 4 && len; i++) - xgene_dma_set_src_buffer(xgene_dma_lookup_ext8(desc2, i), - &len, &src); - - /* Invalidate unused source address field */ - for (; i < 4; i++) - xgene_dma_invalidate_buffer(xgene_dma_lookup_ext8(desc2, i)); - - /* Updated flag that we have prepared 64B descriptor */ - desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; -} - static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, struct xgene_dma_desc_sw *desc_sw, dma_addr_t *dst, dma_addr_t *src, @@ -891,114 +844,6 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) chan->desc_pool = NULL; } -static struct dma_async_tx_descriptor *xgene_dma_prep_sg( - struct dma_chan *dchan, struct scatterlist *dst_sg, - u32 dst_nents, struct scatterlist *src_sg, - u32 src_nents, unsigned long flags) -{ - struct xgene_dma_desc_sw *first = NULL, *new = NULL; - struct xgene_dma_chan *chan; - size_t dst_avail, src_avail; - dma_addr_t dst, src; - size_t len; - - if (unlikely(!dchan)) - return NULL; - - if (unlikely(!dst_nents || !src_nents)) - return NULL; - - if (unlikely(!dst_sg || !src_sg)) - return NULL; - - chan = to_dma_chan(dchan); - - /* Get prepared for the loop */ - dst_avail = sg_dma_len(dst_sg); - src_avail = sg_dma_len(src_sg); - dst_nents--; - src_nents--; - - /* Run until we are out of scatterlist entries */ - while (true) { - /* Create the largest transaction possible */ - len = min_t(size_t, src_avail, dst_avail); - len = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT); - if (len == 0) - goto fetch; - - dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail; - src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail; - - /* Allocate the link descriptor from DMA pool */ - new = xgene_dma_alloc_descriptor(chan); - if (!new) - goto fail; - - /* Prepare DMA descriptor */ - xgene_dma_prep_cpy_desc(chan, new, dst, src, len); - - if (!first) - first = new; - - new->tx.cookie = 0; - async_tx_ack(&new->tx); - - /* update metadata */ - dst_avail -= len; - src_avail -= len; - - /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); - -fetch: - /* fetch the next dst scatterlist entry */ - if (dst_avail == 0) { - /* no more entries: we're done */ - if (dst_nents == 0) - break; - - /* fetch the next entry: if there are no more: done */ - dst_sg = sg_next(dst_sg); - if (!dst_sg) - break; - - dst_nents--; - dst_avail = sg_dma_len(dst_sg); - } - - /* fetch the next src scatterlist entry */ - if (src_avail == 0) { - /* no more entries: we're done */ - if (src_nents == 0) - break; - - /* fetch the next entry: if there are no more: done */ - src_sg = sg_next(src_sg); - if (!src_sg) - break; - - src_nents--; - src_avail = sg_dma_len(src_sg); - } - } - - if (!new) - return NULL; - - new->tx.flags = flags; /* client is in control of this ack */ - new->tx.cookie = -EBUSY; - list_splice(&first->tx_list, &new->tx_list); - - return &new->tx; -fail: - if (!first) - return NULL; - - xgene_dma_free_desc_list(chan, &first->tx_list); - return NULL; -} - static struct dma_async_tx_descriptor *xgene_dma_prep_xor( struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src, u32 src_cnt, size_t len, unsigned long flags) @@ -1653,7 +1498,6 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_cap_zero(dma_dev->cap_mask); /* Set DMA device capability */ - dma_cap_set(DMA_SG, dma_dev->cap_mask); /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR * and channel 1 supports XOR, PQ both. First thing here is we have @@ -1679,7 +1523,6 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources; dma_dev->device_issue_pending = xgene_dma_issue_pending; dma_dev->device_tx_status = xgene_dma_tx_status; - dma_dev->device_prep_dma_sg = xgene_dma_prep_sg; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; @@ -1731,8 +1574,7 @@ static int xgene_dma_async_register(struct xgene_dma *pdma, int id) /* DMA capability info */ dev_info(pdma->dev, - "%s: CAPABILITY ( %s%s%s)\n", dma_chan_name(&chan->dma_chan), - dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "", + "%s: CAPABILITY ( %s%s)\n", dma_chan_name(&chan->dma_chan), dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : ""); diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 8cf87b1a284b..8722bcba489d 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2124,7 +2124,7 @@ static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk, *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); if (IS_ERR(*axi_clk)) { err = PTR_ERR(*axi_clk); - dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err); return err; } @@ -2142,25 +2142,25 @@ static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk, err = clk_prepare_enable(*axi_clk); if (err) { - dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err); return err; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_axiclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txclk; } err = clk_prepare_enable(*sg_clk); if (err) { - dev_err(&pdev->dev, "failed to enable sg_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err); goto err_disable_rxclk; } @@ -2189,26 +2189,26 @@ static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); if (IS_ERR(*axi_clk)) { err = PTR_ERR(*axi_clk); - dev_err(&pdev->dev, "failed to get axi_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err); return err; } *dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk"); if (IS_ERR(*dev_clk)) { err = PTR_ERR(*dev_clk); - dev_err(&pdev->dev, "failed to get dev_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err); return err; } err = clk_prepare_enable(*axi_clk); if (err) { - dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err); return err; } err = clk_prepare_enable(*dev_clk); if (err) { - dev_err(&pdev->dev, "failed to enable dev_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err); goto err_disable_axiclk; } @@ -2229,7 +2229,7 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk"); if (IS_ERR(*axi_clk)) { err = PTR_ERR(*axi_clk); - dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err); return err; } @@ -2251,31 +2251,31 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk, err = clk_prepare_enable(*axi_clk); if (err) { - dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err); return err; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_axiclk; } err = clk_prepare_enable(*txs_clk); if (err) { - dev_err(&pdev->dev, "failed to enable txs_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err); goto err_disable_txclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txsclk; } err = clk_prepare_enable(*rxs_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rxs_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err); goto err_disable_rxclk; } diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index 6d221e5c72ee..1ee1241ca797 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -794,9 +794,6 @@ static struct dma_async_tx_descriptor *zynqmp_dma_prep_memcpy( chan = to_chan(dchan); - if (len > ZYNQMP_DMA_MAX_TRANS_LEN) - return NULL; - desc_cnt = DIV_ROUND_UP(len, ZYNQMP_DMA_MAX_TRANS_LEN); spin_lock_bh(&chan->lock); @@ -833,98 +830,6 @@ static struct dma_async_tx_descriptor *zynqmp_dma_prep_memcpy( } /** - * zynqmp_dma_prep_slave_sg - prepare descriptors for a memory sg transaction - * @dchan: DMA channel - * @dst_sg: Destination scatter list - * @dst_sg_len: Number of entries in destination scatter list - * @src_sg: Source scatter list - * @src_sg_len: Number of entries in source scatter list - * @flags: transfer ack flags - * - * Return: Async transaction descriptor on success and NULL on failure - */ -static struct dma_async_tx_descriptor *zynqmp_dma_prep_sg( - struct dma_chan *dchan, struct scatterlist *dst_sg, - unsigned int dst_sg_len, struct scatterlist *src_sg, - unsigned int src_sg_len, unsigned long flags) -{ - struct zynqmp_dma_desc_sw *new, *first = NULL; - struct zynqmp_dma_chan *chan = to_chan(dchan); - void *desc = NULL, *prev = NULL; - size_t len, dst_avail, src_avail; - dma_addr_t dma_dst, dma_src; - u32 desc_cnt = 0, i; - struct scatterlist *sg; - - for_each_sg(src_sg, sg, src_sg_len, i) - desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), - ZYNQMP_DMA_MAX_TRANS_LEN); - - spin_lock_bh(&chan->lock); - if (desc_cnt > chan->desc_free_cnt) { - spin_unlock_bh(&chan->lock); - dev_dbg(chan->dev, "chan %p descs are not available\n", chan); - return NULL; - } - chan->desc_free_cnt = chan->desc_free_cnt - desc_cnt; - spin_unlock_bh(&chan->lock); - - dst_avail = sg_dma_len(dst_sg); - src_avail = sg_dma_len(src_sg); - - /* Run until we are out of scatterlist entries */ - while (true) { - /* Allocate and populate the descriptor */ - new = zynqmp_dma_get_descriptor(chan); - desc = (struct zynqmp_dma_desc_ll *)new->src_v; - len = min_t(size_t, src_avail, dst_avail); - len = min_t(size_t, len, ZYNQMP_DMA_MAX_TRANS_LEN); - if (len == 0) - goto fetch; - dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - - dst_avail; - dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - - src_avail; - - zynqmp_dma_config_sg_ll_desc(chan, desc, dma_src, dma_dst, - len, prev); - prev = desc; - dst_avail -= len; - src_avail -= len; - - if (!first) - first = new; - else - list_add_tail(&new->node, &first->tx_list); -fetch: - /* Fetch the next dst scatterlist entry */ - if (dst_avail == 0) { - if (dst_sg_len == 0) - break; - dst_sg = sg_next(dst_sg); - if (dst_sg == NULL) - break; - dst_sg_len--; - dst_avail = sg_dma_len(dst_sg); - } - /* Fetch the next src scatterlist entry */ - if (src_avail == 0) { - if (src_sg_len == 0) - break; - src_sg = sg_next(src_sg); - if (src_sg == NULL) - break; - src_sg_len--; - src_avail = sg_dma_len(src_sg); - } - } - - zynqmp_dma_desc_config_eod(chan, desc); - first->async_tx.flags = flags; - return &first->async_tx; -} - -/** * zynqmp_dma_chan_remove - Channel remove function * @chan: ZynqMP DMA channel pointer */ @@ -1067,11 +972,9 @@ static int zynqmp_dma_probe(struct platform_device *pdev) INIT_LIST_HEAD(&zdev->common.channels); dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); - dma_cap_set(DMA_SG, zdev->common.cap_mask); dma_cap_set(DMA_MEMCPY, zdev->common.cap_mask); p = &zdev->common; - p->device_prep_dma_sg = zynqmp_dma_prep_sg; p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy; p->device_terminate_all = zynqmp_dma_device_terminate_all; p->device_issue_pending = zynqmp_dma_issue_pending; |