Diffstat (limited to 'drivers/crypto/stm32')
 drivers/crypto/stm32/stm32-crc32.c |   6
 drivers/crypto/stm32/stm32-cryp.c  | 747
 drivers/crypto/stm32/stm32-hash.c  | 572
 3 files changed, 1140 insertions, 185 deletions
diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index b0cf6d2fd352..fd29785a3ecf 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -17,7 +17,7 @@ #include <crypto/internal/hash.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #define DRIVER_NAME "stm32-crc32" #define CHKSUM_DIGEST_SIZE 4 @@ -162,7 +162,7 @@ static int burst_update(struct shash_desc *desc, const u8 *d8, if (mctx->poly == CRC32_POLY_LE) ctx->partial = crc32_le(ctx->partial, d8, length); else - ctx->partial = __crc32c_le(ctx->partial, d8, length); + ctx->partial = crc32c(ctx->partial, d8, length); goto pm_out; } @@ -465,7 +465,7 @@ MODULE_DEVICE_TABLE(of, stm32_dt_ids); static struct platform_driver stm32_crc_driver = { .probe = stm32_crc_probe, - .remove_new = stm32_crc_remove, + .remove = stm32_crc_remove, .driver = { .name = DRIVER_NAME, .pm = &stm32_crc_pm_ops, diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 11ad4ffdce0d..5ce88e7a8f65 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -11,8 +11,11 @@ #include <crypto/internal/des.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> +#include <linux/bottom_half.h> #include <linux/clk.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> #include <linux/err.h> #include <linux/iopoll.h> #include <linux/interrupt.h> @@ -40,6 +43,8 @@ /* Mode mask = bits [15..0] */ #define FLG_MODE_MASK GENMASK(15, 0) /* Bit [31..16] status */ +#define FLG_IN_OUT_DMA BIT(16) +#define FLG_HEADER_DMA BIT(17) /* Registers */ #define CRYP_CR 0x00000000 @@ -121,8 +126,12 @@ #define CR_PH_MASK 0x00030000 #define CR_NBPBL_SHIFT 20 -#define SR_BUSY 0x00000010 -#define SR_OFNE 0x00000004 +#define SR_IFNF BIT(1) +#define SR_OFNE BIT(2) +#define SR_BUSY BIT(8) + +#define DMACR_DIEN BIT(0) +#define DMACR_DOEN BIT(1) #define IMSCR_IN BIT(0) #define IMSCR_OUT BIT(1) @@ -133,7 +142,15 @@ /* Misc */ #define AES_BLOCK_32 (AES_BLOCK_SIZE / sizeof(u32)) #define GCM_CTR_INIT 2 -#define CRYP_AUTOSUSPEND_DELAY 50 +#define CRYP_AUTOSUSPEND_DELAY 50 + +#define CRYP_DMA_BURST_REG 4 + +enum stm32_dma_mode { + NO_DMA, + DMA_PLAIN_SG, + DMA_NEED_SG_TRUNC +}; struct stm32_cryp_caps { bool aeads_support; @@ -146,6 +163,7 @@ struct stm32_cryp_caps { u32 sr; u32 din; u32 dout; + u32 dmacr; u32 imsc; u32 mis; u32 k1l; @@ -172,6 +190,7 @@ struct stm32_cryp { struct list_head list; struct device *dev; void __iomem *regs; + phys_addr_t phys_base; struct clk *clk; unsigned long flags; u32 irq_status; @@ -190,8 +209,20 @@ struct stm32_cryp { size_t header_in; size_t payload_out; + /* DMA process fields */ + struct scatterlist *in_sg; + struct scatterlist *header_sg; struct scatterlist *out_sg; + size_t in_sg_len; + size_t header_sg_len; + size_t out_sg_len; + struct completion dma_completion; + + struct dma_chan *dma_lch_in; + struct dma_chan *dma_lch_out; + enum stm32_dma_mode dma_mode; + /* IT process fields */ struct scatter_walk in_walk; struct scatter_walk out_walk; @@ -291,12 +322,20 @@ static inline int stm32_cryp_wait_enable(struct stm32_cryp *cryp) !(status & CR_CRYPEN), 10, 100000); } +static inline int stm32_cryp_wait_input(struct stm32_cryp *cryp) +{ + u32 status; + + return readl_relaxed_poll_timeout_atomic(cryp->regs + cryp->caps->sr, status, + status & SR_IFNF, 1, 10); +} + static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp) { u32 status; - return 
readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status, - status & SR_OFNE, 10, 100000); + return readl_relaxed_poll_timeout_atomic(cryp->regs + cryp->caps->sr, status, + status & SR_OFNE, 1, 10); } static inline void stm32_cryp_key_read_enable(struct stm32_cryp *cryp) @@ -311,8 +350,13 @@ static inline void stm32_cryp_key_read_disable(struct stm32_cryp *cryp) cryp->regs + cryp->caps->cr); } +static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp); +static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp); +static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp); static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp); static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err); +static int stm32_cryp_dma_start(struct stm32_cryp *cryp); +static int stm32_cryp_it_start(struct stm32_cryp *cryp); static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx) { @@ -622,7 +666,7 @@ static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) written = min_t(size_t, AES_BLOCK_SIZE - len, alen); - scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); + memcpy_from_scatterwalk((char *)block + len, &cryp->in_walk, written); writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); @@ -813,11 +857,238 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) if (is_gcm(cryp) || is_ccm(cryp)) crypto_finalize_aead_request(cryp->engine, cryp->areq, err); else - crypto_finalize_skcipher_request(cryp->engine, cryp->req, - err); + crypto_finalize_skcipher_request(cryp->engine, cryp->req, err); +} + +static void stm32_cryp_header_dma_callback(void *param) +{ + struct stm32_cryp *cryp = (struct stm32_cryp *)param; + int ret; + u32 reg; + + dma_unmap_sg(cryp->dev, cryp->header_sg, cryp->header_sg_len, DMA_TO_DEVICE); + + reg = stm32_cryp_read(cryp, cryp->caps->dmacr); + stm32_cryp_write(cryp, cryp->caps->dmacr, reg & ~(DMACR_DOEN | DMACR_DIEN)); + + kfree(cryp->header_sg); + + reg = stm32_cryp_read(cryp, cryp->caps->cr); + + if (cryp->header_in) { + stm32_cryp_write(cryp, cryp->caps->cr, reg | CR_CRYPEN); + + ret = stm32_cryp_wait_input(cryp); + if (ret) { + dev_err(cryp->dev, "input header ready timeout after dma\n"); + stm32_cryp_finish_req(cryp, ret); + return; + } + stm32_cryp_irq_write_gcmccm_header(cryp); + WARN_ON(cryp->header_in); + } + + if (stm32_cryp_get_input_text_len(cryp)) { + /* Phase 3 : payload */ + reg = stm32_cryp_read(cryp, cryp->caps->cr); + stm32_cryp_write(cryp, cryp->caps->cr, reg & ~CR_CRYPEN); + + reg &= ~CR_PH_MASK; + reg |= CR_PH_PAYLOAD | CR_CRYPEN; + stm32_cryp_write(cryp, cryp->caps->cr, reg); + + if (cryp->flags & FLG_IN_OUT_DMA) { + ret = stm32_cryp_dma_start(cryp); + if (ret) + stm32_cryp_finish_req(cryp, ret); + } else { + stm32_cryp_it_start(cryp); + } + } else { + /* + * Phase 4 : tag. 
+ * Nothing to read, nothing to write => end request + */ + stm32_cryp_finish_req(cryp, 0); + } +} + +static void stm32_cryp_dma_callback(void *param) +{ + struct stm32_cryp *cryp = (struct stm32_cryp *)param; + int ret; + u32 reg; + + complete(&cryp->dma_completion); /* completion to indicate no timeout */ + + dma_sync_sg_for_device(cryp->dev, cryp->out_sg, cryp->out_sg_len, DMA_FROM_DEVICE); + + if (cryp->in_sg != cryp->out_sg) + dma_unmap_sg(cryp->dev, cryp->in_sg, cryp->in_sg_len, DMA_TO_DEVICE); + + dma_unmap_sg(cryp->dev, cryp->out_sg, cryp->out_sg_len, DMA_FROM_DEVICE); + + reg = stm32_cryp_read(cryp, cryp->caps->dmacr); + stm32_cryp_write(cryp, cryp->caps->dmacr, reg & ~(DMACR_DOEN | DMACR_DIEN)); + + reg = stm32_cryp_read(cryp, cryp->caps->cr); + + if (is_gcm(cryp) || is_ccm(cryp)) { + kfree(cryp->in_sg); + kfree(cryp->out_sg); + } else { + if (cryp->in_sg != cryp->req->src) + kfree(cryp->in_sg); + if (cryp->out_sg != cryp->req->dst) + kfree(cryp->out_sg); + } + + if (cryp->payload_in) { + stm32_cryp_write(cryp, cryp->caps->cr, reg | CR_CRYPEN); + + ret = stm32_cryp_wait_input(cryp); + if (ret) { + dev_err(cryp->dev, "input ready timeout after dma\n"); + stm32_cryp_finish_req(cryp, ret); + return; + } + stm32_cryp_irq_write_data(cryp); + + ret = stm32_cryp_wait_output(cryp); + if (ret) { + dev_err(cryp->dev, "output ready timeout after dma\n"); + stm32_cryp_finish_req(cryp, ret); + return; + } + stm32_cryp_irq_read_data(cryp); + } + + stm32_cryp_finish_req(cryp, 0); +} + +static int stm32_cryp_header_dma_start(struct stm32_cryp *cryp) +{ + int ret; + struct dma_async_tx_descriptor *tx_in; + u32 reg; + size_t align_size; + + ret = dma_map_sg(cryp->dev, cryp->header_sg, cryp->header_sg_len, DMA_TO_DEVICE); + if (!ret) { + dev_err(cryp->dev, "dma_map_sg() error\n"); + return -ENOMEM; + } + + dma_sync_sg_for_device(cryp->dev, cryp->header_sg, cryp->header_sg_len, DMA_TO_DEVICE); + + tx_in = dmaengine_prep_slave_sg(cryp->dma_lch_in, cryp->header_sg, cryp->header_sg_len, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_in) { + dev_err(cryp->dev, "IN prep_slave_sg() failed\n"); + return -EINVAL; + } + + tx_in->callback_param = cryp; + tx_in->callback = stm32_cryp_header_dma_callback; + + /* Advance scatterwalk to not DMA'ed data */ + align_size = ALIGN_DOWN(cryp->header_in, cryp->hw_blocksize); + scatterwalk_skip(&cryp->in_walk, align_size); + cryp->header_in -= align_size; + + ret = dma_submit_error(dmaengine_submit(tx_in)); + if (ret < 0) { + dev_err(cryp->dev, "DMA in submit failed\n"); + return ret; + } + dma_async_issue_pending(cryp->dma_lch_in); + + reg = stm32_cryp_read(cryp, cryp->caps->dmacr); + stm32_cryp_write(cryp, cryp->caps->dmacr, reg | DMACR_DIEN); + + return 0; +} + +static int stm32_cryp_dma_start(struct stm32_cryp *cryp) +{ + int ret; + size_t align_size; + struct dma_async_tx_descriptor *tx_in, *tx_out; + u32 reg; + + if (cryp->in_sg != cryp->out_sg) { + ret = dma_map_sg(cryp->dev, cryp->in_sg, cryp->in_sg_len, DMA_TO_DEVICE); + if (!ret) { + dev_err(cryp->dev, "dma_map_sg() error\n"); + return -ENOMEM; + } + } + + ret = dma_map_sg(cryp->dev, cryp->out_sg, cryp->out_sg_len, DMA_FROM_DEVICE); + if (!ret) { + dev_err(cryp->dev, "dma_map_sg() error\n"); + return -ENOMEM; + } + + dma_sync_sg_for_device(cryp->dev, cryp->in_sg, cryp->in_sg_len, DMA_TO_DEVICE); + + tx_in = dmaengine_prep_slave_sg(cryp->dma_lch_in, cryp->in_sg, cryp->in_sg_len, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_in) { + dev_err(cryp->dev, "IN prep_slave_sg() 
failed\n"); + return -EINVAL; + } + + /* No callback necessary */ + tx_in->callback_param = cryp; + tx_in->callback = NULL; + + tx_out = dmaengine_prep_slave_sg(cryp->dma_lch_out, cryp->out_sg, cryp->out_sg_len, + DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_out) { + dev_err(cryp->dev, "OUT prep_slave_sg() failed\n"); + return -EINVAL; + } + + reinit_completion(&cryp->dma_completion); + tx_out->callback = stm32_cryp_dma_callback; + tx_out->callback_param = cryp; + + /* Advance scatterwalk to not DMA'ed data */ + align_size = ALIGN_DOWN(cryp->payload_in, cryp->hw_blocksize); + scatterwalk_skip(&cryp->in_walk, align_size); + cryp->payload_in -= align_size; + + ret = dma_submit_error(dmaengine_submit(tx_in)); + if (ret < 0) { + dev_err(cryp->dev, "DMA in submit failed\n"); + return ret; + } + dma_async_issue_pending(cryp->dma_lch_in); + + /* Advance scatterwalk to not DMA'ed data */ + scatterwalk_skip(&cryp->out_walk, align_size); + cryp->payload_out -= align_size; + ret = dma_submit_error(dmaengine_submit(tx_out)); + if (ret < 0) { + dev_err(cryp->dev, "DMA out submit failed\n"); + return ret; + } + dma_async_issue_pending(cryp->dma_lch_out); + + reg = stm32_cryp_read(cryp, cryp->caps->dmacr); + stm32_cryp_write(cryp, cryp->caps->dmacr, reg | DMACR_DOEN | DMACR_DIEN); + + if (!wait_for_completion_timeout(&cryp->dma_completion, msecs_to_jiffies(1000))) { + dev_err(cryp->dev, "DMA out timed out\n"); + dmaengine_terminate_sync(cryp->dma_lch_out); + return -ETIMEDOUT; + } + + return 0; } -static int stm32_cryp_cpu_start(struct stm32_cryp *cryp) +static int stm32_cryp_it_start(struct stm32_cryp *cryp) { /* Enable interrupt and let the IRQ handler do everything */ stm32_cryp_write(cryp, cryp->caps->imsc, IMSCR_IN | IMSCR_OUT); @@ -1149,13 +1420,256 @@ static int stm32_cryp_tdes_cbc_decrypt(struct skcipher_request *req) return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC); } +static enum stm32_dma_mode stm32_cryp_dma_check_sg(struct scatterlist *test_sg, size_t len, + size_t block_size) +{ + struct scatterlist *sg; + int i; + + if (len <= 16) + return NO_DMA; /* Faster */ + + for_each_sg(test_sg, sg, sg_nents(test_sg), i) { + if (!IS_ALIGNED(sg->length, block_size) && !sg_is_last(sg)) + return NO_DMA; + + if (sg->offset % sizeof(u32)) + return NO_DMA; + + if (sg_is_last(sg) && !IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) + return DMA_NEED_SG_TRUNC; + } + + return DMA_PLAIN_SG; +} + +static enum stm32_dma_mode stm32_cryp_dma_check(struct stm32_cryp *cryp, struct scatterlist *in_sg, + struct scatterlist *out_sg) +{ + enum stm32_dma_mode ret = DMA_PLAIN_SG; + + if (!is_aes(cryp)) + return NO_DMA; + + if (!cryp->dma_lch_in || !cryp->dma_lch_out) + return NO_DMA; + + ret = stm32_cryp_dma_check_sg(in_sg, cryp->payload_in, AES_BLOCK_SIZE); + if (ret == NO_DMA) + return ret; + + ret = stm32_cryp_dma_check_sg(out_sg, cryp->payload_out, AES_BLOCK_SIZE); + if (ret == NO_DMA) + return ret; + + /* Check CTR counter overflow */ + if (is_aes(cryp) && is_ctr(cryp)) { + u32 c; + __be32 iv3; + + memcpy(&iv3, &cryp->req->iv[3 * sizeof(u32)], sizeof(iv3)); + c = be32_to_cpu(iv3); + if ((c + cryp->payload_in) < cryp->payload_in) + return NO_DMA; + } + + /* Workaround */ + if (is_aes(cryp) && is_ctr(cryp) && ret == DMA_NEED_SG_TRUNC) + return NO_DMA; + + return ret; +} + +static int stm32_cryp_truncate_sg(struct scatterlist **new_sg, size_t *new_sg_len, + struct scatterlist *sg, off_t skip, size_t size) +{ + struct scatterlist *cur; + int alloc_sg_len; + + *new_sg_len = 0; + + if (!sg || !size) { + *new_sg = 
NULL; + return 0; + } + + alloc_sg_len = sg_nents_for_len(sg, skip + size); + if (alloc_sg_len < 0) + return alloc_sg_len; + + /* We allocate to much sg entry, but it is easier */ + *new_sg = kmalloc_array((size_t)alloc_sg_len, sizeof(struct scatterlist), GFP_KERNEL); + if (!*new_sg) + return -ENOMEM; + + sg_init_table(*new_sg, (unsigned int)alloc_sg_len); + + cur = *new_sg; + while (sg && size) { + unsigned int len = sg->length; + unsigned int offset = sg->offset; + + if (skip > len) { + skip -= len; + sg = sg_next(sg); + continue; + } + + if (skip) { + len -= skip; + offset += skip; + skip = 0; + } + + if (size < len) + len = size; + + if (len > 0) { + (*new_sg_len)++; + size -= len; + sg_set_page(cur, sg_page(sg), len, offset); + if (size == 0) + sg_mark_end(cur); + cur = sg_next(cur); + } + + sg = sg_next(sg); + } + + return 0; +} + +static int stm32_cryp_cipher_prepare(struct stm32_cryp *cryp, struct scatterlist *in_sg, + struct scatterlist *out_sg) +{ + size_t align_size; + int ret; + + cryp->dma_mode = stm32_cryp_dma_check(cryp, in_sg, out_sg); + + scatterwalk_start(&cryp->in_walk, in_sg); + scatterwalk_start(&cryp->out_walk, out_sg); + + if (cryp->dma_mode == NO_DMA) { + cryp->flags &= ~FLG_IN_OUT_DMA; + + if (is_ctr(cryp)) + memset(cryp->last_ctr, 0, sizeof(cryp->last_ctr)); + + } else if (cryp->dma_mode == DMA_NEED_SG_TRUNC) { + + cryp->flags |= FLG_IN_OUT_DMA; + + align_size = ALIGN_DOWN(cryp->payload_in, cryp->hw_blocksize); + ret = stm32_cryp_truncate_sg(&cryp->in_sg, &cryp->in_sg_len, in_sg, 0, align_size); + if (ret) + return ret; + + ret = stm32_cryp_truncate_sg(&cryp->out_sg, &cryp->out_sg_len, out_sg, 0, + align_size); + if (ret) { + kfree(cryp->in_sg); + return ret; + } + } else { + cryp->flags |= FLG_IN_OUT_DMA; + + cryp->in_sg = in_sg; + cryp->out_sg = out_sg; + + ret = sg_nents_for_len(cryp->in_sg, cryp->payload_in); + if (ret < 0) + return ret; + cryp->in_sg_len = (size_t)ret; + + ret = sg_nents_for_len(out_sg, cryp->payload_out); + if (ret < 0) + return ret; + cryp->out_sg_len = (size_t)ret; + } + + return 0; +} + +static int stm32_cryp_aead_prepare(struct stm32_cryp *cryp, struct scatterlist *in_sg, + struct scatterlist *out_sg) +{ + size_t align_size; + off_t skip; + int ret, ret2; + + cryp->header_sg = NULL; + cryp->in_sg = NULL; + cryp->out_sg = NULL; + + if (!cryp->dma_lch_in || !cryp->dma_lch_out) { + cryp->dma_mode = NO_DMA; + cryp->flags &= ~(FLG_IN_OUT_DMA | FLG_HEADER_DMA); + + return 0; + } + + /* CCM hw_init may have advanced in header */ + skip = cryp->areq->assoclen - cryp->header_in; + + align_size = ALIGN_DOWN(cryp->header_in, cryp->hw_blocksize); + ret = stm32_cryp_truncate_sg(&cryp->header_sg, &cryp->header_sg_len, in_sg, skip, + align_size); + if (ret) + return ret; + + ret = stm32_cryp_dma_check_sg(cryp->header_sg, align_size, AES_BLOCK_SIZE); + if (ret == NO_DMA) { + /* We cannot DMA the header */ + kfree(cryp->header_sg); + cryp->header_sg = NULL; + + cryp->flags &= ~FLG_HEADER_DMA; + } else { + cryp->flags |= FLG_HEADER_DMA; + } + + /* Now skip all header to be at payload start */ + skip = cryp->areq->assoclen; + align_size = ALIGN_DOWN(cryp->payload_in, cryp->hw_blocksize); + ret = stm32_cryp_truncate_sg(&cryp->in_sg, &cryp->in_sg_len, in_sg, skip, align_size); + if (ret) { + kfree(cryp->header_sg); + return ret; + } + + /* For out buffer align_size is same as in buffer */ + ret = stm32_cryp_truncate_sg(&cryp->out_sg, &cryp->out_sg_len, out_sg, skip, align_size); + if (ret) { + kfree(cryp->header_sg); + kfree(cryp->in_sg); + return ret; + } + 
+ ret = stm32_cryp_dma_check_sg(cryp->in_sg, align_size, AES_BLOCK_SIZE); + ret2 = stm32_cryp_dma_check_sg(cryp->out_sg, align_size, AES_BLOCK_SIZE); + if (ret == NO_DMA || ret2 == NO_DMA) { + kfree(cryp->in_sg); + cryp->in_sg = NULL; + + kfree(cryp->out_sg); + cryp->out_sg = NULL; + + cryp->flags &= ~FLG_IN_OUT_DMA; + } else { + cryp->flags |= FLG_IN_OUT_DMA; + } + + return 0; +} + static int stm32_cryp_prepare_req(struct skcipher_request *req, struct aead_request *areq) { struct stm32_cryp_ctx *ctx; struct stm32_cryp *cryp; struct stm32_cryp_reqctx *rctx; - struct scatterlist *in_sg; + struct scatterlist *in_sg, *out_sg; int ret; if (!req && !areq) @@ -1169,8 +1683,6 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req, rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq); rctx->mode &= FLG_MODE_MASK; - ctx->cryp = cryp; - cryp->flags = (cryp->flags & ~FLG_MODE_MASK) | rctx->mode; cryp->hw_blocksize = is_aes(cryp) ? AES_BLOCK_SIZE : DES_BLOCK_SIZE; cryp->ctx = ctx; @@ -1182,6 +1694,15 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req, cryp->payload_in = req->cryptlen; cryp->payload_out = req->cryptlen; cryp->authsize = 0; + + in_sg = req->src; + out_sg = req->dst; + + ret = stm32_cryp_cipher_prepare(cryp, in_sg, out_sg); + if (ret) + return ret; + + ret = stm32_cryp_hw_init(cryp); } else { /* * Length of input and output data: @@ -1211,23 +1732,22 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req, cryp->header_in = areq->assoclen; cryp->payload_out = cryp->payload_in; } - } - in_sg = req ? req->src : areq->src; - scatterwalk_start(&cryp->in_walk, in_sg); - - cryp->out_sg = req ? req->dst : areq->dst; - scatterwalk_start(&cryp->out_walk, cryp->out_sg); + in_sg = areq->src; + out_sg = areq->dst; - if (is_gcm(cryp) || is_ccm(cryp)) { + scatterwalk_start(&cryp->in_walk, in_sg); /* In output, jump after assoc data */ - scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->areq->assoclen, 2); - } + scatterwalk_start_at_pos(&cryp->out_walk, out_sg, + areq->assoclen); - if (is_ctr(cryp)) - memset(cryp->last_ctr, 0, sizeof(cryp->last_ctr)); + ret = stm32_cryp_hw_init(cryp); + if (ret) + return ret; + + ret = stm32_cryp_aead_prepare(cryp, in_sg, out_sg); + } - ret = stm32_cryp_hw_init(cryp); return ret; } @@ -1239,12 +1759,24 @@ static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq) struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx( crypto_skcipher_reqtfm(req)); struct stm32_cryp *cryp = ctx->cryp; + int ret; if (!cryp) return -ENODEV; - return stm32_cryp_prepare_req(req, NULL) ?: - stm32_cryp_cpu_start(cryp); + ret = stm32_cryp_prepare_req(req, NULL); + if (ret) + return ret; + + if (cryp->flags & FLG_IN_OUT_DMA) + ret = stm32_cryp_dma_start(cryp); + else + ret = stm32_cryp_it_start(cryp); + + if (ret == -ETIMEDOUT) + stm32_cryp_finish_req(cryp, ret); + + return ret; } static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq) @@ -1262,13 +1794,20 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq) if (err) return err; - if (unlikely(!cryp->payload_in && !cryp->header_in)) { + if (!stm32_cryp_get_input_text_len(cryp) && !cryp->header_in && + !(cryp->flags & FLG_HEADER_DMA)) { /* No input data to process: get tag and finish */ stm32_cryp_finish_req(cryp, 0); return 0; } - return stm32_cryp_cpu_start(cryp); + if (cryp->flags & FLG_HEADER_DMA) + return stm32_cryp_header_dma_start(cryp); + + if (!cryp->header_in && cryp->flags & FLG_IN_OUT_DMA) + return 
stm32_cryp_dma_start(cryp); + + return stm32_cryp_it_start(cryp); } static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) @@ -1334,12 +1873,12 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) /* Get and write tag */ readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); - scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); + memcpy_to_scatterwalk(&cryp->out_walk, out_tag, cryp->authsize); } else { /* Get and check tag */ u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32]; - scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); + memcpy_from_scatterwalk(in_tag, &cryp->in_walk, cryp->authsize); readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); if (crypto_memneq(in_tag, out_tag, cryp->authsize)) @@ -1384,8 +1923,8 @@ static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) u32 block[AES_BLOCK_32]; readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, - cryp->payload_out), 1); + memcpy_to_scatterwalk(&cryp->out_walk, block, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out)); cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, cryp->payload_out); } @@ -1394,8 +1933,8 @@ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) { u32 block[AES_BLOCK_32] = {0}; - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, - cryp->payload_in), 0); + memcpy_from_scatterwalk(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, + cryp->payload_in)); writesl(cryp->regs + cryp->caps->din, block, cryp->hw_blocksize / sizeof(u32)); cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); } @@ -1442,8 +1981,8 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) */ readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, - cryp->payload_out), 1); + memcpy_to_scatterwalk(&cryp->out_walk, block, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out)); cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, cryp->payload_out); @@ -1540,8 +2079,8 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) */ readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, - cryp->payload_out), 1); + memcpy_to_scatterwalk(&cryp->out_walk, block, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out)); cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, cryp->payload_out); /* d) Load again CRYP_CSGCMCCMxR */ @@ -1622,7 +2161,7 @@ static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) written = min_t(size_t, AES_BLOCK_SIZE, cryp->header_in); - scatterwalk_copychunks(block, &cryp->in_walk, written, 0); + memcpy_from_scatterwalk(block, &cryp->in_walk, written); writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); @@ -1665,8 +2204,11 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) it_mask &= ~IMSCR_OUT; stm32_cryp_write(cryp, cryp->caps->imsc, it_mask); - if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out) + if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out) { + local_bh_disable(); stm32_cryp_finish_req(cryp, 0); + local_bh_enable(); + } return IRQ_HANDLED; } @@ -1680,13 +2222,72 @@ static irqreturn_t stm32_cryp_irq(int irq, void *arg) return IRQ_WAKE_THREAD; } +static int 
stm32_cryp_dma_init(struct stm32_cryp *cryp) +{ + struct dma_slave_config dma_conf; + struct dma_chan *chan; + int ret; + + memset(&dma_conf, 0, sizeof(dma_conf)); + + dma_conf.direction = DMA_MEM_TO_DEV; + dma_conf.dst_addr = cryp->phys_base + cryp->caps->din; + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.dst_maxburst = CRYP_DMA_BURST_REG; + dma_conf.device_fc = false; + + chan = dma_request_chan(cryp->dev, "in"); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + cryp->dma_lch_in = chan; + ret = dmaengine_slave_config(cryp->dma_lch_in, &dma_conf); + if (ret) { + dma_release_channel(cryp->dma_lch_in); + cryp->dma_lch_in = NULL; + dev_err(cryp->dev, "Couldn't configure DMA in slave.\n"); + return ret; + } + + memset(&dma_conf, 0, sizeof(dma_conf)); + + dma_conf.direction = DMA_DEV_TO_MEM; + dma_conf.src_addr = cryp->phys_base + cryp->caps->dout; + dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.src_maxburst = CRYP_DMA_BURST_REG; + dma_conf.device_fc = false; + + chan = dma_request_chan(cryp->dev, "out"); + if (IS_ERR(chan)) { + dma_release_channel(cryp->dma_lch_in); + cryp->dma_lch_in = NULL; + return PTR_ERR(chan); + } + + cryp->dma_lch_out = chan; + + ret = dmaengine_slave_config(cryp->dma_lch_out, &dma_conf); + if (ret) { + dma_release_channel(cryp->dma_lch_out); + cryp->dma_lch_out = NULL; + dev_err(cryp->dev, "Couldn't configure DMA out slave.\n"); + dma_release_channel(cryp->dma_lch_in); + cryp->dma_lch_in = NULL; + return ret; + } + + init_completion(&cryp->dma_completion); + + return 0; +} + static struct skcipher_engine_alg crypto_algs[] = { { .base = { .base.cra_name = "ecb(aes)", .base.cra_driver_name = "stm32-ecb-aes", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1707,8 +2308,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "cbc(aes)", .base.cra_driver_name = "stm32-cbc-aes", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1730,8 +2331,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "ctr(aes)", .base.cra_driver_name = "stm32-ctr-aes", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1753,8 +2354,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "ecb(des)", .base.cra_driver_name = "stm32-ecb-des", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1775,8 +2376,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "cbc(des)", .base.cra_driver_name = "stm32-cbc-des", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | 
CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1798,8 +2399,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "stm32-ecb-des3", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1820,8 +2421,8 @@ static struct skcipher_engine_alg crypto_algs[] = { .base = { .base.cra_name = "cbc(des3_ede)", .base.cra_driver_name = "stm32-cbc-des3", - .base.cra_priority = 200, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), .base.cra_alignmask = 0, @@ -1854,8 +2455,8 @@ static struct aead_engine_alg aead_algs[] = { .base.base = { .cra_name = "gcm(aes)", .cra_driver_name = "stm32-gcm-aes", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct stm32_cryp_ctx), .cra_alignmask = 0, @@ -1877,8 +2478,8 @@ static struct aead_engine_alg aead_algs[] = { .base.base = { .cra_name = "ccm(aes)", .cra_driver_name = "stm32-ccm-aes", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct stm32_cryp_ctx), .cra_alignmask = 0, @@ -1901,6 +2502,7 @@ static const struct stm32_cryp_caps ux500_data = { .sr = UX500_CRYP_SR, .din = UX500_CRYP_DIN, .dout = UX500_CRYP_DOUT, + .dmacr = UX500_CRYP_DMACR, .imsc = UX500_CRYP_IMSC, .mis = UX500_CRYP_MIS, .k1l = UX500_CRYP_K1L, @@ -1923,6 +2525,7 @@ static const struct stm32_cryp_caps f7_data = { .sr = CRYP_SR, .din = CRYP_DIN, .dout = CRYP_DOUT, + .dmacr = CRYP_DMACR, .imsc = CRYP_IMSCR, .mis = CRYP_MISR, .k1l = CRYP_K1LR, @@ -1945,6 +2548,7 @@ static const struct stm32_cryp_caps mp1_data = { .sr = CRYP_SR, .din = CRYP_DIN, .dout = CRYP_DOUT, + .dmacr = CRYP_DMACR, .imsc = CRYP_IMSCR, .mis = CRYP_MISR, .k1l = CRYP_K1LR, @@ -1985,6 +2589,8 @@ static int stm32_cryp_probe(struct platform_device *pdev) if (IS_ERR(cryp->regs)) return PTR_ERR(cryp->regs); + cryp->phys_base = platform_get_resource(pdev, IORESOURCE_MEM, 0)->start; + irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; @@ -2030,6 +2636,17 @@ static int stm32_cryp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cryp); + ret = stm32_cryp_dma_init(cryp); + switch (ret) { + case 0: + break; + case -ENODEV: + dev_dbg(dev, "DMA mode not available\n"); + break; + default: + goto err_dma; + } + spin_lock(&cryp_list.lock); list_add(&cryp->list, &cryp_list.dev_list); spin_unlock(&cryp_list.lock); @@ -2075,6 +2692,12 @@ err_engine1: spin_lock(&cryp_list.lock); list_del(&cryp->list); spin_unlock(&cryp_list.lock); + + if (cryp->dma_lch_in) + dma_release_channel(cryp->dma_lch_in); + if (cryp->dma_lch_out) + dma_release_channel(cryp->dma_lch_out); +err_dma: err_rst: pm_runtime_disable(dev); pm_runtime_put_noidle(dev); @@ -2101,6 +2724,12 @@ static void stm32_cryp_remove(struct platform_device *pdev) list_del(&cryp->list); spin_unlock(&cryp_list.lock); + if (cryp->dma_lch_in) + 
dma_release_channel(cryp->dma_lch_in); + + if (cryp->dma_lch_out) + dma_release_channel(cryp->dma_lch_out); + pm_runtime_disable(cryp->dev); pm_runtime_put_noidle(cryp->dev); @@ -2142,7 +2771,7 @@ static const struct dev_pm_ops stm32_cryp_pm_ops = { static struct platform_driver stm32_cryp_driver = { .probe = stm32_cryp_probe, - .remove_new = stm32_cryp_remove, + .remove = stm32_cryp_remove, .driver = { .name = DRIVER_NAME, .pm = &stm32_cryp_pm_ops, diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 34e0d7e381a8..768b27de4737 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -94,6 +94,7 @@ #define HASH_FLAGS_ERRORS BIT(21) #define HASH_FLAGS_EMPTY BIT(22) #define HASH_FLAGS_HMAC BIT(23) +#define HASH_FLAGS_SGS_COPIED BIT(24) #define HASH_OP_UPDATE 1 #define HASH_OP_FINAL 2 @@ -145,7 +146,7 @@ struct stm32_hash_state { u16 bufcnt; u16 blocklen; - u8 buffer[HASH_BUFLEN] __aligned(4); + u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32)); /* hash state */ u32 hw_context[3 + HASH_CSR_NB_MAX]; @@ -158,8 +159,8 @@ struct stm32_hash_request_ctx { u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32)); size_t digcnt; - /* DMA */ struct scatterlist *sg; + struct scatterlist sgl[2]; /* scatterlist used to realize alignment */ unsigned int offset; unsigned int total; struct scatterlist sg_key; @@ -184,6 +185,7 @@ struct stm32_hash_pdata { size_t algs_info_size; bool has_sr; bool has_mdmat; + bool context_secured; bool broken_emptymsg; bool ux500; }; @@ -195,6 +197,7 @@ struct stm32_hash_dev { struct reset_control *rst; void __iomem *io_base; phys_addr_t phys_base; + u8 xmit_buf[HASH_BUFLEN] __aligned(sizeof(u32)); u32 dma_mode; bool polled; @@ -220,6 +223,8 @@ static struct stm32_hash_drv stm32_hash = { }; static void stm32_hash_dma_callback(void *param); +static int stm32_hash_prepare_request(struct ahash_request *req); +static void stm32_hash_unprepare_request(struct ahash_request *req); static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset) { @@ -232,6 +237,11 @@ static inline void stm32_hash_write(struct stm32_hash_dev *hdev, writel_relaxed(value, hdev->io_base + offset); } +/** + * stm32_hash_wait_busy - wait until hash processor is available. It return an + * error if the hash core is processing a block of data for more than 10 ms. + * @hdev: the stm32_hash_dev device. + */ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) { u32 status; @@ -245,6 +255,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) !(status & HASH_SR_BUSY), 10, 10000); } +/** + * stm32_hash_set_nblw - set the number of valid bytes in the last word. + * @hdev: the stm32_hash_dev device. + * @length: the length of the final word. + */ static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length) { u32 reg; @@ -282,6 +297,11 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev) return 0; } +/** + * stm32_hash_write_ctrl - Initialize the hash processor, only if + * HASH_FLAGS_INIT is set. 
+ * @hdev: the stm32_hash_dev device + */ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); @@ -469,9 +489,7 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; - u32 *preg = state->hw_context; int bufcnt, err = 0, final; - int i, swap_reg; dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags); @@ -495,34 +513,23 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) return stm32_hash_xmit_cpu(hdev, state->buffer, bufcnt, 1); } - if (!(hdev->flags & HASH_FLAGS_INIT)) - return 0; - - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - - swap_reg = hash_swap_reg(rctx); - - if (!hdev->pdata->ux500) - *preg++ = stm32_hash_read(hdev, HASH_IMR); - *preg++ = stm32_hash_read(hdev, HASH_STR); - *preg++ = stm32_hash_read(hdev, HASH_CR); - for (i = 0; i < swap_reg; i++) - *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); - - state->flags |= HASH_FLAGS_INIT; - return err; } static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, - struct scatterlist *sg, int length, int mdma) + struct scatterlist *sg, int length, int mdmat) { struct dma_async_tx_descriptor *in_desc; dma_cookie_t cookie; u32 reg; int err; + dev_dbg(hdev->dev, "%s mdmat: %x length: %d\n", __func__, mdmat, length); + + /* do not use dma if there is no data to send */ + if (length <= 0) + return 0; + in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -535,13 +542,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, in_desc->callback = stm32_hash_dma_callback; in_desc->callback_param = hdev; - hdev->flags |= HASH_FLAGS_FINAL; hdev->flags |= HASH_FLAGS_DMA_ACTIVE; reg = stm32_hash_read(hdev, HASH_CR); if (hdev->pdata->has_mdmat) { - if (mdma) + if (mdmat) reg |= HASH_CR_MDMAT; else reg &= ~HASH_CR_MDMAT; @@ -550,7 +556,6 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, stm32_hash_write(hdev, HASH_CR, reg); - stm32_hash_set_nblw(hdev, length); cookie = dmaengine_submit(in_desc); err = dma_submit_error(cookie); @@ -590,7 +595,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev) struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); int err; - if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) { + if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode > 0) { err = stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) return -ETIMEDOUT; @@ -655,18 +660,20 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) struct scatterlist sg[1], *tsg; int err = 0, reg, ncp = 0; unsigned int i, len = 0, bufcnt = 0; + bool final = hdev->flags & HASH_FLAGS_FINAL; bool is_last = false; + u32 last_word; - rctx->sg = hdev->req->src; - rctx->total = hdev->req->nbytes; + dev_dbg(hdev->dev, "%s total: %d bufcnt: %d final: %d\n", + __func__, rctx->total, rctx->state.bufcnt, final); - rctx->nents = sg_nents(rctx->sg); if (rctx->nents < 0) return -EINVAL; stm32_hash_write_ctrl(hdev); - if (hdev->flags & HASH_FLAGS_HMAC) { + if (hdev->flags & HASH_FLAGS_HMAC && (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { + hdev->flags |= HASH_FLAGS_HMAC_KEY; err = stm32_hash_hmac_dma_send(hdev); if (err != -EINPROGRESS) return err; @@ -677,22 +684,36 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) len = sg->length; if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) { - sg->length = rctx->total - bufcnt; - 
is_last = true; - if (hdev->dma_mode == 1) { - len = (ALIGN(sg->length, 16) - 16); - - ncp = sg_pcopy_to_buffer( - rctx->sg, rctx->nents, - rctx->state.buffer, sg->length - len, - rctx->total - sg->length + len); - - sg->length = len; + if (!final) { + /* Always manually put the last word of a non-final transfer. */ + len -= sizeof(u32); + sg_pcopy_to_buffer(rctx->sg, rctx->nents, &last_word, 4, len); + sg->length -= sizeof(u32); } else { - if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { - len = sg->length; - sg->length = ALIGN(sg->length, - sizeof(u32)); + /* + * In Multiple DMA mode, DMA must be aborted before the final + * transfer. + */ + sg->length = rctx->total - bufcnt; + if (hdev->dma_mode > 0) { + len = (ALIGN(sg->length, 16) - 16); + + ncp = sg_pcopy_to_buffer(rctx->sg, rctx->nents, + rctx->state.buffer, + sg->length - len, + rctx->total - sg->length + len); + + if (!len) + break; + + sg->length = len; + } else { + is_last = true; + if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { + len = sg->length; + sg->length = ALIGN(sg->length, + sizeof(u32)); + } } } } @@ -706,43 +727,67 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) err = stm32_hash_xmit_dma(hdev, sg, len, !is_last); + /* The last word of a non final transfer is sent manually. */ + if (!final) { + stm32_hash_write(hdev, HASH_DIN, last_word); + len += sizeof(u32); + } + + rctx->total -= len; + bufcnt += sg[0].length; dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); - if (err == -ENOMEM) + if (err == -ENOMEM || err == -ETIMEDOUT) return err; if (is_last) break; } - if (hdev->dma_mode == 1) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - reg = stm32_hash_read(hdev, HASH_CR); - reg &= ~HASH_CR_DMAE; - reg |= HASH_CR_DMAA; - stm32_hash_write(hdev, HASH_CR, reg); + /* + * When the second last block transfer of 4 words is performed by the DMA, + * the software must set the DMA Abort bit (DMAA) to 1 before completing the + * last transfer of 4 words or less. + */ + if (final) { + if (hdev->dma_mode > 0) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + reg = stm32_hash_read(hdev, HASH_CR); + reg &= ~HASH_CR_DMAE; + reg |= HASH_CR_DMAA; + stm32_hash_write(hdev, HASH_CR, reg); + + if (ncp) { + memset(buffer + ncp, 0, 4 - DIV_ROUND_UP(ncp, sizeof(u32))); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } - if (ncp) { - memset(buffer + ncp, 0, - DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); - writesl(hdev->io_base + HASH_DIN, buffer, - DIV_ROUND_UP(ncp, sizeof(u32))); + stm32_hash_set_nblw(hdev, ncp); + reg = stm32_hash_read(hdev, HASH_STR); + reg |= HASH_STR_DCAL; + stm32_hash_write(hdev, HASH_STR, reg); + err = -EINPROGRESS; } - stm32_hash_set_nblw(hdev, ncp); - reg = stm32_hash_read(hdev, HASH_STR); - reg |= HASH_STR_DCAL; - stm32_hash_write(hdev, HASH_STR, reg); - err = -EINPROGRESS; - } - if (hdev->flags & HASH_FLAGS_HMAC) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - err = stm32_hash_hmac_dma_send(hdev); + /* + * The hash processor needs the key to be loaded a second time in order + * to process the HMAC. 
+ */ + if (hdev->flags & HASH_FLAGS_HMAC) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + err = stm32_hash_hmac_dma_send(hdev); + } + + return err; } - return err; + if (err != -EINPROGRESS) + return err; + + return 0; } static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) @@ -765,33 +810,6 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) return hdev; } -static bool stm32_hash_dma_aligned_data(struct ahash_request *req) -{ - struct scatterlist *sg; - struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); - int i; - - if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen) - return false; - - if (sg_nents(req->src) > 1) { - if (hdev->dma_mode == 1) - return false; - for_each_sg(req->src, sg, sg_nents(req->src), i) { - if ((!IS_ALIGNED(sg->length, sizeof(u32))) && - (!sg_is_last(sg))) - return false; - } - } - - if (req->src->offset % 4) - return false; - - return true; -} - static int stm32_hash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -802,8 +820,10 @@ static int stm32_hash_init(struct ahash_request *req) bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE; rctx->hdev = hdev; + state->flags = 0; - state->flags = HASH_FLAGS_CPU; + if (!(hdev->dma_lch && hdev->pdata->has_mdmat)) + state->flags |= HASH_FLAGS_CPU; if (sha3_mode) state->flags |= HASH_FLAGS_SHA3_MODE; @@ -857,6 +877,7 @@ static int stm32_hash_init(struct ahash_request *req) dev_err(hdev->dev, "Error, block too large"); return -EINVAL; } + rctx->nents = 0; rctx->total = 0; rctx->offset = 0; rctx->data_type = HASH_DATA_8_BITS; @@ -874,6 +895,9 @@ static int stm32_hash_update_req(struct stm32_hash_dev *hdev) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; + dev_dbg(hdev->dev, "update_req: total: %u, digcnt: %zd, final: 0", + rctx->total, rctx->digcnt); + if (!(state->flags & HASH_FLAGS_CPU)) return stm32_hash_dma_send(hdev); @@ -887,6 +911,11 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev) struct stm32_hash_state *state = &rctx->state; int buflen = state->bufcnt; + if (!(state->flags & HASH_FLAGS_CPU)) { + hdev->flags |= HASH_FLAGS_FINAL; + return stm32_hash_dma_send(hdev); + } + if (state->flags & HASH_FLAGS_FINUP) return stm32_hash_update_req(hdev); @@ -968,15 +997,21 @@ static int stm32_hash_finish(struct ahash_request *req) static void stm32_hash_finish_req(struct ahash_request *req, int err) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; struct stm32_hash_dev *hdev = rctx->hdev; + if (hdev->flags & HASH_FLAGS_DMA_ACTIVE) + state->flags |= HASH_FLAGS_DMA_ACTIVE; + else + state->flags &= ~HASH_FLAGS_DMA_ACTIVE; + if (!err && (HASH_FLAGS_FINAL & hdev->flags)) { stm32_hash_copy_hash(req); err = stm32_hash_finish(req); } - pm_runtime_mark_last_busy(hdev->dev); - pm_runtime_put_autosuspend(hdev->dev); + /* Finalized request mist be unprepared here */ + stm32_hash_unprepare_request(req); crypto_finalize_hash_request(hdev->engine, req, err); } @@ -1006,6 +1041,10 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) pm_runtime_get_sync(hdev->dev); + err = stm32_hash_prepare_request(req); + if (err) + return err; + hdev->req = req; hdev->flags = 0; swap_reg = hash_swap_reg(rctx); @@ -1030,6 +1069,12 @@ static 
int stm32_hash_one_request(struct crypto_engine *engine, void *areq) if (state->flags & HASH_FLAGS_HMAC) hdev->flags |= HASH_FLAGS_HMAC | HASH_FLAGS_HMAC_KEY; + + if (state->flags & HASH_FLAGS_CPU) + hdev->flags |= HASH_FLAGS_CPU; + + if (state->flags & HASH_FLAGS_DMA_ACTIVE) + hdev->flags |= HASH_FLAGS_DMA_ACTIVE; } if (rctx->op == HASH_OP_UPDATE) @@ -1054,6 +1099,284 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) return 0; } +static int stm32_hash_copy_sgs(struct stm32_hash_request_ctx *rctx, + struct scatterlist *sg, int bs, + unsigned int new_len) +{ + struct stm32_hash_state *state = &rctx->state; + int pages; + void *buf; + + pages = get_order(new_len); + + buf = (void *)__get_free_pages(GFP_ATOMIC, pages); + if (!buf) { + pr_err("Couldn't allocate pages for unaligned cases.\n"); + return -ENOMEM; + } + + if (state->bufcnt) + memcpy(buf, rctx->hdev->xmit_buf, state->bufcnt); + + scatterwalk_map_and_copy(buf + state->bufcnt, sg, rctx->offset, + min(new_len, rctx->total) - state->bufcnt, 0); + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, buf, new_len); + rctx->sg = rctx->sgl; + state->flags |= HASH_FLAGS_SGS_COPIED; + rctx->nents = 1; + rctx->offset += new_len - state->bufcnt; + state->bufcnt = 0; + rctx->total = new_len; + + return 0; +} + +static int stm32_hash_align_sgs(struct scatterlist *sg, + int nbytes, int bs, bool init, bool final, + struct stm32_hash_request_ctx *rctx) +{ + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_dev *hdev = rctx->hdev; + struct scatterlist *sg_tmp = sg; + int offset = rctx->offset; + int new_len; + int n = 0; + int bufcnt = state->bufcnt; + bool secure_ctx = hdev->pdata->context_secured; + bool aligned = true; + + if (!sg || !sg->length || !nbytes) { + if (bufcnt) { + bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs; + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, bufcnt); + rctx->sg = rctx->sgl; + rctx->nents = 1; + } + + return 0; + } + + new_len = nbytes; + + if (offset) + aligned = false; + + if (final) { + new_len = DIV_ROUND_UP(new_len, bs) * bs; + } else { + new_len = (new_len - 1) / bs * bs; // return n block - 1 block + + /* + * Context save in some version of HASH IP can only be done when the + * FIFO is ready to get a new block. This implies to send n block plus a + * 32 bit word in the first DMA send. 
+ */ + if (init && secure_ctx) { + new_len += sizeof(u32); + if (unlikely(new_len > nbytes)) + new_len -= bs; + } + } + + if (!new_len) + return 0; + + if (nbytes != new_len) + aligned = false; + + while (nbytes > 0 && sg_tmp) { + n++; + + if (bufcnt) { + if (!IS_ALIGNED(bufcnt, bs)) { + aligned = false; + break; + } + nbytes -= bufcnt; + bufcnt = 0; + if (!nbytes) + aligned = false; + + continue; + } + + if (offset < sg_tmp->length) { + if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) { + aligned = false; + break; + } + + if (!IS_ALIGNED(sg_tmp->length - offset, bs)) { + aligned = false; + break; + } + } + + if (offset) { + offset -= sg_tmp->length; + if (offset < 0) { + nbytes += offset; + offset = 0; + } + } else { + nbytes -= sg_tmp->length; + } + + sg_tmp = sg_next(sg_tmp); + + if (nbytes < 0) { + aligned = false; + break; + } + } + + if (!aligned) + return stm32_hash_copy_sgs(rctx, sg, bs, new_len); + + rctx->total = new_len; + rctx->offset += new_len; + rctx->nents = n; + if (state->bufcnt) { + sg_init_table(rctx->sgl, 2); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, state->bufcnt); + sg_chain(rctx->sgl, 2, sg); + rctx->sg = rctx->sgl; + } else { + rctx->sg = sg; + } + + return 0; +} + +static int stm32_hash_prepare_request(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_state *state = &rctx->state; + unsigned int nbytes; + int ret, hash_later, bs; + bool update = rctx->op & HASH_OP_UPDATE; + bool init = !(state->flags & HASH_FLAGS_INIT); + bool finup = state->flags & HASH_FLAGS_FINUP; + bool final = state->flags & HASH_FLAGS_FINAL; + + if (!hdev->dma_lch || state->flags & HASH_FLAGS_CPU) + return 0; + + bs = crypto_ahash_blocksize(tfm); + + nbytes = state->bufcnt; + + /* + * In case of update request nbytes must correspond to the content of the + * buffer + the offset minus the content of the request already in the + * buffer. 
+ */ + if (update || finup) + nbytes += req->nbytes - rctx->offset; + + dev_dbg(hdev->dev, + "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n", + __func__, nbytes, bs, rctx->total, rctx->offset, state->bufcnt); + + if (!nbytes) + return 0; + + rctx->total = nbytes; + + if (update && req->nbytes && (!IS_ALIGNED(state->bufcnt, bs))) { + int len = bs - state->bufcnt % bs; + + if (len > req->nbytes) + len = req->nbytes; + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, len, 0); + state->bufcnt += len; + rctx->offset = len; + } + + /* copy buffer in a temporary one that is used for sg alignment */ + if (state->bufcnt) + memcpy(hdev->xmit_buf, state->buffer, state->bufcnt); + + ret = stm32_hash_align_sgs(req->src, nbytes, bs, init, final, rctx); + if (ret) + return ret; + + hash_later = nbytes - rctx->total; + if (hash_later < 0) + hash_later = 0; + + if (hash_later && hash_later <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer, + req->src, + req->nbytes - hash_later, + hash_later, 0); + + state->bufcnt = hash_later; + } else { + state->bufcnt = 0; + } + + if (hash_later > state->blocklen) { + /* FIXME: add support of this case */ + pr_err("Buffer contains more than one block.\n"); + return -ENOMEM; + } + + rctx->total = min(nbytes, rctx->total); + + return 0; +} + +static void stm32_hash_unprepare_request(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + u32 *preg = state->hw_context; + int swap_reg, i; + + if (hdev->dma_lch) + dmaengine_terminate_sync(hdev->dma_lch); + + if (state->flags & HASH_FLAGS_SGS_COPIED) + free_pages((unsigned long)sg_virt(rctx->sg), get_order(rctx->sg->length)); + + rctx->sg = NULL; + rctx->offset = 0; + + state->flags &= ~(HASH_FLAGS_SGS_COPIED); + + if (!(hdev->flags & HASH_FLAGS_INIT)) + goto pm_runtime; + + state->flags |= HASH_FLAGS_INIT; + + if (stm32_hash_wait_busy(hdev)) { + dev_warn(hdev->dev, "Wait busy failed."); + return; + } + + swap_reg = hash_swap_reg(rctx); + + if (!hdev->pdata->ux500) + *preg++ = stm32_hash_read(hdev, HASH_IMR); + *preg++ = stm32_hash_read(hdev, HASH_STR); + *preg++ = stm32_hash_read(hdev, HASH_CR); + for (i = 0; i < swap_reg; i++) + *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); + +pm_runtime: + pm_runtime_mark_last_busy(hdev->dev); + pm_runtime_put_autosuspend(hdev->dev); +} + static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); @@ -1070,16 +1393,26 @@ static int stm32_hash_update(struct ahash_request *req) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); struct stm32_hash_state *state = &rctx->state; - if (!req->nbytes || !(state->flags & HASH_FLAGS_CPU)) + if (!req->nbytes) return 0; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if ((state->bufcnt + rctx->total < state->blocklen)) { - stm32_hash_append_sg(rctx); - return 0; + if (state->flags & HASH_FLAGS_CPU) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + + if ((state->bufcnt + rctx->total < state->blocklen)) { + stm32_hash_append_sg(rctx); + return 0; + } + } else { /* DMA mode */ + if (state->bufcnt + req->nbytes <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, req->nbytes, 0); + state->bufcnt += req->nbytes; + 
return 0; + } } return stm32_hash_enqueue(req, HASH_OP_UPDATE); @@ -1098,20 +1431,18 @@ static int stm32_hash_final(struct ahash_request *req) static int stm32_hash_finup(struct ahash_request *req) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); struct stm32_hash_state *state = &rctx->state; if (!req->nbytes) goto out; state->flags |= HASH_FLAGS_FINUP; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if (hdev->dma_lch && stm32_hash_dma_aligned_data(req)) - state->flags &= ~HASH_FLAGS_CPU; + if ((state->flags & HASH_FLAGS_CPU)) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + } out: return stm32_hash_final(req); @@ -1215,7 +1546,6 @@ static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm) HASH_FLAGS_HMAC); } - static void stm32_hash_cra_exit(struct crypto_tfm *tfm) { struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1228,14 +1558,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) { struct stm32_hash_dev *hdev = dev_id; - if (HASH_FLAGS_CPU & hdev->flags) { - if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; - goto finish; - } - } else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE; - goto finish; + if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; + goto finish; } return IRQ_HANDLED; @@ -1984,6 +2309,7 @@ static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = { .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32mp13), .has_sr = true, .has_mdmat = true, + .context_secured = true, }; static const struct of_device_id stm32_hash_of_match[] = { @@ -2206,7 +2532,7 @@ static const struct dev_pm_ops stm32_hash_pm_ops = { static struct platform_driver stm32_hash_driver = { .probe = stm32_hash_probe, - .remove_new = stm32_hash_remove, + .remove = stm32_hash_remove, .driver = { .name = "stm32-hash", .pm = &stm32_hash_pm_ops, |