Diffstat (limited to 'drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h')
-rw-r--r--   drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h   561
1 file changed, 561 insertions(+), 0 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
new file mode 100644
index 000000000000..e64ca30335de
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -0,0 +1,561 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (C) 2020 Marvell.
+ */
+
+#ifndef __OTX2_CPT_REQMGR_H
+#define __OTX2_CPT_REQMGR_H
+
+#include "otx2_cpt_common.h"
+
+/* Completion code size and initial value */
+#define OTX2_CPT_COMPLETION_CODE_SIZE	8
+#define OTX2_CPT_COMPLETION_CODE_INIT	OTX2_CPT_COMP_E_NOTDONE
+/*
+ * Maximum total number of SG buffers is 100, we divide it equally
+ * between input and output
+ */
+#define OTX2_CPT_MAX_SG_IN_CNT		50
+#define OTX2_CPT_MAX_SG_OUT_CNT		50
+
+/* DMA mode direct or SG */
+#define OTX2_CPT_DMA_MODE_DIRECT	0
+#define OTX2_CPT_DMA_MODE_SG		1
+
+/* Context source CPTR or DPTR */
+#define OTX2_CPT_FROM_CPTR		0
+#define OTX2_CPT_FROM_DPTR		1
+
+#define OTX2_CPT_MAX_REQ_SIZE		65535
+
+#define SG_COMPS_MAX			4
+#define SGV2_COMPS_MAX			3
+
+#define SG_COMP_3			3
+#define SG_COMP_2			2
+#define SG_COMP_1			1
+
+#define OTX2_CPT_DPTR_RPTR_ALIGN	8
+#define OTX2_CPT_RES_ADDR_ALIGN		32
+
+union otx2_cpt_opcode {
+	u16 flags;
+	struct {
+		u8 major;
+		u8 minor;
+	} s;
+};
+
+struct otx2_cptvf_request {
+	u32 param1;
+	u32 param2;
+	u16 dlen;
+	union otx2_cpt_opcode opcode;
+	dma_addr_t cptr_dma;
+	void *cptr;
+};
+
+/*
+ * CPT_INST_S software command definitions
+ * Words EI (0-3)
+ */
+union otx2_cpt_iq_cmd_word0 {
+	u64 u;
+	struct {
+		__be16 opcode;
+		__be16 param1;
+		__be16 param2;
+		__be16 dlen;
+	} s;
+};
+
+union otx2_cpt_iq_cmd_word3 {
+	u64 u;
+	struct {
+		u64 cptr:61;
+		u64 grp:3;
+	} s;
+};
+
+struct otx2_cpt_iq_command {
+	union otx2_cpt_iq_cmd_word0 cmd;
+	u64 dptr;
+	u64 rptr;
+	union otx2_cpt_iq_cmd_word3 cptr;
+};
+
+struct otx2_cpt_pending_entry {
+	void *completion_addr;	/* Completion address */
+	void *info;
+	/* Kernel async request callback */
+	void (*callback)(int status, void *arg1, void *arg2);
+	struct crypto_async_request *areq; /* Async request callback arg */
+	u8 resume_sender;	/* Notify sender to resume sending requests */
+	u8 busy;		/* Entry status (free/busy) */
+};
+
+struct otx2_cpt_pending_queue {
+	struct otx2_cpt_pending_entry *head; /* Head of the queue */
+	u32 front;		/* Process work from here */
+	u32 rear;		/* Append new work here */
+	u32 pending_count;	/* Pending requests count */
+	u32 qlen;		/* Queue length */
+	spinlock_t lock;	/* Queue lock */
+};
+
+struct otx2_cpt_buf_ptr {
+	u8 *vptr;
+	dma_addr_t dma_addr;
+	u16 size;
+};
+
+union otx2_cpt_ctrl_info {
+	u32 flags;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u32 reserved_6_31:26;
+		u32 grp:3;	/* Group bits */
+		u32 dma_mode:2;	/* DMA mode */
+		u32 se_req:1;	/* To SE core */
+#else
+		u32 se_req:1;	/* To SE core */
+		u32 dma_mode:2;	/* DMA mode */
+		u32 grp:3;	/* Group bits */
+		u32 reserved_6_31:26;
+#endif
+	} s;
+};
+
+struct otx2_cpt_req_info {
+	/* Kernel async request callback */
+	void (*callback)(int status, void *arg1, void *arg2);
+	struct crypto_async_request *areq; /* Async request callback arg */
+	struct otx2_cptvf_request req; /* Request information (core specific) */
+	union otx2_cpt_ctrl_info ctrl; /* User control information */
+	struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT];
+	struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT];
+	u8 *iv_out;	/* IV to send back */
+	u16 rlen;	/* Output length */
+	u8 in_cnt;	/* Number of input buffers */
+	u8 out_cnt;	/* Number of output buffers */
+	u8 req_type;	/* Type of request */
+	u8 is_enc;	/* Is a request an encryption request */
+	u8 is_trunc_hmac; /* Is truncated hmac used */
+};
+
+struct otx2_cpt_inst_info {
+	struct otx2_cpt_pending_entry *pentry;
+	struct otx2_cpt_req_info *req;
+	struct pci_dev *pdev;
+	void *completion_addr;
+	u8 *out_buffer;
+	u8 *in_buffer;
+	dma_addr_t dptr_baddr;
+	dma_addr_t rptr_baddr;
+	dma_addr_t comp_baddr;
+	unsigned long time_in;
+	u32 dlen;
+	u32 dma_len;
+	u64 gthr_sz;
+	u64 sctr_sz;
+	u8 extra_time;
+};
+
+struct otx2_cpt_sglist_component {
+	__be16 len0;
+	__be16 len1;
+	__be16 len2;
+	__be16 len3;
+	__be64 ptr0;
+	__be64 ptr1;
+	__be64 ptr2;
+	__be64 ptr3;
+};
+
+struct cn10kb_cpt_sglist_component {
+	u16 len0;
+	u16 len1;
+	u16 len2;
+	u16 valid_segs;
+	u64 ptr0;
+	u64 ptr1;
+	u64 ptr2;
+};
+
+static inline void otx2_cpt_info_destroy(struct pci_dev *pdev,
+					 struct otx2_cpt_inst_info *info)
+{
+	struct otx2_cpt_req_info *req;
+	int i;
+
+	if (info->dptr_baddr)
+		dma_unmap_single(&pdev->dev, info->dptr_baddr,
+				 info->dma_len, DMA_BIDIRECTIONAL);
+
+	if (info->req) {
+		req = info->req;
+		for (i = 0; i < req->out_cnt; i++) {
+			if (req->out[i].dma_addr)
+				dma_unmap_single(&pdev->dev,
+						 req->out[i].dma_addr,
+						 req->out[i].size,
+						 DMA_BIDIRECTIONAL);
+		}
+
+		for (i = 0; i < req->in_cnt; i++) {
+			if (req->in[i].dma_addr)
+				dma_unmap_single(&pdev->dev,
+						 req->in[i].dma_addr,
+						 req->in[i].size,
+						 DMA_BIDIRECTIONAL);
+		}
+	}
+	kfree(info);
+}
+
+static inline int setup_sgio_components(struct pci_dev *pdev,
+					struct otx2_cpt_buf_ptr *list,
+					int buf_count, u8 *buffer)
+{
+	struct otx2_cpt_sglist_component *sg_ptr;
+	int components;
+	int i, j;
+
+	if (unlikely(!list)) {
+		dev_err(&pdev->dev, "Input list pointer is NULL\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < buf_count; i++) {
+		if (unlikely(!list[i].vptr))
+			continue;
+		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
+						  list[i].size,
+						  DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
+			dev_err(&pdev->dev, "Dma mapping failed\n");
+			goto sg_cleanup;
+		}
+	}
+	components = buf_count / SG_COMPS_MAX;
+	sg_ptr = (struct otx2_cpt_sglist_component *)buffer;
+	for (i = 0; i < components; i++) {
+		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
+		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
+		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
+		sg_ptr->len3 = cpu_to_be16(list[i * SG_COMPS_MAX + 3].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
+		sg_ptr->ptr3 = cpu_to_be64(list[i * SG_COMPS_MAX + 3].dma_addr);
+		sg_ptr++;
+	}
+	components = buf_count % SG_COMPS_MAX;
+
+	switch (components) {
+	case SG_COMP_3:
+		sg_ptr->len2 = cpu_to_be16(list[i * SG_COMPS_MAX + 2].size);
+		sg_ptr->ptr2 = cpu_to_be64(list[i * SG_COMPS_MAX + 2].dma_addr);
+		fallthrough;
+	case SG_COMP_2:
+		sg_ptr->len1 = cpu_to_be16(list[i * SG_COMPS_MAX + 1].size);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * SG_COMPS_MAX + 1].dma_addr);
+		fallthrough;
+	case SG_COMP_1:
+		sg_ptr->len0 = cpu_to_be16(list[i * SG_COMPS_MAX + 0].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * SG_COMPS_MAX + 0].dma_addr);
+		break;
+	default:
+		break;
+	}
+	return 0;
+
+sg_cleanup:
+	for (j = 0; j < i; j++) {
+		if (list[j].dma_addr) {
+			dma_unmap_single(&pdev->dev, list[j].dma_addr,
+					 list[j].size, DMA_BIDIRECTIONAL);
+		}
+
+		list[j].dma_addr = 0;
+	}
+	return -EIO;
+}
+
+static inline int sgv2io_components_setup(struct pci_dev *pdev,
+					  struct otx2_cpt_buf_ptr *list,
+					  int buf_count, u8 *buffer)
+{
+	struct cn10kb_cpt_sglist_component *sg_ptr;
+	int components;
+	int i, j;
+
+	if (unlikely(!list)) {
+		dev_err(&pdev->dev, "Input list pointer is NULL\n");
+		return -EFAULT;
+	}
+
+	for (i = 0; i < buf_count; i++) {
+		if (unlikely(!list[i].vptr))
+			continue;
+		list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
+						  list[i].size,
+						  DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
+			dev_err(&pdev->dev, "Dma mapping failed\n");
+			goto sg_cleanup;
+		}
+	}
+	components = buf_count / SGV2_COMPS_MAX;
+	sg_ptr = (struct cn10kb_cpt_sglist_component *)buffer;
+	for (i = 0; i < components; i++) {
+		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
+		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
+		sg_ptr->len2 = list[i * SGV2_COMPS_MAX + 2].size;
+		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
+		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
+		sg_ptr->ptr2 = list[i * SGV2_COMPS_MAX + 2].dma_addr;
+		sg_ptr->valid_segs = SGV2_COMPS_MAX;
+		sg_ptr++;
+	}
+	components = buf_count % SGV2_COMPS_MAX;
+
+	sg_ptr->valid_segs = components;
+	switch (components) {
+	case SG_COMP_2:
+		sg_ptr->len1 = list[i * SGV2_COMPS_MAX + 1].size;
+		sg_ptr->ptr1 = list[i * SGV2_COMPS_MAX + 1].dma_addr;
+		fallthrough;
+	case SG_COMP_1:
+		sg_ptr->len0 = list[i * SGV2_COMPS_MAX + 0].size;
+		sg_ptr->ptr0 = list[i * SGV2_COMPS_MAX + 0].dma_addr;
+		break;
+	default:
+		break;
+	}
+	return 0;
+
+sg_cleanup:
+	for (j = 0; j < i; j++) {
+		if (list[j].dma_addr) {
+			dma_unmap_single(&pdev->dev, list[j].dma_addr,
+					 list[j].size, DMA_BIDIRECTIONAL);
+		}
+
+		list[j].dma_addr = 0;
+	}
+	return -EIO;
+}
+
+static inline struct otx2_cpt_inst_info *
+cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
+		       gfp_t gfp)
+{
+	u32 dlen = 0, g_len, s_len, sg_len, info_len;
+	struct otx2_cpt_inst_info *info;
+	u32 total_mem_len;
+	int i;
+
+	/* Allocate memory to meet below alignment requirement:
+	 *  ------------------------------------
+	 * |    struct otx2_cpt_inst_info       |
+	 * |    (No alignment required)         |
+	 * |    --------------------------------|
+	 * |   | padding for ARCH_DMA_MINALIGN  |
+	 * |   | alignment                      |
+	 * |------------------------------------|
+	 * |    SG List Gather/Input memory     |
+	 * |    Length = multiple of 32Bytes    |
+	 * |    Alignment = 8Byte               |
+	 * |----------------------------------  |
+	 * |    SG List Scatter/Output memory   |
+	 * |    Length = multiple of 32Bytes    |
+	 * |    Alignment = 8Byte               |
+	 * |     -------------------------------|
+	 * |    | padding for 32B alignment     |
+	 * |------------------------------------|
+	 * |    Result response memory          |
+	 * |    Alignment = 32Byte              |
+	 *  ------------------------------------
+	 */
+
+	info_len = sizeof(*info);
+
+	g_len = ((req->in_cnt + 2) / 3) *
+		 sizeof(struct cn10kb_cpt_sglist_component);
+	s_len = ((req->out_cnt + 2) / 3) *
+		 sizeof(struct cn10kb_cpt_sglist_component);
+	sg_len = g_len + s_len;
+
+	/* Allocate extra memory for SG and response address alignment */
+	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
+	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
+			 ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
+	total_mem_len += ALIGN(sg_len, OTX2_CPT_RES_ADDR_ALIGN);
+	total_mem_len += sizeof(union otx2_cpt_res_s);
+
+	info = kzalloc(total_mem_len, gfp);
+	if (unlikely(!info))
+		return NULL;
+
+	for (i = 0; i < req->in_cnt; i++)
+		dlen += req->in[i].size;
+
+	info->dlen = dlen;
+	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
+	info->out_buffer = info->in_buffer + g_len;
+	info->gthr_sz = req->in_cnt;
+	info->sctr_sz = req->out_cnt;
+
+	/* Setup gather (input) components */
+	if (sgv2io_components_setup(pdev, req->in, req->in_cnt,
+				    info->in_buffer)) {
+		dev_err(&pdev->dev, "Failed to setup gather list\n");
+		goto destroy_info;
+	}
+
+	if (sgv2io_components_setup(pdev, req->out, req->out_cnt,
+				    info->out_buffer)) {
+		dev_err(&pdev->dev, "Failed to setup scatter list\n");
+		goto destroy_info;
+	}
+
+	info->dma_len = total_mem_len - info_len;
+	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
+					  info->dma_len, DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
+		dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
+		goto destroy_info;
+	}
+	info->rptr_baddr = info->dptr_baddr + g_len;
+	/*
+	 * Get buffer for union otx2_cpt_res_s response
+	 * structure and its physical address
+	 */
+	info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len),
+					  OTX2_CPT_RES_ADDR_ALIGN);
+	info->comp_baddr = ALIGN((info->dptr_baddr + sg_len),
+				 OTX2_CPT_RES_ADDR_ALIGN);
+
+	return info;
+
+destroy_info:
+	otx2_cpt_info_destroy(pdev, info);
+	return NULL;
+}
+
+/* SG list header size in bytes */
+#define SG_LIST_HDR_SIZE	8
+static inline struct otx2_cpt_inst_info *
+otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
+		    gfp_t gfp)
+{
+	struct otx2_cpt_inst_info *info;
+	u32 dlen, info_len;
+	u16 g_len, s_len;
+	u32 total_mem_len;
+
+	if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
+		     req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) {
+		dev_err(&pdev->dev, "Error too many sg components\n");
+		return NULL;
+	}
+
+	/* Allocate memory to meet below alignment requirement:
+	 *  ------------------------------------
+	 * |    struct otx2_cpt_inst_info       |
+	 * |    (No alignment required)         |
+	 * |    --------------------------------|
+	 * |   | padding for ARCH_DMA_MINALIGN  |
+	 * |   | alignment                      |
+	 * |------------------------------------|
+	 * |    SG List Header of 8 Byte        |
+	 * |------------------------------------|
+	 * |    SG List Gather/Input memory     |
+	 * |    Length = multiple of 32Bytes    |
+	 * |    Alignment = 8Byte               |
+	 * |----------------------------------  |
+	 * |    SG List Scatter/Output memory   |
+	 * |    Length = multiple of 32Bytes    |
+	 * |    Alignment = 8Byte               |
+	 * |     -------------------------------|
+	 * |    | padding for 32B alignment     |
+	 * |------------------------------------|
+	 * |    Result response memory          |
+	 * |    Alignment = 32Byte              |
+	 *  ------------------------------------
+	 */
+
+	info_len = sizeof(*info);
+
+	g_len = ((req->in_cnt + 3) / 4) *
+		 sizeof(struct otx2_cpt_sglist_component);
+	s_len = ((req->out_cnt + 3) / 4) *
+		 sizeof(struct otx2_cpt_sglist_component);
+
+	dlen = g_len + s_len + SG_LIST_HDR_SIZE;
+
+	/* Allocate extra memory for SG and response address alignment */
+	total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN);
+	total_mem_len += (ARCH_DMA_MINALIGN - 1) &
+			 ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
+	total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN);
+	total_mem_len += sizeof(union otx2_cpt_res_s);
+
+	info = kzalloc(total_mem_len, gfp);
+	if (unlikely(!info))
+		return NULL;
+
+	info->dlen = dlen;
+	info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN);
+	info->out_buffer = info->in_buffer + SG_LIST_HDR_SIZE + g_len;
+
+	((u16 *)info->in_buffer)[0] = req->out_cnt;
+	((u16 *)info->in_buffer)[1] = req->in_cnt;
+	((u16 *)info->in_buffer)[2] = 0;
+	((u16 *)info->in_buffer)[3] = 0;
+	cpu_to_be64s((u64 *)info->in_buffer);
+
+	/* Setup gather (input) components */
+	if (setup_sgio_components(pdev, req->in, req->in_cnt,
+				  &info->in_buffer[8])) {
+		dev_err(&pdev->dev, "Failed to setup gather list\n");
+		goto destroy_info;
+	}
+
+	if (setup_sgio_components(pdev, req->out, req->out_cnt,
+				  info->out_buffer)) {
+		dev_err(&pdev->dev, "Failed to setup scatter list\n");
+		goto destroy_info;
+	}
+
+	info->dma_len = total_mem_len - info_len;
+	info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
+					  info->dma_len, DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
+		dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
+		goto destroy_info;
+	}
+	/*
+	 * Get buffer for union otx2_cpt_res_s response
+	 * structure and its physical address
+	 */
+	info->completion_addr = PTR_ALIGN((info->in_buffer + dlen),
+					  OTX2_CPT_RES_ADDR_ALIGN);
+	info->comp_baddr = ALIGN((info->dptr_baddr + dlen),
+				 OTX2_CPT_RES_ADDR_ALIGN);
+
+	return info;
+
+destroy_info:
+	otx2_cpt_info_destroy(pdev, info);
+	return NULL;
+}
+
+struct otx2_cptlf_wqe;
+int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
+			int cpu_num);
+void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe);
+int otx2_cpt_get_eng_grp_num(struct pci_dev *pdev,
+			     enum otx2_cpt_eng_type);
+
+#endif /* __OTX2_CPT_REQMGR_H */
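
Notes on the code above (editor's annotations, not part of the commit):

The CPT_INST_S command word 0 stores opcode, param1, param2 and dlen as big-endian 16-bit fields. The standalone userspace sketch below illustrates how such a word might be packed; htobe16() stands in for the kernel's cpu_to_be16(), the values are made up, and the exact mapping of union otx2_cpt_opcode into word 0 is done by request-manager code outside this header, so treat this only as an illustration of the byte layout.

	/* Illustrative packing of a word0-style structure; all values are
	 * examples, not taken from the driver.
	 */
	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>

	union iq_cmd_word0 {			/* mirrors union otx2_cpt_iq_cmd_word0 */
		uint64_t u;
		struct {
			uint16_t opcode;
			uint16_t param1;
			uint16_t param2;
			uint16_t dlen;
		} s;
	};

	int main(void)
	{
		union iq_cmd_word0 w0 = { 0 };

		w0.s.opcode = htobe16((0x04 << 8) | 0x03); /* major|minor, example only */
		w0.s.param1 = htobe16(16);
		w0.s.param2 = htobe16(0);
		w0.s.dlen   = htobe16(128);
		printf("word0 = 0x%016llx\n", (unsigned long long)w0.u);
		return 0;
	}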
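The two scatter-gather formats differ: the legacy component (setup_sgio_components) packs four segments per 40-byte descriptor with big-endian lengths and pointers, while the CN10KB "SGv2" component (sgv2io_components_setup) packs three CPU-endian segments plus a valid_segs count in a 32-byte descriptor. The runnable sketch below mimics the SGv2 full-component loop and partial-tail handling with plain userspace types and made-up sizes and addresses.

	/* SGv2-style packing sketch: five buffers -> one full component and
	 * a two-segment tail, mirroring the division/modulo logic above.
	 */
	#include <stdint.h>
	#include <stdio.h>

	struct sgv2_comp {			/* mirrors struct cn10kb_cpt_sglist_component */
		uint16_t len0, len1, len2;
		uint16_t valid_segs;
		uint64_t ptr0, ptr1, ptr2;
	};

	int main(void)
	{
		uint16_t sizes[5] = { 512, 512, 64, 256, 16 };	/* example data */
		uint64_t addrs[5] = { 0x1000, 0x2000, 0x3000, 0x4000, 0x5000 };
		struct sgv2_comp comps[2] = { 0 };
		int full = 5 / 3, tail = 5 % 3, i;

		for (i = 0; i < full; i++) {		/* full 3-segment components */
			comps[i].len0 = sizes[i * 3 + 0];
			comps[i].len1 = sizes[i * 3 + 1];
			comps[i].len2 = sizes[i * 3 + 2];
			comps[i].ptr0 = addrs[i * 3 + 0];
			comps[i].ptr1 = addrs[i * 3 + 1];
			comps[i].ptr2 = addrs[i * 3 + 2];
			comps[i].valid_segs = 3;
		}
		comps[full].valid_segs = tail;		/* partial tail component */
		if (tail >= 1) {
			comps[full].len0 = sizes[full * 3 + 0];
			comps[full].ptr0 = addrs[full * 3 + 0];
		}
		if (tail == 2) {
			comps[full].len1 = sizes[full * 3 + 1];
			comps[full].ptr1 = addrs[full * 3 + 1];
		}

		for (i = 0; i < 2; i++)
			printf("comp%d: valid=%u len0=%u\n", i,
			       comps[i].valid_segs, comps[i].len0);
		return 0;
	}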
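The buffer sizing in otx2_sg_info_create() allocates one block that holds the info struct, alignment slack, the 8-byte SG header, ceil(in_cnt/4) gather and ceil(out_cnt/4) scatter components, and the 32-byte-aligned result word. The sketch below recomputes those sizes outside the kernel; ARCH_DMA_MINALIGN (here 128), sizeof(struct otx2_cpt_inst_info) (here 96) and sizeof(union otx2_cpt_res_s) (here 32) are assumed example values, since they are defined elsewhere in the driver.

	/* Standalone recomputation of the otx2_sg_info_create() sizing math;
	 * the three *_SIZE/MINALIGN constants are assumptions for illustration.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define SG_LIST_HDR_SIZE	8
	#define DPTR_RPTR_ALIGN		8
	#define RES_ADDR_ALIGN		32
	#define MINALIGN		128	/* assumed ARCH_DMA_MINALIGN */
	#define SG_COMP_SIZE		40	/* sizeof(struct otx2_cpt_sglist_component) */
	#define RES_SIZE		32	/* assumed sizeof(union otx2_cpt_res_s) */
	#define INFO_SIZE		96	/* assumed sizeof(struct otx2_cpt_inst_info) */

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		uint32_t in_cnt = 5, out_cnt = 2;	/* example buffer counts */

		/* One legacy sglist component describes up to four buffers. */
		uint32_t g_len = ((in_cnt + 3) / 4) * SG_COMP_SIZE;
		uint32_t s_len = ((out_cnt + 3) / 4) * SG_COMP_SIZE;
		uint32_t dlen = g_len + s_len + SG_LIST_HDR_SIZE;

		uint32_t total = ALIGN_UP(INFO_SIZE, DPTR_RPTR_ALIGN);
		total += (MINALIGN - 1) & ~(DPTR_RPTR_ALIGN - 1); /* alignment slack */
		total += ALIGN_UP(dlen, RES_ADDR_ALIGN);
		total += RES_SIZE;

		/* With the assumptions above: g_len=80 s_len=40 dlen=128 total=376 */
		printf("g_len=%u s_len=%u dlen=%u total=%u\n",
		       g_len, s_len, dlen, total);
		return 0;
	}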
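The 8-byte SG list header is built as four native-endian u16 words (out_cnt, in_cnt, 0, 0) and then byte-swapped in place as one 64-bit word via cpu_to_be64s(), so on little-endian hosts the counts end up big-endian on the wire. A minimal userspace sketch of that transformation, with htobe64() standing in for the kernel helper:

	/* Sketch of the SG list header built in otx2_sg_info_create(). */
	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static void build_sg_hdr(uint8_t *hdr, uint16_t in_cnt, uint16_t out_cnt)
	{
		uint16_t w[4] = { out_cnt, in_cnt, 0, 0 };
		uint64_t v;

		memcpy(&v, w, sizeof(v));
		v = htobe64(v);		/* kernel: cpu_to_be64s((u64 *)hdr) */
		memcpy(hdr, &v, sizeof(v));
	}

	int main(void)
	{
		uint8_t hdr[8];
		int i;

		build_sg_hdr(hdr, 5, 2);	/* example counts */
		for (i = 0; i < 8; i++)
			printf("%02x ", hdr[i]);
		printf("\n");
		return 0;
	}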
