Diffstat (limited to 'drivers/crypto/caam/qi.c')
-rw-r--r--   drivers/crypto/caam/qi.c | 322
1 file changed, 165 insertions(+), 157 deletions(-)
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 1990ed460c46..1e731ed8702b 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -1,15 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * CAAM/SEC 4.x QI transport/backend driver
  * Queue Interface backend functionality
  *
  * Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2017, 2019-2020 NXP
  */
 
 #include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/string.h>
 #include <soc/fsl/qman.h>
 
+#include "debugfs.h"
 #include "regs.h"
 #include "qi.h"
 #include "desc.h"
@@ -17,6 +26,7 @@
 #include "desc_constr.h"
 
 #define PREHDR_RSLS_SHIFT	31
+#define PREHDR_ABS		BIT(25)
 
 /*
  * Use a reasonable backlog of frames (per CPU) as congestion threshold,
@@ -24,9 +34,6 @@
  */
 #define MAX_RSP_FQ_BACKLOG_PER_CPU	256
 
-/* Length of a single buffer in the QI driver memory cache */
-#define CAAM_QI_MEMCACHE_SIZE	512
-
 #define CAAM_QI_ENQUEUE_RETRIES	10000
 
 #define CAAM_NAPI_WEIGHT	63
@@ -50,20 +57,19 @@ struct caam_napi {
  */
 struct caam_qi_pcpu_priv {
 	struct caam_napi caam_napi;
-	struct net_device net_dev;
+	struct net_device *net_dev;
 	struct qman_fq *rsp_fq;
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
+static DEFINE_PER_CPU(int, last_cpu);
 
 /*
  * caam_qi_priv - CAAM QI backend private params
  * @cgr: QMan congestion group
- * @qi_pdev: platform device for QI backend
  */
 struct caam_qi_priv {
 	struct qman_cgr cgr;
-	struct platform_device *qi_pdev;
 };
 
 static struct caam_qi_priv qipriv ____cacheline_aligned;
@@ -75,24 +81,8 @@ static struct caam_qi_priv qipriv ____cacheline_aligned;
 bool caam_congested __read_mostly;
 EXPORT_SYMBOL(caam_congested);
 
-#ifdef CONFIG_DEBUG_FS
-/*
- * This is a counter for the number of times the congestion group (where all
- * the request and response queueus are) reached congestion. Incremented
- * each time the congestion callback is called with congested == true.
- */
-static u64 times_congested;
-#endif
-
 /*
- * CPU from where the module initialised. This is required because QMan driver
- * requires CGRs to be removed from same CPU from where they were originally
- * allocated.
- */
-static int mod_init_cpu;
-
-/*
- * This is a a cache of buffers, from which the users of CAAM QI driver
+ * This is a cache of buffers, from which the users of CAAM QI driver
  * can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
  * doing malloc on the hotpath.
  * NOTE: A more elegant solution would be to have some headroom in the frames
@@ -103,6 +93,16 @@ static int mod_init_cpu;
  */
 static struct kmem_cache *qi_cache;
 
+static void *caam_iova_to_virt(struct iommu_domain *domain,
+			       dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
 int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
 {
 	struct qm_fd fd;
@@ -122,10 +122,12 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
 	qm_fd_addr_set64(&fd, addr);
 
 	do {
+		refcount_inc(&req->drv_ctx->refcnt);
 		ret = qman_enqueue(req->drv_ctx->req_fq, &fd);
 		if (likely(!ret))
 			return 0;
 
+		refcount_dec(&req->drv_ctx->refcnt);
 		if (ret != -EBUSY)
 			break;
 		num_retries++;
@@ -142,26 +144,32 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
 {
 	const struct qm_fd *fd;
 	struct caam_drv_req *drv_req;
-	struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+	struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev->dev);
+	struct caam_drv_private *priv = dev_get_drvdata(qidev);
 
 	fd = &msg->ern.fd;
 
-	if (qm_fd_get_format(fd) != qm_fd_compound) {
-		dev_err(qidev, "Non-compound FD from CAAM\n");
-		return;
-	}
-
-	drv_req = (struct caam_drv_req *)phys_to_virt(qm_fd_addr_get64(fd));
+	drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
 	if (!drv_req) {
 		dev_err(qidev,
 			"Can't find original request for CAAM response\n");
 		return;
 	}
 
+	refcount_dec(&drv_req->drv_ctx->refcnt);
+
+	if (qm_fd_get_format(fd) != qm_fd_compound) {
+		dev_err(qidev, "Non-compound FD from CAAM\n");
+		return;
+	}
+
 	dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
 			 sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
 
-	drv_req->cbk(drv_req, -EIO);
+	if (fd->status)
+		drv_req->cbk(drv_req, be32_to_cpu(fd->status));
+	else
+		drv_req->cbk(drv_req, JRSTA_SSRC_QI);
 }
 
 static struct qman_fq *create_caam_req_fq(struct device *qidev,
@@ -203,8 +211,8 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
 		goto init_req_fq_fail;
 	}
 
-	dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
-		 smp_processor_id());
+	dev_dbg(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
+		smp_processor_id());
 
 	return req_fq;
 
 init_req_fq_fail:
@@ -277,13 +285,15 @@ empty_fq:
 		dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
 
 	qman_destroy_fq(fq);
+	kfree(fq);
 
 	return ret;
 }
 
-static int empty_caam_fq(struct qman_fq *fq)
+static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx)
 {
 	int ret;
+	int retries = 10;
 	struct qm_mcr_queryfq_np np;
 
 	/* Wait till the older CAAM FQ get empty */
@@ -298,11 +308,18 @@ static int empty_caam_fq(struct qman_fq *fq)
 		msleep(20);
 	} while (1);
 
-	/*
-	 * Give extra time for pending jobs from this FQ in holding tanks
-	 * to get processed
-	 */
-	msleep(20);
+	/* Wait until pending jobs from this FQ are processed by CAAM */
+	do {
+		if (refcount_read(&drv_ctx->refcnt) == 1)
+			break;
+
+		msleep(20);
+	} while (--retries);
+
+	if (!retries)
+		dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n",
+			      refcount_read(&drv_ctx->refcnt), fq->fqid);
+
 	return 0;
 }
 
@@ -325,7 +342,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 	/* Create a new req FQ in parked state */
 	new_fq = create_caam_req_fq(drv_ctx->qidev, drv_ctx->rsp_fq,
 				    drv_ctx->context_a, 0);
-	if (unlikely(IS_ERR_OR_NULL(new_fq))) {
+	if (IS_ERR(new_fq)) {
 		dev_err(qidev, "FQ allocation for shdesc update failed\n");
 		return PTR_ERR(new_fq);
 	}
@@ -334,7 +351,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 	drv_ctx->req_fq = new_fq;
 
 	/* Empty and remove the older FQ */
-	ret = empty_caam_fq(old_fq);
+	ret = empty_caam_fq(old_fq, drv_ctx);
 	if (ret) {
 		dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret);
@@ -342,8 +359,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 
 		return ret;
 	}
@@ -354,6 +370,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 	 */
 	drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
 					   num_words);
+	drv_ctx->prehdr[1] = cpu_to_caam32(PREHDR_ABS);
 	memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
 	dma_sync_single_for_device(qidev, drv_ctx->context_a,
 				   sizeof(drv_ctx->sh_desc) +
@@ -373,10 +390,9 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 	} else if (kill_fq(qidev, old_fq)) {
-		dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid);
+		dev_warn(qidev, "Old CAAM FQ kill failed\n");
 	}
 
 	return 0;
@@ -392,7 +408,6 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	dma_addr_t hwdesc;
 	struct caam_drv_ctx *drv_ctx;
 	const cpumask_t *cpus = qman_affine_cpus();
-	static DEFINE_PER_CPU(int, last_cpu);
 
 	num_words = desc_len(sh_desc);
 	if (num_words > MAX_SDLEN) {
@@ -411,6 +426,7 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	 */
 	drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
 					   num_words);
+	drv_ctx->prehdr[1] = cpu_to_caam32(PREHDR_ABS);
 	memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
 	size = sizeof(drv_ctx->prehdr) + sizeof(drv_ctx->sh_desc);
 	hwdesc = dma_map_single(qidev, drv_ctx->prehdr, size,
@@ -426,11 +442,8 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	if (!cpumask_test_cpu(*cpu, cpus)) {
 		int *pcpu = &get_cpu_var(last_cpu);
 
-		*pcpu = cpumask_next(*pcpu, cpus);
-		if (*pcpu >= nr_cpu_ids)
-			*pcpu = cpumask_first(cpus);
+		*pcpu = cpumask_next_wrap(*pcpu, cpus);
 		*cpu = *pcpu;
-
 		put_cpu_var(last_cpu);
 	}
 	drv_ctx->cpu = *cpu;
@@ -441,13 +454,16 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	/* Attach request FQ */
 	drv_ctx->req_fq = create_caam_req_fq(qidev, drv_ctx->rsp_fq, hwdesc,
 					     QMAN_INITFQ_FLAG_SCHED);
-	if (unlikely(IS_ERR_OR_NULL(drv_ctx->req_fq))) {
+	if (IS_ERR(drv_ctx->req_fq)) {
 		dev_err(qidev, "create_caam_req_fq failed\n");
 		dma_unmap_single(qidev, hwdesc, size, DMA_BIDIRECTIONAL);
 		kfree(drv_ctx);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	/* init reference counter used to track references to request FQ */
+	refcount_set(&drv_ctx->refcnt, 1);
+
 	drv_ctx->qidev = qidev;
 	return drv_ctx;
 }
@@ -495,12 +511,12 @@ void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx)
 }
 EXPORT_SYMBOL(caam_drv_ctx_rel);
 
-int caam_qi_shutdown(struct device *qidev)
+static void caam_qi_shutdown(void *data)
 {
-	int i, ret;
-	struct caam_qi_priv *priv = dev_get_drvdata(qidev);
+	int i;
+	struct device *qidev = data;
+	struct caam_qi_priv *priv = &qipriv;
 	const cpumask_t *cpus = qman_affine_cpus();
-	struct cpumask old_cpumask = current->cpus_allowed;
 
 	for_each_cpu(i, cpus) {
 		struct napi_struct *irqtask;
@@ -511,29 +527,13 @@ int caam_qi_shutdown(struct device *qidev)
 		if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
-		kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
+		free_netdev(per_cpu(pcpu_qipriv.net_dev, i));
 	}
 
-	/*
-	 * QMan driver requires CGRs to be deleted from same CPU from where they
-	 * were instantiated. Hence we get the module removal execute from the
-	 * same CPU from where it was originally inserted.
-	 */
-	set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
-
-	ret = qman_delete_cgr(&priv->cgr);
-	if (ret)
-		dev_err(qidev, "Deletion of CGR failed: %d\n", ret);
-	else
-		qman_release_cgrid(priv->cgr.cgrid);
+	qman_delete_cgr_safe(&priv->cgr);
+	qman_release_cgrid(priv->cgr.cgrid);
 
 	kmem_cache_destroy(qi_cache);
-
-	/* Now that we're done with the CGRs, restore the cpus allowed mask */
-	set_cpus_allowed_ptr(current, &old_cpumask);
-
-	platform_device_unregister(priv->qi_pdev);
-
-	return ret;
 }
 
 static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
@@ -541,9 +541,8 @@ static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
 	caam_congested = congested;
 
 	if (congested) {
-#ifdef CONFIG_DEBUG_FS
-		times_congested++;
-#endif
+		caam_debugfs_qi_congested();
+
 		pr_debug_ratelimited("CAAM entered congestion\n");
 	} else {
@@ -551,14 +550,10 @@ static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
 	}
 }
 
-static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np)
+static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np,
+				 bool sched_napi)
 {
-	/*
-	 * In case of threaded ISR, for RT kernels in_irq() does not return
-	 * appropriate value, so use in_serving_softirq to distinguish between
-	 * softirq and irq contexts.
-	 */
-	if (unlikely(in_irq() || !in_serving_softirq())) {
+	if (sched_napi) {
 		/* Disable QMan IRQ source and invoke NAPI */
 		qman_p_irqsource_remove(p, QM_PIRQ_DQRI);
 		np->p = p;
@@ -570,34 +565,47 @@
 static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
 						    struct qman_fq *rsp_fq,
-						    const struct qm_dqrr_entry *dqrr)
+						    const struct qm_dqrr_entry *dqrr,
+						    bool sched_napi)
 {
 	struct caam_napi *caam_napi = raw_cpu_ptr(&pcpu_qipriv.caam_napi);
 	struct caam_drv_req *drv_req;
 	const struct qm_fd *fd;
-	struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+	struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev->dev);
+	struct caam_drv_private *priv = dev_get_drvdata(qidev);
 	u32 status;
 
-	if (caam_qi_napi_schedule(p, caam_napi))
+	if (caam_qi_napi_schedule(p, caam_napi, sched_napi))
 		return qman_cb_dqrr_stop;
 
 	fd = &dqrr->fd;
-	status = be32_to_cpu(fd->status);
-	if (unlikely(status))
-		dev_err(qidev, "Error: %#x in CAAM response FD\n", status);
-
-	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
-		dev_err(qidev, "Non-compound FD from CAAM\n");
-		return qman_cb_dqrr_consume;
-	}
-
-	drv_req = (struct caam_drv_req *)phys_to_virt(qm_fd_addr_get64(fd));
+	drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
 	if (unlikely(!drv_req)) {
 		dev_err(qidev,
 			"Can't find original request for caam response\n");
 		return qman_cb_dqrr_consume;
 	}
 
+	refcount_dec(&drv_req->drv_ctx->refcnt);
+
+	status = be32_to_cpu(fd->status);
+	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
+		if (ssrc != JRSTA_SSRC_CCB_ERROR ||
+		    err_id != JRSTA_CCBERR_ERRID_ICVCHK)
+			dev_err_ratelimited(qidev,
+					    "Error: %#x in CAAM response FD\n",
+					    status);
+	}
+
+	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
+		dev_err(qidev, "Non-compound FD from CAAM\n");
+		return qman_cb_dqrr_consume;
+	}
+
 	dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
 			 sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
@@ -611,7 +619,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 	struct qman_fq *fq;
 	int ret;
 
-	fq = kzalloc(sizeof(*fq), GFP_KERNEL | GFP_DMA);
+	fq = kzalloc(sizeof(*fq), GFP_KERNEL);
 	if (!fq)
 		return -ENOMEM;
@@ -646,7 +654,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 
 	per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
 
-	dev_info(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
+	dev_dbg(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
 	return 0;
 }
@@ -654,9 +662,8 @@ static int init_cgr(struct device *qidev)
 {
 	int ret;
 	struct qm_mcc_initcgr opts;
-	const u64 cpus = *(u64 *)qman_affine_cpus();
-	const int num_cpus = hweight64(cpus);
-	const u64 val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
+	const u64 val = (u64)cpumask_weight(qman_affine_cpus()) *
+			MAX_RSP_FQ_BACKLOG_PER_CPU;
 
 	ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
 	if (ret) {
@@ -679,7 +686,7 @@ static int init_cgr(struct device *qidev)
 		return ret;
 	}
 
-	dev_info(qidev, "Congestion threshold set to %llu\n", val);
+	dev_dbg(qidev, "Congestion threshold set to %llu\n", val);
 	return 0;
 }
@@ -709,56 +716,43 @@ static void free_rsp_fqs(void)
 		kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
 }
 
+static void free_caam_qi_pcpu_netdev(const cpumask_t *cpus)
+{
+	struct caam_qi_pcpu_priv *priv;
+	int i;
+
+	for_each_cpu(i, cpus) {
+		priv = per_cpu_ptr(&pcpu_qipriv, i);
+		free_netdev(priv->net_dev);
+	}
+}
+
 int caam_qi_init(struct platform_device *caam_pdev)
 {
 	int err, i;
-	struct platform_device *qi_pdev;
-	struct device *ctrldev = &caam_pdev->dev, *qidev;
+	struct device *qidev = &caam_pdev->dev;
 	struct caam_drv_private *ctrlpriv;
 	const cpumask_t *cpus = qman_affine_cpus();
-	struct cpumask old_cpumask = current->cpus_allowed;
-	static struct platform_device_info qi_pdev_info = {
-		.name = "caam_qi",
-		.id = PLATFORM_DEVID_NONE
-	};
-
-	/*
-	 * QMAN requires CGRs to be removed from same CPU+portal from where it
-	 * was originally allocated. Hence we need to note down the
-	 * initialisation CPU and use the same CPU for module exit.
-	 * We select the first CPU to from the list of portal owning CPUs.
-	 * Then we pin module init to this CPU.
-	 */
-	mod_init_cpu = cpumask_first(cpus);
-	set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
-
-	qi_pdev_info.parent = ctrldev;
-	qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
-	qi_pdev = platform_device_register_full(&qi_pdev_info);
-	if (IS_ERR(qi_pdev))
-		return PTR_ERR(qi_pdev);
+	cpumask_var_t clean_mask;
 
-	ctrlpriv = dev_get_drvdata(ctrldev);
-	qidev = &qi_pdev->dev;
+	err = -ENOMEM;
+	if (!zalloc_cpumask_var(&clean_mask, GFP_KERNEL))
+		goto fail_cpumask;
 
-	qipriv.qi_pdev = qi_pdev;
-	dev_set_drvdata(qidev, &qipriv);
+	ctrlpriv = dev_get_drvdata(qidev);
 
 	/* Initialize the congestion detection */
 	err = init_cgr(qidev);
 	if (err) {
 		dev_err(qidev, "CGR initialization failed: %d\n", err);
-		platform_device_unregister(qi_pdev);
-		return err;
+		goto fail_cgr;
 	}
 
 	/* Initialise response FQs */
 	err = alloc_rsp_fqs(qidev);
 	if (err) {
 		dev_err(qidev, "Can't allocate CAAM response FQs: %d\n", err);
-		free_rsp_fqs();
-		platform_device_unregister(qi_pdev);
-		return err;
+		goto fail_fqs;
 	}
 
 	/*
@@ -769,37 +763,51 @@ int caam_qi_init(struct platform_device *caam_pdev)
 		struct caam_qi_pcpu_priv *priv = per_cpu_ptr(&pcpu_qipriv, i);
 		struct caam_napi *caam_napi = &priv->caam_napi;
 		struct napi_struct *irqtask = &caam_napi->irqtask;
-		struct net_device *net_dev = &priv->net_dev;
+		struct net_device *net_dev;
 
+		net_dev = alloc_netdev_dummy(0);
+		if (!net_dev) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		cpumask_set_cpu(i, clean_mask);
+		priv->net_dev = net_dev;
 		net_dev->dev = *qidev;
-		INIT_LIST_HEAD(&net_dev->napi_list);
 
-		netif_napi_add(net_dev, irqtask, caam_qi_poll,
-			       CAAM_NAPI_WEIGHT);
+		netif_napi_add_tx_weight(net_dev, irqtask, caam_qi_poll,
+					 CAAM_NAPI_WEIGHT);
 
 		napi_enable(irqtask);
 	}
 
-	/* Hook up QI device to parent controlling caam device */
-	ctrlpriv->qidev = qidev;
-
-	qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
-				     SLAB_CACHE_DMA, NULL);
+	qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE,
+				     dma_get_cache_alignment(), 0, NULL);
 	if (!qi_cache) {
 		dev_err(qidev, "Can't allocate CAAM cache\n");
-		free_rsp_fqs();
-		platform_device_unregister(qi_pdev);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto fail;
 	}
 
-	/* Done with the CGRs; restore the cpus allowed mask */
-	set_cpus_allowed_ptr(current, &old_cpumask);
-#ifdef CONFIG_DEBUG_FS
-	ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444,
-						     ctrlpriv->ctl,
-						     &times_congested,
-						     &caam_fops_u64_ro);
-#endif
+	caam_debugfs_qi_init(ctrlpriv);
+
+	err = devm_add_action_or_reset(qidev, caam_qi_shutdown, qidev);
+	if (err)
+		goto fail2;
+
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
-	return 0;
+
+	goto free_cpumask;
+
+fail2:
+	kmem_cache_destroy(qi_cache);
+fail:
+	free_caam_qi_pcpu_netdev(clean_mask);
+fail_fqs:
+	free_rsp_fqs();
+	qman_delete_cgr_safe(&qipriv.cgr);
+	qman_release_cgrid(qipriv.cgr.cgrid);
+fail_cgr:
+free_cpumask:
+	free_cpumask_var(clean_mask);
+fail_cpumask:
+	return err;
 }
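Note on the refcount changes above: the counter starts at 1 when the request FQ is created, is incremented per enqueued frame and decremented per response (or per rejected enqueue), so the FQ is known to be drained from CAAM's side exactly when the count is back to 1. The standalone C sketch below illustrates that drain pattern under stated assumptions: C11 atomics stand in for the kernel's refcount_t, and the names inflight, submit(), complete() and drain_wait() are illustrative only, not part of the driver.

/* Userspace illustration of the drain logic in empty_caam_fq().
 * Build with: cc -std=c11 drain.c -o drain
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int inflight = 1;      /* 1 == "FQ exists, nothing pending" */

static void submit(void)             /* caam_qi_enqueue(): count first */
{
	atomic_fetch_add(&inflight, 1);
	/* ... enqueue to hardware; on failure, drop the count again */
}

static void complete(void)           /* dqrr/ern callback: response seen */
{
	atomic_fetch_sub(&inflight, 1);
}

static bool drain_wait(int retries)  /* empty_caam_fq(): poll until idle */
{
	while (atomic_load(&inflight) != 1 && --retries)
		usleep(20 * 1000);   /* mirrors the driver's msleep(20) */
	return atomic_load(&inflight) == 1;
}

int main(void)
{
	submit();
	complete();
	printf("drained: %s\n", drain_wait(10) ? "yes" : "no");
	return 0;
}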

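Similarly, the cpumask_next_wrap() hunk replaces an open-coded advance-and-wrap search over the QMan-affine CPUs. The sketch below models that round-robin pick with a plain bitmask; NR_CPUS, next_cpu() and the example mask are illustrative assumptions, not the kernel cpumask API.

/* Illustration of the round-robin CPU pick in caam_drv_ctx_init():
 * advance past 'prev' in the mask, wrapping to the first set bit.
 */
#include <stdio.h>

#define NR_CPUS 8

static int next_cpu(unsigned int mask, int prev)
{
	for (int cpu = prev + 1; cpu < NR_CPUS; cpu++)  /* like cpumask_next() */
		if (mask & (1u << cpu))
			return cpu;
	for (int cpu = 0; cpu < NR_CPUS; cpu++)         /* wrap to first set bit */
		if (mask & (1u << cpu))
			return cpu;
	return -1;                                      /* empty mask */
}

int main(void)
{
	unsigned int affine = 0x0b;  /* say CPUs 0, 1 and 3 own QMan portals */
	int cpu = -1;

	for (int i = 0; i < 6; i++) {
		cpu = next_cpu(affine, cpu);
		printf("request FQ -> CPU %d\n", cpu);  /* prints 0 1 3 0 1 3 */
	}
	return 0;
}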