diff options
Diffstat (limited to 'drivers/crypto/intel/iaa')
-rw-r--r-- | drivers/crypto/intel/iaa/Makefile | 2 | ||||
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto.h | 41 | ||||
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c | 1 | ||||
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto_main.c | 382 | ||||
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto_stats.c | 215 | ||||
-rw-r--r-- | drivers/crypto/intel/iaa/iaa_crypto_stats.h | 8 |
6 files changed, 165 insertions, 484 deletions
diff --git a/drivers/crypto/intel/iaa/Makefile b/drivers/crypto/intel/iaa/Makefile index b64b208d2344..55bda7770fac 100644 --- a/drivers/crypto/intel/iaa/Makefile +++ b/drivers/crypto/intel/iaa/Makefile @@ -3,7 +3,7 @@ # Makefile for IAA crypto device drivers # -ccflags-y += -I $(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE=IDXD +ccflags-y += -I $(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE='"IDXD"' obj-$(CONFIG_CRYPTO_DEV_IAA_CRYPTO) := iaa_crypto.o diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 014420f7beb0..56985e395263 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -49,20 +49,18 @@ struct iaa_wq { struct iaa_device *iaa_device; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct iaa_device_compression_mode { const char *name; struct aecs_comp_table_record *aecs_comp_table; - struct aecs_decomp_table_record *aecs_decomp_table; dma_addr_t aecs_comp_table_dma_addr; - dma_addr_t aecs_decomp_table_dma_addr; }; /* Representation of IAA device with wqs, populated by probe */ @@ -75,10 +73,10 @@ struct iaa_device { int n_wq; struct list_head wqs; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct wq_table_entry { @@ -107,23 +105,6 @@ struct aecs_comp_table_record { u32 reserved_padding[2]; } __packed; -/* AECS for decompress */ -struct aecs_decomp_table_record { - u32 crc; - u32 xor_checksum; - u32 low_filter_param; - u32 high_filter_param; - u32 output_mod_idx; - u32 drop_init_decomp_out_bytes; - u32 reserved[36]; - u32 output_accum_data[2]; - u32 out_bits_valid; - u32 bit_off_indexing; - u32 input_accum_data[64]; - u8 size_qw[32]; - u32 decomp_state[1220]; -} __packed; - int iaa_aecs_init_fixed(void); void iaa_aecs_cleanup_fixed(void); @@ -136,9 +117,6 @@ struct iaa_compression_mode { int ll_table_size; u32 *d_table; int d_table_size; - u32 *header_table; - int header_table_size; - u16 gen_decomp_table_flags; iaa_dev_comp_init_fn_t init; iaa_dev_comp_free_fn_t free; }; @@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c index 45cf5d74f0fb..19d9a333ac49 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c @@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void) sizeof(fixed_ll_sym), fixed_d_sym, sizeof(fixed_d_sym), - NULL, 0, 0, init_fixed_mode, NULL); if (!ret) pr_debug("IAA fixed compression mode initialized\n"); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index dfd3baf0a8d8..23f585219fb4 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -33,8 +33,6 @@ static unsigned int nr_cpus_per_node; /* Number of physical cpus sharing each iaa instance */ static unsigned int cpus_per_iaa; -static struct crypto_comp *deflate_generic_tfm; - /* Per-cpu lookup table for balanced wqs */ static struct wq_table_entry __percpu *wq_table; @@ -173,7 +171,7 @@ static int set_iaa_sync_mode(const char *name) async_mode = false; use_irq = false; } else if (sysfs_streq(name, "async")) { - async_mode = true; + async_mode = false; use_irq = false; } else if (sysfs_streq(name, "async_irq")) { async_mode = true; @@ -258,16 +256,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode) kfree(mode->name); kfree(mode->ll_table); kfree(mode->d_table); - kfree(mode->header_table); kfree(mode); } /* - * IAA Compression modes are defined by an ll_table, a d_table, and an - * optional header_table. These tables are typically generated and - * captured using statistics collected from running actual - * compress/decompress workloads. + * IAA Compression modes are defined by an ll_table and a d_table. + * These tables are typically generated and captured using statistics + * collected from running actual compress/decompress workloads. * * A module or other kernel code can add and remove compression modes * with a given name using the exported @add_iaa_compression_mode() @@ -315,9 +311,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode); * @ll_table_size: The ll table size in bytes * @d_table: The d table * @d_table_size: The d table size in bytes - * @header_table: Optional header table - * @header_table_size: Optional header table size in bytes - * @gen_decomp_table_flags: Otional flags used to generate the decomp table * @init: Optional callback function to init the compression mode data * @free: Optional callback function to free the compression mode data * @@ -330,9 +323,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free) { @@ -355,31 +345,19 @@ int add_iaa_compression_mode(const char *name, goto free; if (ll_table) { - mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL); + mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL); if (!mode->ll_table) goto free; - memcpy(mode->ll_table, ll_table, ll_table_size); mode->ll_table_size = ll_table_size; } if (d_table) { - mode->d_table = kzalloc(d_table_size, GFP_KERNEL); + mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL); if (!mode->d_table) goto free; - memcpy(mode->d_table, d_table, d_table_size); mode->d_table_size = d_table_size; } - if (header_table) { - mode->header_table = kzalloc(header_table_size, GFP_KERNEL); - if (!mode->header_table) - goto free; - memcpy(mode->header_table, header_table, header_table_size); - mode->header_table_size = header_table_size; - } - - mode->gen_decomp_table_flags = gen_decomp_table_flags; - mode->init = init; mode->free = free; @@ -420,10 +398,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device, if (device_mode->aecs_comp_table) dma_free_coherent(dev, size, device_mode->aecs_comp_table, device_mode->aecs_comp_table_dma_addr); - if (device_mode->aecs_decomp_table) - dma_free_coherent(dev, size, device_mode->aecs_decomp_table, - device_mode->aecs_decomp_table_dma_addr); - kfree(device_mode); } @@ -440,73 +414,6 @@ static int check_completion(struct device *dev, bool compress, bool only_once); -static int decompress_header(struct iaa_device_compression_mode *device_mode, - struct iaa_compression_mode *mode, - struct idxd_wq *wq) -{ - dma_addr_t src_addr, src2_addr; - struct idxd_desc *idxd_desc; - struct iax_hw_desc *desc; - struct device *dev; - int ret = 0; - - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); - if (IS_ERR(idxd_desc)) - return PTR_ERR(idxd_desc); - - desc = idxd_desc->iax_hw; - - dev = &wq->idxd->pdev->dev; - - src_addr = dma_map_single(dev, (void *)mode->header_table, - mode->header_table_size, DMA_TO_DEVICE); - dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n", - __func__, mode->name, src_addr, dev, - mode->header_table, mode->header_table_size); - if (unlikely(dma_mapping_error(dev, src_addr))) { - dev_dbg(dev, "dma_map_single err, exiting\n"); - ret = -ENOMEM; - return ret; - } - - desc->flags = IAX_AECS_GEN_FLAG; - desc->opcode = IAX_OPCODE_DECOMPRESS; - - desc->src1_addr = (u64)src_addr; - desc->src1_size = mode->header_table_size; - - src2_addr = device_mode->aecs_decomp_table_dma_addr; - desc->src2_addr = (u64)src2_addr; - desc->src2_size = 1088; - dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n", - __func__, mode->name, desc->src2_addr, dev, desc->src2_size); - desc->max_dst_size = 0; // suppressed output - - desc->decompr_flags = mode->gen_decomp_table_flags; - - desc->priv = 0; - - desc->completion_addr = idxd_desc->compl_dma; - - ret = idxd_submit_desc(wq, idxd_desc); - if (ret) { - pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret); - goto out; - } - - ret = check_completion(dev, idxd_desc->iax_completion, false, false); - if (ret) - dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n", - __func__, mode->name, ret); - else - dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__, - mode->name); -out: - dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE); - - return ret; -} - static int init_device_compression_mode(struct iaa_device *iaa_device, struct iaa_compression_mode *mode, int idx, struct idxd_wq *wq) @@ -529,24 +436,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device, if (!device_mode->aecs_comp_table) goto free; - device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size, - &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL); - if (!device_mode->aecs_decomp_table) - goto free; - /* Add Huffman table to aecs */ memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size); memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); - if (mode->header_table) { - ret = decompress_header(device_mode, mode, wq); - if (ret) { - pr_debug("iaa header decompression failed: ret=%d\n", ret); - goto free; - } - } - if (mode->init) { ret = mode->init(device_mode); if (ret) @@ -599,10 +493,10 @@ static void remove_device_compression_modes(struct iaa_device *iaa_device) if (!device_mode) continue; - free_device_compression_mode(iaa_device, device_mode); - iaa_device->compression_modes[i] = NULL; if (iaa_compression_modes[i]->free) iaa_compression_modes[i]->free(device_mode); + free_device_compression_mode(iaa_device, device_mode); + iaa_device->compression_modes[i] = NULL; } } @@ -831,7 +725,7 @@ static int alloc_wq_table(int max_wqs) for (cpu = 0; cpu < nr_cpus; cpu++) { entry = per_cpu_ptr(wq_table, cpu); - entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL); + entry->wqs = kcalloc(max_wqs, sizeof(*entry->wqs), GFP_KERNEL); if (!entry->wqs) { free_wq_table(); return -ENOMEM; @@ -908,6 +802,8 @@ static int save_iaa_wq(struct idxd_wq *wq) return -EINVAL; cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; out: return 0; } @@ -923,10 +819,12 @@ static void remove_iaa_wq(struct idxd_wq *wq) } } - if (nr_iaa) + if (nr_iaa) { cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; - else - cpus_per_iaa = 0; + if (!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; } static int wq_table_add_wqs(int iaa, int cpu) @@ -996,7 +894,7 @@ out: static void rebalance_wq_table(void) { const struct cpumask *node_cpus; - int node, cpu, iaa = -1; + int node_cpu, node, cpu, iaa = 0; if (nr_iaa == 0) return; @@ -1007,36 +905,29 @@ static void rebalance_wq_table(void) clear_wq_table(); if (nr_iaa == 1) { - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (WARN_ON(wq_table_add_wqs(0, cpu))) { - pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); - return; - } + for_each_possible_cpu(cpu) { + if (WARN_ON(wq_table_add_wqs(0, cpu))) + goto err; } return; } for_each_node_with_cpus(node) { + cpu = 0; node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < nr_cpus_per_node; cpu++) { - int node_cpu = cpumask_nth(cpu, node_cpus); - - if (WARN_ON(node_cpu >= nr_cpu_ids)) { - pr_debug("node_cpu %d doesn't exist!\n", node_cpu); - return; - } - - if ((cpu % cpus_per_iaa) == 0) - iaa++; - - if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { - pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); - return; - } + for_each_cpu(node_cpu, node_cpus) { + iaa = cpu / cpus_per_iaa; + if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) + goto err; + cpu++; } } + + return; +err: + pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); } static inline int check_completion(struct device *dev, @@ -1045,12 +936,22 @@ static inline int check_completion(struct device *dev, bool only_once) { char *op_str = compress ? "compress" : "decompress"; + int status_checks = 0; int ret = 0; while (!comp->status) { if (only_once) return -EAGAIN; cpu_relax(); + if (status_checks++ >= IAA_COMPLETION_TIMEOUT) { + /* Something is wrong with the hw, disable it. */ + dev_err(dev, "%s completion timed out - " + "assuming broken hw, iaa_crypto now DISABLED\n", + op_str); + iaa_crypto_enabled = false; + ret = -ETIMEDOUT; + goto out; + } } if (comp->status != IAX_COMP_SUCCESS) { @@ -1091,17 +992,11 @@ out: static int deflate_generic_decompress(struct acomp_req *req) { - void *src, *dst; + ACOMP_FBREQ_ON_STACK(fbreq, req); int ret; - src = kmap_local_page(sg_page(req->src)) + req->src->offset; - dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset; - - ret = crypto_comp_decompress(deflate_generic_tfm, - src, req->slen, dst, &req->dlen); - - kunmap_local(src); - kunmap_local(dst); + ret = crypto_acomp_decompress(fbreq); + req->dlen = fbreq->dlen; update_total_sw_decomp_calls(); @@ -1115,8 +1010,7 @@ static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc); + dma_addr_t dst_addr, unsigned int *dlen); static void iaa_desc_complete(struct idxd_desc *idxd_desc, enum idxd_complete_type comp_type, @@ -1177,15 +1071,15 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, update_total_comp_bytes_out(ctx->req->dlen); update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); } else { - update_total_decomp_bytes_in(ctx->req->dlen); - update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen); + update_total_decomp_bytes_in(ctx->req->slen); + update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen); } if (ctx->compress && compression_ctx->verify_compress) { + u32 *compression_crc = acomp_request_ctx(ctx->req); dma_addr_t src_addr, dst_addr; - u32 compression_crc; - compression_crc = idxd_desc->iax_completion->crc; + *compression_crc = idxd_desc->iax_completion->crc; ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr); if (ret) { @@ -1195,8 +1089,7 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, } ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr, - ctx->req->slen, dst_addr, &ctx->req->dlen, - compression_crc); + ctx->req->slen, dst_addr, &ctx->req->dlen); if (ret) { dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret); err = -EIO; @@ -1225,12 +1118,11 @@ out: static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 *compression_crc, - bool disable_async) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *compression_crc = acomp_request_ctx(req); struct iaa_device *iaa_device; struct idxd_desc *idxd_desc; struct iax_hw_desc *desc; @@ -1270,7 +1162,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, desc->src2_size = sizeof(struct aecs_comp_table_record); desc->completion_addr = idxd_desc->compl_dma; - if (ctx->use_irq && !disable_async) { + if (ctx->use_irq) { desc->flags |= IDXD_OP_FLAG_RCI; idxd_desc->crypto.req = req; @@ -1283,8 +1175,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: compression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1304,7 +1195,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, update_total_comp_calls(); update_wq_comp_calls(wq); - if (ctx->async_mode && !disable_async) { + if (ctx->async_mode) { ret = -EINPROGRESS; dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); goto out; @@ -1378,11 +1269,11 @@ out: static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *compression_crc = acomp_request_ctx(req); struct iaa_device *iaa_device; struct idxd_desc *idxd_desc; struct iax_hw_desc *desc; @@ -1442,10 +1333,10 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, goto err; } - if (compression_crc != idxd_desc->iax_completion->crc) { + if (*compression_crc != idxd_desc->iax_completion->crc) { ret = -EINVAL; dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" - " comp=0x%x, decomp=0x%x\n", compression_crc, + " comp=0x%x, decomp=0x%x\n", *compression_crc, idxd_desc->iax_completion->crc); print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, idxd_desc->iax_completion, 64, 0); @@ -1465,8 +1356,7 @@ err: static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - bool disable_async) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1508,7 +1398,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, desc->src1_size = slen; desc->completion_addr = idxd_desc->compl_dma; - if (ctx->use_irq && !disable_async) { + if (ctx->use_irq) { desc->flags |= IDXD_OP_FLAG_RCI; idxd_desc->crypto.req = req; @@ -1521,8 +1411,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: decompression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1542,7 +1431,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, update_total_decomp_calls(); update_wq_decomp_calls(wq); - if (ctx->async_mode && !disable_async) { + if (ctx->async_mode) { ret = -EINPROGRESS; dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); goto out; @@ -1590,13 +1479,10 @@ static int iaa_comp_acompress(struct acomp_req *req) struct iaa_compression_ctx *compression_ctx; struct crypto_tfm *tfm = req->base.tfm; dma_addr_t src_addr, dst_addr; - bool disable_async = false; int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; - u32 compression_crc; struct idxd_wq *wq; struct device *dev; - int order = -1; compression_ctx = crypto_tfm_ctx(tfm); @@ -1626,21 +1512,6 @@ static int iaa_comp_acompress(struct acomp_req *req) iaa_wq = idxd_wq_get_private(wq); - if (!req->dst) { - gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - - /* incompressible data will always be < 2 * slen */ - req->dlen = 2 * req->slen; - order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE); - req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL); - if (!req->dst) { - ret = -ENOMEM; - order = -1; - goto out; - } - disable_async = true; - } - dev = &wq->idxd->pdev->dev; nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); @@ -1670,7 +1541,7 @@ static int iaa_comp_acompress(struct acomp_req *req) req->dst, req->dlen, sg_dma_len(req->dst)); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, - &req->dlen, &compression_crc, disable_async); + &req->dlen); if (ret == -EINPROGRESS) return ret; @@ -1682,7 +1553,7 @@ static int iaa_comp_acompress(struct acomp_req *req) } ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, compression_crc); + dst_addr, &req->dlen); if (ret) dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret); @@ -1701,100 +1572,6 @@ err_map_dst: out: iaa_wq_put(wq); - if (order >= 0) - sgl_free_order(req->dst, order); - - return ret; -} - -static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) -{ - gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? - GFP_KERNEL : GFP_ATOMIC; - struct crypto_tfm *tfm = req->base.tfm; - dma_addr_t src_addr, dst_addr; - int nr_sgs, cpu, ret = 0; - struct iaa_wq *iaa_wq; - struct device *dev; - struct idxd_wq *wq; - int order = -1; - - cpu = get_cpu(); - wq = wq_table_next_wq(cpu); - put_cpu(); - if (!wq) { - pr_debug("no wq configured for cpu=%d\n", cpu); - return -ENODEV; - } - - ret = iaa_wq_get(wq); - if (ret) { - pr_debug("no wq available for cpu=%d\n", cpu); - return -ENODEV; - } - - iaa_wq = idxd_wq_get_private(wq); - - dev = &wq->idxd->pdev->dev; - - nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { - dev_dbg(dev, "couldn't map src sg for iaa device %d," - " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, - iaa_wq->wq->id, ret); - ret = -EIO; - goto out; - } - src_addr = sg_dma_address(req->src); - dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," - " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, - req->src, req->slen, sg_dma_len(req->src)); - - req->dlen = 4 * req->slen; /* start with ~avg comp rato */ -alloc_dest: - order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE); - req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL); - if (!req->dst) { - ret = -ENOMEM; - order = -1; - goto out; - } - - nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - if (nr_sgs <= 0 || nr_sgs > 1) { - dev_dbg(dev, "couldn't map dst sg for iaa device %d," - " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, - iaa_wq->wq->id, ret); - ret = -EIO; - goto err_map_dst; - } - - dst_addr = sg_dma_address(req->dst); - dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," - " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, - req->dst, req->dlen, sg_dma_len(req->dst)); - ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, true); - if (ret == -EOVERFLOW) { - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); - req->dlen *= 2; - if (req->dlen > CRYPTO_ACOMP_DST_MAX) - goto err_map_dst; - goto alloc_dest; - } - - if (ret != 0) - dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret); - - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); -err_map_dst: - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); -out: - iaa_wq_put(wq); - - if (order >= 0) - sgl_free_order(req->dst, order); - return ret; } @@ -1817,9 +1594,6 @@ static int iaa_comp_adecompress(struct acomp_req *req) return -EINVAL; } - if (!req->dst) - return iaa_comp_adecompress_alloc_dest(req); - cpu = get_cpu(); wq = wq_table_next_wq(cpu); put_cpu(); @@ -1865,7 +1639,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) req->dst, req->dlen, sg_dma_len(req->dst)); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, false); + dst_addr, &req->dlen); if (ret == -EINPROGRESS) return ret; @@ -1900,23 +1674,16 @@ static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm) return 0; } -static void dst_free(struct scatterlist *sgl) -{ - /* - * Called for req->dst = NULL cases but we free elsewhere - * using sgl_free_order(). - */ -} - static struct acomp_alg iaa_acomp_fixed_deflate = { .init = iaa_comp_init_fixed, .compress = iaa_comp_acompress, .decompress = iaa_comp_adecompress, - .dst_free = dst_free, .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_ctxsize = sizeof(struct iaa_compression_ctx), + .cra_reqsize = sizeof(u32), .cra_module = THIS_MODULE, .cra_priority = IAA_ALG_PRIORITY, } @@ -2102,7 +1869,7 @@ static int __init iaa_crypto_init_module(void) int ret = 0; int node; - nr_cpus = num_online_cpus(); + nr_cpus = num_possible_cpus(); for_each_node_with_cpus(node) nr_nodes++; if (!nr_nodes) { @@ -2111,15 +1878,6 @@ static int __init iaa_crypto_init_module(void) } nr_cpus_per_node = nr_cpus / nr_nodes; - if (crypto_has_comp("deflate-generic", 0, 0)) - deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0); - - if (IS_ERR_OR_NULL(deflate_generic_tfm)) { - pr_err("IAA could not alloc %s tfm: errcode = %ld\n", - "deflate-generic", PTR_ERR(deflate_generic_tfm)); - return -ENOMEM; - } - ret = iaa_aecs_init_fixed(); if (ret < 0) { pr_debug("IAA fixed compression mode init failed\n"); @@ -2161,7 +1919,6 @@ err_verify_attr_create: err_driver_reg: iaa_aecs_cleanup_fixed(); err_aecs_init: - crypto_free_comp(deflate_generic_tfm); goto out; } @@ -2178,12 +1935,11 @@ static void __exit iaa_crypto_cleanup_module(void) &driver_attr_verify_compress); idxd_driver_unregister(&iaa_crypto_driver); iaa_aecs_cleanup_fixed(); - crypto_free_comp(deflate_generic_tfm); pr_debug("cleaned up\n"); } -MODULE_IMPORT_NS(IDXD); +MODULE_IMPORT_NS("IDXD"); MODULE_LICENSE("GPL"); MODULE_ALIAS_IDXD_DEVICE(0); MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index 2e3b7b73af20..f5cc3d29ca19 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -17,165 +17,117 @@ #include "iaa_crypto.h" #include "iaa_crypto_stats.h" -static u64 total_comp_calls; -static u64 total_decomp_calls; -static u64 total_sw_decomp_calls; -static u64 max_comp_delay_ns; -static u64 max_decomp_delay_ns; -static u64 max_acomp_delay_ns; -static u64 max_adecomp_delay_ns; -static u64 total_comp_bytes_out; -static u64 total_decomp_bytes_in; -static u64 total_completion_einval_errors; -static u64 total_completion_timeout_errors; -static u64 total_completion_comp_buf_overflow_errors; +static atomic64_t total_comp_calls; +static atomic64_t total_decomp_calls; +static atomic64_t total_sw_decomp_calls; +static atomic64_t total_comp_bytes_out; +static atomic64_t total_decomp_bytes_in; +static atomic64_t total_completion_einval_errors; +static atomic64_t total_completion_timeout_errors; +static atomic64_t total_completion_comp_buf_overflow_errors; static struct dentry *iaa_crypto_debugfs_root; void update_total_comp_calls(void) { - total_comp_calls++; + atomic64_inc(&total_comp_calls); } void update_total_comp_bytes_out(int n) { - total_comp_bytes_out += n; + atomic64_add(n, &total_comp_bytes_out); } void update_total_decomp_calls(void) { - total_decomp_calls++; + atomic64_inc(&total_decomp_calls); } void update_total_sw_decomp_calls(void) { - total_sw_decomp_calls++; + atomic64_inc(&total_sw_decomp_calls); } void update_total_decomp_bytes_in(int n) { - total_decomp_bytes_in += n; + atomic64_add(n, &total_decomp_bytes_in); } void update_completion_einval_errs(void) { - total_completion_einval_errors++; + atomic64_inc(&total_completion_einval_errors); } void update_completion_timeout_errs(void) { - total_completion_timeout_errors++; + atomic64_inc(&total_completion_timeout_errors); } void update_completion_comp_buf_overflow_errs(void) { - total_completion_comp_buf_overflow_errors++; -} - -void update_max_comp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_comp_delay_ns) - max_comp_delay_ns = time_diff; -} - -void update_max_decomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_decomp_delay_ns) - max_decomp_delay_ns = time_diff; -} - -void update_max_acomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_acomp_delay_ns) - max_acomp_delay_ns = time_diff; -} - -void update_max_adecomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_adecomp_delay_ns) - max_adecomp_delay_ns = time_diff; + atomic64_inc(&total_completion_comp_buf_overflow_errors); } void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_calls++; - wq->iaa_device->comp_calls++; + atomic64_inc(&wq->comp_calls); + atomic64_inc(&wq->iaa_device->comp_calls); } void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_bytes += n; - wq->iaa_device->comp_bytes += n; + atomic64_add(n, &wq->comp_bytes); + atomic64_add(n, &wq->iaa_device->comp_bytes); } void update_wq_decomp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_calls++; - wq->iaa_device->decomp_calls++; + atomic64_inc(&wq->decomp_calls); + atomic64_inc(&wq->iaa_device->decomp_calls); } void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_bytes += n; - wq->iaa_device->decomp_bytes += n; + atomic64_add(n, &wq->decomp_bytes); + atomic64_add(n, &wq->iaa_device->decomp_bytes); } static void reset_iaa_crypto_stats(void) { - total_comp_calls = 0; - total_decomp_calls = 0; - total_sw_decomp_calls = 0; - max_comp_delay_ns = 0; - max_decomp_delay_ns = 0; - max_acomp_delay_ns = 0; - max_adecomp_delay_ns = 0; - total_comp_bytes_out = 0; - total_decomp_bytes_in = 0; - total_completion_einval_errors = 0; - total_completion_timeout_errors = 0; - total_completion_comp_buf_overflow_errors = 0; + atomic64_set(&total_comp_calls, 0); + atomic64_set(&total_decomp_calls, 0); + atomic64_set(&total_sw_decomp_calls, 0); + atomic64_set(&total_comp_bytes_out, 0); + atomic64_set(&total_decomp_bytes_in, 0); + atomic64_set(&total_completion_einval_errors, 0); + atomic64_set(&total_completion_timeout_errors, 0); + atomic64_set(&total_completion_comp_buf_overflow_errors, 0); } static void reset_wq_stats(struct iaa_wq *wq) { - wq->comp_calls = 0; - wq->comp_bytes = 0; - wq->decomp_calls = 0; - wq->decomp_bytes = 0; + atomic64_set(&wq->comp_calls, 0); + atomic64_set(&wq->comp_bytes, 0); + atomic64_set(&wq->decomp_calls, 0); + atomic64_set(&wq->decomp_bytes, 0); } static void reset_device_stats(struct iaa_device *iaa_device) { struct iaa_wq *iaa_wq; - iaa_device->comp_calls = 0; - iaa_device->comp_bytes = 0; - iaa_device->decomp_calls = 0; - iaa_device->decomp_bytes = 0; + atomic64_set(&iaa_device->comp_calls, 0); + atomic64_set(&iaa_device->comp_bytes, 0); + atomic64_set(&iaa_device->decomp_calls, 0); + atomic64_set(&iaa_device->decomp_bytes, 0); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) reset_wq_stats(iaa_wq); @@ -184,10 +136,14 @@ static void reset_device_stats(struct iaa_device *iaa_device) static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq) { seq_printf(m, " name: %s\n", iaa_wq->wq->name); - seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_wq->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_wq->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_wq->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n\n", + atomic64_read(&iaa_wq->decomp_bytes)); } static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) @@ -197,30 +153,41 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) seq_puts(m, "iaa device:\n"); seq_printf(m, " id: %d\n", iaa_device->idxd->id); seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq); - seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_device->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_device->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_device->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n", + atomic64_read(&iaa_device->decomp_bytes)); seq_puts(m, " wqs:\n"); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) wq_show(m, iaa_wq); } -static void global_stats_show(struct seq_file *m) +static int global_stats_show(struct seq_file *m, void *v) { seq_puts(m, "global stats:\n"); - seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); - seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls); - seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls); - seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out); - seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in); + seq_printf(m, " total_comp_calls: %llu\n", + atomic64_read(&total_comp_calls)); + seq_printf(m, " total_decomp_calls: %llu\n", + atomic64_read(&total_decomp_calls)); + seq_printf(m, " total_sw_decomp_calls: %llu\n", + atomic64_read(&total_sw_decomp_calls)); + seq_printf(m, " total_comp_bytes_out: %llu\n", + atomic64_read(&total_comp_bytes_out)); + seq_printf(m, " total_decomp_bytes_in: %llu\n", + atomic64_read(&total_decomp_bytes_in)); seq_printf(m, " total_completion_einval_errors: %llu\n", - total_completion_einval_errors); + atomic64_read(&total_completion_einval_errors)); seq_printf(m, " total_completion_timeout_errors: %llu\n", - total_completion_timeout_errors); + atomic64_read(&total_completion_timeout_errors)); seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", - total_completion_comp_buf_overflow_errors); + atomic64_read(&total_completion_comp_buf_overflow_errors)); + + return 0; } static int wq_stats_show(struct seq_file *m, void *v) @@ -229,8 +196,6 @@ static int wq_stats_show(struct seq_file *m, void *v) mutex_lock(&iaa_devices_lock); - global_stats_show(m); - list_for_each_entry(iaa_device, &iaa_devices, list) device_stats_show(m, iaa_device); @@ -267,6 +232,18 @@ static const struct file_operations wq_stats_fops = { .release = single_release, }; +static int global_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, global_stats_show, file); +} + +static const struct file_operations global_stats_fops = { + .open = global_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n"); int __init iaa_crypto_debugfs_init(void) @@ -275,27 +252,9 @@ int __init iaa_crypto_debugfs_init(void) return -ENODEV; iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - if (!iaa_crypto_debugfs_root) - return -ENOMEM; - - debugfs_create_u64("max_comp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_decomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("max_acomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_adecomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("total_comp_calls", 0644, - iaa_crypto_debugfs_root, &total_comp_calls); - debugfs_create_u64("total_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_decomp_calls); - debugfs_create_u64("total_sw_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_sw_decomp_calls); - debugfs_create_u64("total_comp_bytes_out", 0644, - iaa_crypto_debugfs_root, &total_comp_bytes_out); - debugfs_create_u64("total_decomp_bytes_in", 0644, - iaa_crypto_debugfs_root, &total_decomp_bytes_in); + + debugfs_create_file("global_stats", 0644, iaa_crypto_debugfs_root, NULL, + &global_stats_fops); debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL, &wq_stats_fops); debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c10b87b86fa4..3787a5f507eb 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -13,10 +13,6 @@ void update_total_comp_bytes_out(int n); void update_total_decomp_calls(void); void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); -void update_max_comp_delay_ns(u64 start_time_ns); -void update_max_decomp_delay_ns(u64 start_time_ns); -void update_max_acomp_delay_ns(u64 start_time_ns); -void update_max_adecomp_delay_ns(u64 start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -35,10 +31,6 @@ static inline void update_total_comp_bytes_out(int n) {} static inline void update_total_decomp_calls(void) {} static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} -static inline void update_max_comp_delay_ns(u64 start_time_ns) {} -static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_acomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} |